Changeset 5253 for TI01-discovery/trunk


Ignore:
Timestamp:
05/05/09 17:18:36 (10 years ago)
Author:
cbyrom
Message:

Add code to allow ingest of docs when the harvest button is clicked - NB,
the harvest completes first of all, then the ingest is run asynchronously
since it can take some time. If the user has provided an email
address for the provider, the results are then mailed to them.

Location:
TI01-discovery/trunk/OAIInfoEditor/oai_info_editor
Files:
1 added
6 edited

Legend:

Unmodified
Added
Removed
  • TI01-discovery/trunk/OAIInfoEditor/oai_info_editor/config/oiemiddleware.py

    r5239 r5253  
    5151            raise ValueError("No harvest directory config data has been specified - so cannot harvest files.") 
    5252         
    53         self.globals.harvester = Harvester(outDir = harvestDir) 
     53        self.globals.harvester = Harvester(self.globals.mailServer, outDir = harvestDir) 
    5454 
    5555 
  • TI01-discovery/trunk/OAIInfoEditor/oai_info_editor/controllers/harvest.py

    r5245 r5253  
    2222        logging.info("Setting up page to display info for provider, '%s'" %providerName) 
    2323 
     24        pi = g.dao.getProviderInfo(providerName) 
    2425        ri = g.dao.getRepositoryInfo(providerName, repositoryName) 
    2526        if not ri: 
     
    2829 
    2930        try: 
    30             status, result = g.harvester.runHarvestAndIngest(providerName, ri) 
     31            status, result = g.harvester.runHarvestAndIngest(providerName, ri, 
     32                                                             ingestAsynch = True, 
     33                                                             userEmail = pi.email) 
    3134        except Exception, e: 
    3235            c.errors = {'System error': e} 
     
    3639            %(repositoryName, providerName, result) 
    3740        if not status: 
    38             outMessage = "A problem (%s) occurred whilst harvesting the data - please retry later" \ 
    39                 %result 
     41            outMessage = "A problem occurred whilst harvesting the data - please retry later." 
     42            c.errors = {'Harvest/ingest error': result} 
     43            return render('genshi', 'error') 
    4044 
    4145        # now return to the home page - and display pop up 
  • TI01-discovery/trunk/OAIInfoEditor/oai_info_editor/lib/constants.py

    r5239 r5253  
    1414VIEW_ALL_DATA_TITLE = "View all provider data" 
    1515EDIT_PAGE_TITLE = "Edit data for provider, '%s'" 
     16 
     17INGEST_RESULTS_TITLE = 'Results from %s document ingest' 
    1618 
    1719SUBMIT_BUTTON_TEXT = 'Submit checked requests together' 
  • TI01-discovery/trunk/OAIInfoEditor/oai_info_editor/lib/harvester.py

    r5245 r5253  
    99from ndg.common.src.models.myconfig import myConfig 
    1010from OAIBatch.oai_document_ingester import oai_document_ingester 
    11  
     11from ndg.common.src.lib.mailer import mailHandler 
     12import oai_info_editor.lib.constants as constants 
     13from threading import Thread 
     14 
     15class IngestThread(Thread): 
     16    ''' 
     17    Class to allow ingesting of docs asynchronously - i.e. in a new thread 
     18    NB, the results of this are reported by email  
     19    ''' 
     20     
     21    def __init__ (self, harvester, providerName, harvestDir, format): 
     22        ''' 
     23        Constructor for setting up thread for asynchronous ingest of docs 
     24        @param harvester: Harvester instance to do the ingesting 
     25        @param providerName: name of provider to ingest docs from 
     26        @param harvestDir: directory to ingest files from 
     27        @param format: format of docs to ingest  
     28        ''' 
     29        logging.info("Setting up thread to ingest data for datacentre, '%s'" %providerName) 
     30        Thread.__init__(self) 
     31        self.harvester = harvester 
     32        self.providerName = providerName 
     33        self.harvestDir = harvestDir 
     34        self.format = format 
     35        logging.info("- finished setting up thread") 
     36 
     37       
     38    def run(self): 
     39        logging.info("Running thread to ingest datacentre docs") 
     40        isSuccess, ingestMessage = self.harvester.ingestDocuments(self.providerName,  
     41                                                                  self.harvestDir, 
     42                                                                  self.format) 
     43        logging.info("- finished ingesting data") 
     44        if self.harvester.userEmail: 
     45            logging.info("Sending notification mail to '%s'" %self.harvester.userEmail) 
     46            status, message = mailHandler([self.harvester.userEmail],  
     47                                          constants.INGEST_RESULTS_TITLE %self.providerName,  
     48                                          ingestMessage, 
     49                                          server = self.harvester.mailServer) 
     50            logging.info("- email sent") 
     51        logging.info("Ingest procedure complete") 
     52             
    1253 
    1354class Harvester(object): 
     
    1758    JAVA_COMMAND = 'java %s -cp lib/DLESETools.jar:lib/jdom-b7.jar:lib/xercesImpl.jar:lib/xml-apis.jar org.dlese.dpc.oai.harvester.Harvester %s %s %s' 
    1859 
    19     def __init__(self, outDir = None, proxyHost = '130.246.135.176', 
    20                  proxyPort = '8080', configFile = None): 
     60    def __init__(self, mailServer, outDir = None,  
     61                  proxyHost = '130.246.135.176', 
     62                  proxyPort = '8080', configFile = None): 
    2163        ''' 
    2264        Constructor - initialise the Harvester class 
     65        @param mailServer: server for sending notification mails from the asynch ingest 
    2366        @keyword outDir: directory to harvest files to - NB, 
    2467        this typically doesn't change for different harvests - which is why it 
     
    3073        ''' 
    3174        logging.debug("Initialising Harvester object") 
     75        self.mailServer = mailServer 
    3276        self.outDir = outDir 
    3377        self.proxyHost = proxyHost 
     
    5296        be harvested 
    5397        @keyword outDir: directory to harvest files to 
     98        @raise SystemError: if run harvester on a Windows machine  
    5499        @raise ValueError: if repositoryInfo is not a RepositoryInfo object 
    55         @return status, outMessage: Status = True, if successful, False otherwise 
    56         outMessage = summary of harvest outcome  
     100        @return status, outMessage, harvestDir: Status = True, if successful, False otherwise 
     101        outMessage = summary of harvest outcome, harvestDir = directory docs harvested to  
    57102        ''' 
    58103        logging.info("Running data harvest") 
     
    123168            # apostrophes so remove any in the message string 
    124169            outMessage = outMessage.replace('\'', '') 
    125             return False, outMessage 
     170            return False, outMessage, localHarvestDir 
    126171 
    127172        logging.info("- harvest completed successfully") 
    128173        # NB, the last line has the summary of the harvest - so just return this 
    129         return True, data[-1] 
    130  
    131  
    132     def ingestDocuments(self, providerName): 
     174        return True, data[-1], localHarvestDir 
     175 
     176 
     177    def ingestDocuments(self, providerName, harvestDir, dataFormat): 
    133178        ''' 
    134179        Ingest harvested documents into the discovery service 
    135180        @param providerName: Name of provider whose documents should be ingested 
     181        @param harvestDir: Directory to ingest docs from 
     182        @param dataFormat: format of data to ingest 
    136183        @return status, outMessage: Status = True, if successful, False otherwise 
    137184        outMessage = summary of ingest outcome  
     
    140187        if not self.ingester: 
    141188            self.ingester = oai_document_ingester() 
    142          
    143         result = "" 
    144         isSuccess = False 
     189 
    145190        try: 
    146             result = ingester.processDataCentre(datacentre) 
    147             isSuccess = True 
    148         except: 
    149             logging.error("Exception thrown - detail: ") 
    150             result = sys.exc_info() 
    151             logging.error(result) 
    152             logging.info("Continue processing next datacentre config file...") 
    153  
     191            isSuccess, result = self.ingester.processDataCentre(providerName,  
     192                                                                harvestDir = harvestDir, 
     193                                                                dataFormat = dataFormat) 
     194        except Exception, e: 
     195            isSuccess = False 
     196            logging.error(e) 
     197            result = "Unexpected error occurred during ingest: %s" %str(e) 
     198 
     199        logging.debug(result) 
    154200        logging.debug("Document ingest complete") 
    155201        return isSuccess, result 
    156202 
    157203 
    158     def runHarvestAndIngest(self, providerName, repositoryInfo, outDir = None): 
     204    def runHarvestAndIngest(self, providerName, repositoryInfo, outDir = None, 
     205                                                          ingestAsynch = False, userEmail = None): 
    159206        ''' 
    160207        Harvest and ingest documents into the discovery service 
     
    163210        be harvested 
    164211        @keyword outDir: directory to harvest files to 
     212        @keyword ingestAsynch: if True, run the ingest stage in a separate thread. Default = False  
     213        @keyword userEmail: mail address to send notification mails to. Default = None 
    165214        @return status, outMessage: Status = True, if successful, False otherwise 
    166215        outMessage = summary of ingest outcome  
    167216        ''' 
    168217        logging.debug("Running harvest then ingest") 
    169         isSuccess, outMessage = self.harvestRepository(repositoryInfo, outDir = outDir) 
     218        self.userEmail = userEmail 
     219        isSuccess, outMessage, harvestDir = self.harvestRepository(repositoryInfo, outDir = outDir) 
    170220        if isSuccess: 
     221            ingestMessage = "" 
    171222            logging.debug("- harvest successful, so running ingest") 
    172             isSuccess, ingestMessage = self.ingestDocuments(providerName) 
     223            if ingestAsynch: 
     224                logging.debug("- running ingest asynchronously") 
     225                thread = IngestThread(self, providerName, harvestDir,  
     226                                                  repositoryInfo.dataFormat) 
     227                thread.start() 
     228                ingestMessage += "  Document ingest is now running" 
     229                if not self.userEmail: 
     230                    ingestMessage += " - NB, if you wish to see the results, please include an email addresss with the provider data." 
     231                else: 
     232                    ingestMessage += " - once complete, results will be emailed to %s" %userEmail 
     233            else: 
     234                logging.debug("- running ingest synchronously") 
     235                isSuccess, ingestMessage = self.ingestDocuments(providerName,  
     236                                                                                                                        harvestDir, 
     237                                                                                                                        repositoryInfo.dataFormat) 
    173238             
    174239            outMessage += ingestMessage 
  • TI01-discovery/trunk/OAIInfoEditor/oai_info_editor/templates/home.html

    r5239 r5253  
    1919            <div class="metadata"> 
    2020                            <h1>$c.title</h1> 
     21                        <div py:replace="displayErrors()"/> 
    2122                                <div py:if="c.createLink"> 
    2223                                    <h2> 
  • TI01-discovery/trunk/OAIInfoEditor/oai_info_editor/tests/lib/testharvester.py

    r5239 r5253  
    3333    def testInvalidHarvestRepository(self): 
    3434        self.h.JAVA_COMMAND = HARVEST_COMMAND 
    35         status, out = self.h.harvestRepository(REPOSITORY_INFO_1, outDir = VALID_HARVEST_DIR) 
     35        status, out, dir = self.h.harvestRepository(REPOSITORY_INFO_1, outDir = VALID_HARVEST_DIR) 
    3636        self.assertFalse(status) 
    3737        self.assertEquals('Error: The request for data resulted in an invalid response from the provider. Error: Server returned HTTP response code: 400 for URL: http://rep2.ac.uk?verb=Identify', out) 
     38        self.assertEquals('tmp/rep2-ac-uk-DIF', dir) 
    3839 
    3940    def testHarvestRepository(self): 
    4041        self.h.JAVA_COMMAND = HARVEST_COMMAND 
    41         status, out = self.h.harvestRepository(VALID_REAL_REPOSITORY_INFO, outDir = VALID_HARVEST_DIR) 
     42        status, out, dir = self.h.harvestRepository(VALID_REAL_REPOSITORY_INFO, outDir = VALID_HARVEST_DIR) 
    4243        self.assertTrue(status) 
    4344        self.assertTrue(out.find('Harvest of http://badc.nerc.ac.uk/badc_oai/provider is complete') > -1) 
     45        self.assertEquals('tmp/badc-nerc-ac-uk-dif', dir) 
    4446     
    4547    def tearDown(self): 
    46         cleanDir(VALID_HARVEST_DIR) 
    47         os.rmdir(VALID_HARVEST_DIR) 
     48        if exists(VALID_HARVEST_DIR): 
     49            cleanDir(VALID_HARVEST_DIR) 
     50            os.rmdir(VALID_HARVEST_DIR) 
Note: See TracChangeset for help on using the changeset viewer.