Ignore:
Timestamp:
05/05/09 14:49:17 (11 years ago)
Author:
cbyrom
Message:

Update ingest scripts for use with the OAIInfoEditor harvest
functionality: allow config and jar file resources to be
retrieved even when not running inside the ingest package, and
allow a harvest directory and data format that differ from those
in the config file to be specified.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/oai_document_ingester.py

    r5243 r5248  
    2323        ''' 
    2424 
    25         def processDataCentre(self, dataCentre): 
    26                  
     25        def processDataCentre(self, dataCentre, harvestDir = None, dataFormat = None): 
     26                ''' 
     27                Ingest documents from the specified data centre 
     28                @param dataCentre: data centre to ingest docs from 
     29                @keyword harvestDir: directory to get docs from - NB, this will override that 
     30                specified in the associated config file.  Typically this is used when a manual 
     31                harvest has retrieved docs to a local dir (see OAIInfoEditor.lib.harvester).  
     32                @param dataFormat: format of data to ingest.  Overrides config file settings. 
     33                @return isSuccess, outMessage: isSuccess = True if ingest completes ok 
     34                outMessage = summary of ingest process 
     35                ''' 
    2736                self._no_files_ingested = 0 
    2837                self._no_problem_files = 0 
    2938                self._error_messages = '' 
    3039                self.dataCentre = dataCentre 
    31                  
    3240                self._base_dir = os.getcwd() + "/" # this is the base dir that the script is ran from 
    3341                self._setupDataCentreDirs() 
     
    4048                 
    4149                self.getConfigDetails(dataCentre) 
     50                # override default settings with input keyword values, if set 
     51                if harvestDir: 
     52                        self._harvest_home = harvestDir 
     53                if dataFormat: 
     54                        self._datacentre_format = dataFormat 
    4255                 
    4356                # check harvest dir exists and that there are any records to harvest? 
    4457                if not os.path.exists(self._harvest_home): 
    45                         logging.info("Harvest directory for dataCentre %s (%s) could not be found - exiting" \ 
     58                        logging.error("Harvest directory for dataCentre %s (%s) could not be found - exiting" \ 
    4659                                                 %(dataCentre, self._harvest_home)) 
    4760                        return 
     
    6780                outMessage = "OAI Document ingest processing complete:" 
    6881                logging.info("oai_document_ingest processing complete:") 
     82                isSuccess = False 
    6983                if self._no_problem_files == 0: 
    7084                        logging.info("All files successfully processed - cleaning harvest directory") 
    7185                        FileUtilities.cleanDir(self._harvest_home) 
     86                        isSuccess = True 
    7287                else: 
    7388                        logging.error("Problems experienced with %s files" %self._no_problem_files) 
     
    7590                 
    7691                logging.info(self.lineSeparator) 
    77                 logging.info("INFO: Number of files processed = %s" %numfilesproc) 
    78                 outMessage += 'Number of files processed = %s' %numfilesproc 
    79                 logging.info("INFO: Number of files ingested = %s" %self._no_files_ingested) 
    80                 outMessage += 'Number of files ingestest = %s' %self._no_files_ingested 
     92                message = 'Number of files processed = %s\n' %numfilesproc 
     93                logging.info(message) 
     94                outMessage += message 
     95                message = "Number of files ingested = %s\n" %self._no_files_ingested 
     96                logging.info(message) 
     97                outMessage += message 
    8198                if self._error_messages: 
    8299                        outMessage += 'Errors: %s' %self._error_messages  
    83100                logging.info(self.lineSeparator) 
    84101                print "Script finished running." 
    85                 return outMessage 
     102                return isSuccess, outMessage 
    86103 
    87104         
Note: See TracChangeset for help on using the changeset viewer.