Changeset 5241 for TI01-discovery/trunk


Ignore:
Timestamp:
30/04/09 11:23:04 (10 years ago)
Author:
cbyrom
Message:

Extend harvest functionality to allow it to run over all provider info and to expose an entry point for scripts to use.
File:
1 edited

Legend:

Unmodified
Added
Removed
  • TI01-discovery/trunk/OAIInfoEditor/oai_info_editor/lib/harvester.py

    r5239 r5241  
    44 @author: C Byrom, Tessella Feb 2009 
    55''' 
    6 import logging, commands, os 
     6import logging, commands, os, sys 
    77from oai_info_editor.model.repositoryinfo import RepositoryInfo 
     8from oai_info_editor.dal.providerinfodao import * 
     9from ndg.common.src.models.myconfig import myConfig 
    810 
    911 
     
    1517 
    1618    def __init__(self, outDir = None, proxyHost = '130.246.135.176', 
    17                  proxyPort = '8080'): 
     19                 proxyPort = '8080', configFile = None): 
    1820        ''' 
    1921        Constructor - initialise the Harvester class 
     
    2123        this typically doesn't change for different harvests - which is why it 
    2224        @keyword proxyHost: proxy host to use in comms - defaults to wwwcache.rl.ac.uk IP address 
    23         @keyword proxyPort: port for the proxy host to use - defaults to '8080'    
     25        @keyword proxyPort: port for the proxy host to use - defaults to '8080' 
    2426        is set in the constructor  
     27        @keyword configFile: ini file to define the outDir and the data for accessing the 
     28        OAI info editor data.  NB, the settings in configFile override the outDir keyword 
    2529        ''' 
    2630        logging.debug("Initialising Harvester object") 
     
    2832        self.proxyHost = proxyHost 
    2933        self.proxyPort = proxyPort 
     34        self.cf = None 
     35        if configFile: 
     36            self.cf = myConfig(configFile) 
     37            outDir = self.cf.get('DATA_STORE', 'harvestDir') 
     38            if outDir: 
     39                self.outDir = outDir 
     40         
     41        self.dao = None # data access object for retrieving all providers info 
    3042        logging.info("ProviderInfoDAO initialised") 
    3143     
     
    144156            harvestCMD += ' -splitBySet:%s' %repositoryInfo.splitBySet 
    145157        return harvestCMD 
     158 
     159 
     160    def harvestAll(self): 
     161        ''' 
     162        Harvest all data specified in the oai info editor files 
     163        ''' 
     164        logging.info("Harvesting all available provider info") 
     165        if not self.cf: 
     166            raise ValueError("No config file available - cannot get info to harvest all provider info.") 
     167 
     168        if not self.dao: 
     169            self.dao = createDAOWithClient(client = FILE_CLIENT_TYPE,  
     170                                           configFile = self.cf) 
     171         
     172        pis = self.dao.getAllProviderInfo() 
     173        for pi in pis: 
     174            logging.info("Harvesting info for provider, '%s'" %pi.name) 
     175            for ri in pi.repositoryInfos: 
     176                logging.info(" - harvesting repository info, '%s'" %ri.name) 
     177                self.harvestRepository(ri) 
     178                 
     179        logging.info("- harvesting complete") 
     180     
     181         
     182# entry point for running as a script - e.g. via crontab 
     183if __name__=="__main__": 
     184         
     185    loggingLevel = logging.DEBUG 
     186    logging.basicConfig(level = loggingLevel, 
     187                        format='%(asctime)s %(filename)s:%(lineno)d %(levelname)s %(message)s') 
     188     
     189    if len(sys.argv) < 2: 
     190        raise ValueError("Usage: harvester <configFile>\n\n- NB, config file should be the ini file used by the oai info editor.") 
     191    h = Harvester(configFile = sys.argv[1]) 
     192    h.harvestAll() 
     193     
Note: See TracChangeset for help on using the changeset viewer.