Changeset 4257


Ignore:
Timestamp:
30/09/08 16:58:15 (11 years ago)
Author:
cbyrom
Message:

Fix handling of NDG-hosted data: properly read config settings from file
in ingest, and improve use of default value checking.

Location:
TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/PostgresRecord.py

    r4223 r4257  
    3333    ''' 
    3434    # TODO MDIP transforms do not work very well for lots of files - so currently hiding these 
    35     documentTypes = ['MOLES', 'DIF', 'DC', 'ISO19139','MDIP'] 
     35    documentTypes = ['MOLES', 'DIF', 'DC', 'ISO19139', 'MDIP'] 
    3636     
    3737    # vocab server - used for finding scope values in the moles files 
    3838    ndg_data_provider_vocab = "http://vocab.ndg.nerc.ac.uk/term/N010" 
    3939         
    40     def __init__(self, filename, ndg_dataprovider, datacentre_groups, datacentre_namespace, discovery_id, xq, docType): 
     40    def __init__(self, filename, ndg_dataprovider, datacentre_groups, \ 
     41                 datacentre_namespace, discovery_id, xq, docType): 
    4142        logging.info("Setting up Postgres record for file, " + filename) 
    4243        self.filename = filename 
     
    229230 
    230231        # add keywords, if required 
    231         if self._datacentre_groups != "": 
     232        if self._datacentre_groups: 
    232233            self.addKeywords() 
    233234         
     
    317318        @return: TimeRange object array with temporal data 
    318319        ''' 
    319          
    320320        if self.stData is None: 
    321              
    322321            self.getSpatioTemporalData() 
    323    
     322         
    324323        return self.stData.getTemporalData() 
    325324         
     
    354353        Extract spatio temporal data from the original document 
    355354        ''' 
    356          
    357         
    358355        logging.info('Retrieving spatiotemporal info from moles file') 
    359356        # initialise the various spatiotemporal arrays used to extract data to 
     
    377374            self.getCoordData(self.dgMeta) 
    378375 
    379          
    380  
    381376        #SJD error with line below- this is where 23/09/08 edit in PostgresDAO fudge sorts... 
    382377        if not self.dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage.dgTemporalCoverage: 
    383              
    384378            logging.info("No temporal coverage elements found - assuming no temporal data available") 
    385379        else: 
    386             
    387380            self.getTimeRangeData(self.dgMeta) 
    388381 
     
    522515        try: 
    523516 
    524             
    525517            bboxes = dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage.dgSpatialCoverage.BoundingBox 
    526518             
     
    528520                logging.info("No bounding box info found for document") 
    529521                return 
    530             
     522                 
    531523            bbox_list=self.listify(bboxes) 
    532524            #parse the list of coordinates 
  • TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/keywordAdder.py

    r4027 r4257  
    4949 
    5050def main(indir, outdir, keywords): 
    51     if len(sys.argv) < 2 or indir == "" or outdir == "" or keywords == []: 
     51    if not indir or not outdir or not keywords: 
    5252        print "USAGE: keywordAdder indir, outdir, keywords " 
    5353        print " where indir= full path of directory where MOLES records reside," 
  • TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/oai_document_ingester.py

    r3972 r4257  
    1818import db_funcs 
    1919 
    20 class oai_document_ingester: 
     20class oai_document_ingester(object): 
    2121        ''' 
    2222        Class to handle the ingest of files from the OAI harvester to the discovery service postgres DB 
     
    6464                try: 
    6565                        discoveryID = self.getID(filename) 
    66                          
    6766                        record = PostgresRecord(filename, self._NDG_dataProvider, \ 
    6867                                                            self._datacentre_groups, self._datacentre_namespace, \ 
     
    125124                    if words[0] == 'namespace': 
    126125                        self._datacentre_namespace = words[1] 
    127                     if words[0] == 'self._NDG_dataProvider': 
     126                    if words[0] == 'NDG_dataProvider': 
    128127                        self._NDG_dataProvider = True 
    129128                 
Note: See TracChangeset for help on using the changeset viewer.