Changeset 2252


Ignore:
Timestamp:
09/03/07 17:33:03 (12 years ago)
Author:
selatham
Message:

Copes with MDIP format. Some new config files.

Location:
TI01-discovery/trunk/ingestAutomation/OAIBatch
Files:
3 added
3 edited

Legend:

Unmodified
Added
Removed
  • TI01-discovery/trunk/ingestAutomation/OAIBatch/SpaceTimeIngestFromMOLES.py

    r2088 r2252  
    88 88                print "INFO: XML moles document %s does not contain a bounding box." %full_filename
    89 89                no_bbox=True
     90            if bbox_list[0] =='None': 
     91                no_bbox=True 
     92 
    90 93            try:
    91 94                dates=dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage.dgTemporalCoverage.DateRange
  • TI01-discovery/trunk/ingestAutomation/OAIBatch/bodc_config.properties

    r1988 r2252  
    5 5 #
    6 6 #Define host_path as the full directory name where this data centre's records will be harvested into.
    7 host_path /usr/local/jakarta-tomcat/webapps/oai/WEB-INF/harvested_records/grid-bodc-nerc-ac-uk-dif-ndg_bodc
     7 host_path /usr/local/jakarta-tomcat/webapps/oai/WEB-INF/harvested_records/grid-bodc-nerc-ac-uk-dif
    8 8 #
    9 9 #Define groups - portal groups for limiting searches by 'group of datacentres'.
    10 groups NERC-DDC MDIP
     10 #groups NERC_DDC MDIP
    11 11 #
    12 12 #Define which format is harvested from the data centre (one only)
  • TI01-discovery/trunk/ingestAutomation/OAIBatch/oai_ingest.py

    r2088 r2252  
    29 29 import SpaceTimeIngestFromMOLES
    30 30 import keywordAdder
    31 #import oaiClean
     31 from DIF import DIF
     32 from MDIP import MDIP
     33
     34 def getID(filename):
     35        ''' Gets the identifier out of an input metadata xml record. Copes with DIF and MDIP currently.''' 
     36        xml=file(filename).read() 
     37        if datacentre_format == "DIF": 
     38            d=DIF(xml) 
     39            ID=d.entryID 
     40        elif datacentre_format == "MDIP": 
     41            d=MDIP(xml) 
     42            ID=d.id 
     43        else: 
     44            sys.exit("Only handles DIF or MDIP here.") 
     45        return ID 
    32 46
    33 47 status = 0
     
    156 170        if filename.find('.xml') != -1:
    157 171                original_filename = indir + "/" + filename
    158                 if datacentre_format == "DIF": 
    159                     from DIF import DIF 
    160                     from ETxmlView import loadET 
    161                     xml=file(original_filename).read() 
    162                     y=loadET(xml) 
    163                     d=DIF(xml) 
    164                     #print "ID extracted from the DIF = %s" %d.entryID 
    165                     if NDG_dataProvider: 
    166                         new_filename = outdir + "/"+d.entryID.replace(":","__")+".xml" 
    167                     else: 
    168                         new_filename = outdir + "/" +datacentre_namespace+ "__"+datacentre_format+ "__"+d.entryID+".xml" 
     172                ident=getID(original_filename) 
     173                print "ID extracted from the DIF = %s" %ident 
     174                if NDG_dataProvider: 
     175                    new_filename = outdir + "/"+ ident.replace(":","__")+".xml" 
    169 176                else:
    170                     sys.exit("Doesn't handle anything else but DIF here.") 
    171                 #print "original file = %s, newfile = %s" %(original_filename, new_filename) 
     177                    new_filename = outdir + "/" +datacentre_namespace+ "__"+datacentre_format+ "__"+ ident +".xml" 
     178                print "original file = %s, newfile = %s" %(original_filename, new_filename) 
    172 179                commandline = "cp "+original_filename+ " " +new_filename
    173 180                #print "Executing : " + commandline
     
    203 210 for filename in filenames:
    204 211        if filename.find('.xml') != -1:
    205                 if datacentre_format == "DIF": 
    206 212                    original_filename = outdir + "/" + filename
    207                     from DIF import DIF 
    208                     from ETxmlView import loadET 
    209                     xml=file(original_filename).read() 
    210                     y=loadET(xml) 
    211                     d=DIF(xml) 
    212                     print "ID extracted from the DIF = %s" %d.entryID 
     213                    ident=getID(original_filename) 
    213 214                    if NDG_dataProvider:
    214                         molesLocalID = d.entryID.split(":",2)[2] 
     215                        molesLocalID = ident.split(":",2)[2] 
    215 216                    else:
    216                         molesLocalID = d.entryID 
     217                        molesLocalID = ident 
    217 218                    print "molesLocalID is %s" %molesLocalID
    218 219                    commandline = "java -jar D2B/d2boneoff.jar repositoryID " +datacentre_namespace+" repositoryLocalID "+datacentre+" format "+ \
    219 220                    datacentre_format+" repository xmldb:exist://glue.badc.rl.ac.uk:8080/exist/xmlrpc userpw xxxxxx targetCollection /db/discovery/original/"+ \
    220                     datacentre_format+"/"+datacentre_namespace +" inputRecordID "+d.entryID+ " outputLocalID "+molesLocalID+ " > ./DIF2MOLES/"+filename 
     221                    datacentre_format+"/"+datacentre_namespace +" inputRecordID "+ ident+ " outputLocalID "+molesLocalID+ " > ./DIF2MOLES/"+filename 
    221 222                    print "Executing command to run d2boneoff.jar"
    222 223                    status= os.system(commandline)
Note: See TracChangeset for help on using the changeset viewer.