Ignore:
Timestamp:
08/01/07 19:13:41 (13 years ago)
Author:
selatham
Message:

Changes to take the id used in the renamed output filename from inside the Discovery record, rather than from the original filename.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • TI01-discovery/trunk/ingestAutomation/OAIBatch/oai_ingest.py

    r1898 r1971  
    3636datacentre_format = "" 
    3737datacentre_namespace = "" 
     38NDG_dataProvider = False 
    3839 
    3940if (len(sys.argv) < 2): 
     
    7071    if words[0] == 'namespace': 
    7172        datacentre_namespace = words[1] 
     73    if words[0] == 'NDG_dataProvider': 
     74        NDG_dataProvider = True 
     75 
    7276datacentre_config_file.close() 
    7377 
     
    143147 
    144148 
    145 #Execute the script which processes/renames the files (changed 24/11/06 to simply create a re-named file in the outdir) 
     149#Execute the script which processes/renames the files (changed 08/01/07 to get id from inside file) 
    146150indir="/usr/local/WSClients/OAIBatch/data/" + datacentre +"/oai/originals" 
    147151outdir="/usr/local/WSClients/OAIBatch/data/" + datacentre +"/discovery" 
     
    151155        if filename.find('.xml') != -1: 
    152156                original_filename = indir + "/" + filename 
    153                 #print "Creating renamed file : " 
    154                 new_filename = outdir + "/" +datacentre_namespace+ "__" +filename.split('%3A')[-1] 
    155                 #print "original file = %s, newfile = %s" %(original_filename, new_filename) 
     157                if datacentre_format == "DIF": 
     158                    from DIF import DIF 
     159                    from ETxmlView import loadET 
     160                    xml=file(original_filename).read() 
     161                    y=loadET(xml) 
     162                    d=DIF(xml) 
     163                    print d.entryID 
     164                    if NDG_dataProvider: 
     165                        new_filename = outdir + "/"+d.entryID.replace(":","__")+".xml" 
     166                    else: 
     167                        new_filename = outdir + "/" +datacentre_namespace+ "__"+datacentre_format+ "__"+d.entryID+".xml" 
     168                else: 
     169                    sys.exit("Doesn't handle anything else but DIF here.") 
     170                #new_filename = outdir + "/" +datacentre_namespace+ "__"+datacentre_format+ "__"+filename.split('%3A')[-1] 
     171                print "original file = %s, newfile = %s" %(original_filename, new_filename) 
    156172                commandline = "cp "+original_filename+ " " +new_filename 
    157173                #print "Executing : " + commandline 
    158                 status = os.system(commandline) 
    159                 if status !=0: 
    160                         sys.exit("Failed at re-naming file stage") 
    161                 #oaiClean.oaiClean(indir,outdir,filename,wrapFlag) 
     174                status = os.system(commandline) 
     175                if status !=0: 
     176                    sys.exit("Failed at re-naming file stage") 
    162177                numfilesproc += 1 
    163178        else: 
     
    170185if status !=0: 
    171186    sys.exit("Failed at ingesting into exist db. Datacentre =  %s. Status = %s" %(datacentre,status)) 
     187 
     188#are there any old records hanging around.If so, copy away 
     189outdir = "./DIF2MOLES" 
     190try: 
     191    os.stat(outdir) 
     192except: 
     193    print "No old moles records hanging around" 
     194else: 
     195    commandline = "ls -1 ./DIF2MOLES | xargs -i rm ./DIF2MOLES/{\}" 
     196    print "Executing : " + commandline 
     197    status = os.system(commandline) 
     198    if status !=0: 
     199        sys.exit("Failed at clearing out DIF2MOLES area.") 
     200    commandline = "rmdir ./DIF2MOLES" 
     201    print "Executing : " + commandline 
     202    status = os.system(commandline) 
     203    if status !=0: 
     204        sys.exit("Failed at removing DIF2MOLES directory.") 
    172205 
    173206# Then run the minimum moles creator  which will run over all records in the supplied collection 
     
    179212    print "ERROR: couldn't create the minimum moles records" 
    180213    sys.exit 
    181 #are there any records 
    182 outdir = "./DIF2MOLES" 
     214#There should be some records now 
    183215try: 
    184216    os.stat(outdir) 
    185217except: 
    186     print "ERROR: couldn't create the minimum moles records for %s" %datacentre 
     218    print "ERROR: couldn't create any minimum moles records for %s" %datacentre 
    187219    sys.exit() 
    188220 
Note: See TracChangeset for help on using the changeset viewer.