Changeset 2324


Ignore:
Timestamp:
25/03/07 22:11:02 (12 years ago)
Author:
selatham
Message:

new SchemaNameSpace corrector

Location:
TI01-discovery/trunk/ingestAutomation/OAIBatch
Files:
1 added
1 edited

Legend:

Unmodified
Added
Removed
  • TI01-discovery/trunk/ingestAutomation/OAIBatch/oai_ingest.py

    r2306 r2324  
    2929import SpaceTimeIngestFromMOLES 
    3030import keywordAdder 
     31import SchemaNameSpace from SchemaNameSpace 
    3132from DIF import DIF 
    3233from MDIP import MDIP 
     
    150151    status= os.system(commandline) 
    151152 
     153# Create/clear the directory for the 'out' namespace corrected copy of the discovery records. 
     154if os.path.isdir("/usr/local/WSClients/OAIBatch/data/" + datacentre +"/discovery_corrected"): 
     155    commandline = "ls -1 /usr/local/WSClients/OAIBatch/data/" + datacentre +"/discovery/ | xargs -i rm /usr/local/WSClients/OAIBatch/data/" + datacentre +"/discovery/{\}" 
     156    print "Executing : " + commandline 
     157    status = os.system(commandline) 
     158else: 
     159    commandline = "mkdir /usr/local/WSClients/OAIBatch/data/" + datacentre +"/discovery_corrected" 
     160    print "Executing : " + commandline 
     161    status= os.system(commandline) 
     162 
    152163# The file config.properties contains the location of the particular datacentres harvested records. 
    153164# Copy the datacentre specific version of config to config.properties file. 
     
    171182                original_filename = indir + "/" + filename 
    172183                ident=getID(original_filename) 
    173                 print "ID extracted from the DIF = %s" %ident 
     184                print "ID extracted from the discovery record = %s" %ident 
    174185                if NDG_dataProvider: 
    175186                    new_filename = outdir + "/"+ ident.replace(":","__")+".xml" 
     
    192203if status !=0: 
    193204    sys.exit("Failed at ingesting into exist db. Datacentre =  %s. Status = %s" %(datacentre,status)) 
     205 
     206#replace any namespace declarations with a standard one which we know works in NDG 
     207indir="/usr/local/WSClients/OAIBatch/data/" + datacentre +"/discovery" 
     208outdir="/usr/local/WSClients/OAIBatch/data/" + datacentre +"/discovery_corrected" 
     209filenames = os.listdir(indir) 
     210for filename in filenames: 
     211        if filename.find('.xml') != -1: 
     212                    in_filename = indir + "/" + filename 
     213                    corrected_filename = outdir + "/" + filename 
     214                    try: 
     215                       SchemaNameSpace.main(in_filename, corrected_filename,datacentre_format) 
     216                    except: 
     217                       print "SchemaNameSpace failed on file %s"%in_filename 
    194218 
    195219#are there any old records hanging around.If so, remove. 
Note: See TracChangeset for help on using the changeset viewer.