Changeset 2252
- Timestamp:
- 09/03/07 17:33:03 (14 years ago)
- Location:
- TI01-discovery/trunk/ingestAutomation/OAIBatch
- Files:
-
- 3 added
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
TI01-discovery/trunk/ingestAutomation/OAIBatch/SpaceTimeIngestFromMOLES.py
r2088 r2252
88 88   print "INFO: XML moles document %s does not contain a bounding box." %full_filename
89 89   no_bbox=True
   90 + if bbox_list[0] =='None':
   91 + no_bbox=True
   92 +
90 93   try:
91 94   dates=dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage.dgTemporalCoverage.DateRange
-
TI01-discovery/trunk/ingestAutomation/OAIBatch/bodc_config.properties
r1988 r2252
 5  5   #
 6  6   #Define host_path as the full directory name where this data centre's records will be harvested into.
 7    - host_path /usr/local/jakarta-tomcat/webapps/oai/WEB-INF/harvested_records/grid-bodc-nerc-ac-uk-dif -ndg_bodc
    7 + host_path /usr/local/jakarta-tomcat/webapps/oai/WEB-INF/harvested_records/grid-bodc-nerc-ac-uk-dif
 8  8   #
 9  9   #Define groups - portal groups for limiting searches by 'group of datacentres'.
10    - groups NERC-DDC MDIP
   10 + #groups NERC_DDC MDIP
11 11   #
12 12   #Define which format is harvested from the data centre (one only)
-
TI01-discovery/trunk/ingestAutomation/OAIBatch/oai_ingest.py
r2088 r2252
 29  29   import SpaceTimeIngestFromMOLES
 30  30   import keywordAdder
 31     - #import oaiClean
     31 + from DIF import DIF
     32 + from MDIP import MDIP
     33 +
     34 + def getID(filename):
     35 + ''' Gets the identifier out of an input metadata xml record. Copes with DIF and MDIP currently.'''
     36 + xml=file(filename).read()
     37 + if datacentre_format == "DIF":
     38 + d=DIF(xml)
     39 + ID=d.entryID
     40 + elif datacentre_format == "MDIP":
     41 + d=MDIP(xml)
     42 + ID=d.id
     43 + else:
     44 + sys.exit("Only handles DIF or MDIP here.")
     45 + return ID
 32  46
 33  47   status = 0
  …   …
156 170   if filename.find('.xml') != -1:
157 171   original_filename = indir + "/" + filename
158     - if datacentre_format == "DIF":
159     - from DIF import DIF
160     - from ETxmlView import loadET
161     - xml=file(original_filename).read()
162     - y=loadET(xml)
163     - d=DIF(xml)
164     - #print "ID extracted from the DIF = %s" %d.entryID
165     - if NDG_dataProvider:
166     - new_filename = outdir + "/"+d.entryID.replace(":","__")+".xml"
167     - else:
168     - new_filename = outdir + "/" +datacentre_namespace+ "__"+datacentre_format+ "__"+d.entryID+".xml"
    172 + ident=getID(original_filename)
    173 + print "ID extracted from the DIF = %s" %ident
    174 + if NDG_dataProvider:
    175 + new_filename = outdir + "/"+ ident.replace(":","__")+".xml"
169 176   else:
170     - sys.exit("Doesn't handle anything else but DIF here.")
171     - #print "original file = %s, newfile = %s" %(original_filename, new_filename)
    177 + new_filename = outdir + "/" +datacentre_namespace+ "__"+datacentre_format+ "__"+ ident +".xml"
    178 + print "original file = %s, newfile = %s" %(original_filename, new_filename)
172 179   commandline = "cp "+original_filename+ " " +new_filename
173 180   #print "Executing : " + commandline
  …   …
203 210   for filename in filenames:
204 211   if filename.find('.xml') != -1:
205     - if datacentre_format == "DIF":
206 212   original_filename = outdir + "/" + filename
207     - from DIF import DIF
208     - from ETxmlView import loadET
209     - xml=file(original_filename).read()
210     - y=loadET(xml)
211     - d=DIF(xml)
212     - print "ID extracted from the DIF = %s" %d.entryID
    213 + ident=getID(original_filename)
213 214   if NDG_dataProvider:
214     - molesLocalID = d.entryID.split(":",2)[2]
    215 + molesLocalID = ident.split(":",2)[2]
215 216   else:
216     - molesLocalID = d.entryID
    217 + molesLocalID = ident
217 218   print "molesLocalID is %s" %molesLocalID
218 219   commandline = "java -jar D2B/d2boneoff.jar repositoryID " +datacentre_namespace+" repositoryLocalID "+datacentre+" format "+ \
219 220   datacentre_format+" repository xmldb:exist://glue.badc.rl.ac.uk:8080/exist/xmlrpc userpw xxxxxx targetCollection /db/discovery/original/"+ \
220     - datacentre_format+"/"+datacentre_namespace +" inputRecordID "+ d.entryID+ " outputLocalID "+molesLocalID+ " > ./DIF2MOLES/"+filename
    221 + datacentre_format+"/"+datacentre_namespace +" inputRecordID "+ ident+ " outputLocalID "+molesLocalID+ " > ./DIF2MOLES/"+filename
221 222   print "Executing command to run d2boneoff.jar"
222 223   status= os.system(commandline)
Note: See TracChangeset for help on using the changeset viewer.