Changeset 3803 for TI01-discovery
- Timestamp: 18/04/08 15:21:14 (13 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
TI01-discovery/trunk/ingestAutomation/OAIBatch/oai_ingest.py
r3785 r3803 1 1 #!/usr/bin/env python 2 """ Script oai_ingest.py takes parameters <datacentre> <dbinfofile> <existhost> <backupdir>. 2 """ Script oai_ingest.py takes parameters <datacentre> <dbinfofile> <existhost> <backupdir> <javahome>. 3 NOTE:might have to change the location of tomcat if it's not in the usual place (/usr/local/tomcat). 4 3 5 The /usr/local/WSClients/OAIBatch directory contains:- 4 6 - this python script, plus some other modules eg ndgUtils for parts of the process. … … 8 10 ./data 9 11 - /DATACENTRE/ 10 - discovery/:Re-named documents.11 - discovery_corrected Documents with schema namespaces corrected, ready to ingest in the discovery service.12 12 - discovery/: Re-named documents. 13 - discovery_corrected Documents with schema namespaces corrected, ready to ingest in the discovery service. 14 - oai/difYYYYMMDD/ Documents as harvested from OAI 13 15 Where /DATACENTRE varies for the different data providers 14 16 """ … … 24 26 # December 2007 SEL rewrite to use Bryans' python XQuery stuff to create mini-moles instead of java. 25 27 # Also extracted hard coded pwds into a file. 28 # April 2008 SEL use exist interface that comes with the .war rather than the old-hat stuff in exist-client 26 29 27 30 import os … … 64 67 NDG_dataProvider = False 65 68 66 if (len(sys.argv) < 5):67 print "ERROR: <datacentre> or <db info file> or <existhost> or <backupdir> parameter not supplied."69 if (len(sys.argv) < 6): 70 print "ERROR: <datacentre> or <db info file> or <existhost> or <backupdir> or <java_home> parameter not supplied." 68 71 sys.exit() 69 72 else: … … 74 77 backupdir = sys.argv[4] 75 78 #backupdir = '/disks/glue1/oaiBackup/' 79 java_home = sys.argv[5] 76 80 77 81 #Change os directory to that with the code in it. 
… … 80 84 # Other settings and constants 81 85 date_string = commands.getoutput ("date +'%y%m%d_%H%M'") 82 os.putenv ('EXIST_HOME', '/usr/local/exist-client')86 #os.putenv ('EXIST_HOME', '/usr/local/exist-client') 83 87 #os.putenv ('JAVA_HOME', '/usr/java/jdk1.5.0_03') 84 os.putenv ('PATH', ':/usr/java/jdk1.5.0_03/bin:/usr/java/jdk1.5.0_03:/usr/java/jdk1.5.0_03/lib/tools.jar:/usr/local/WSClients/OAIBatch:/usr/local/exist-client/bin:/bin:/usr/bin:.')88 #os.putenv ('PATH', ':/usr/java/jdk1.5.0_03/bin:/usr/java/jdk1.5.0_03:/usr/java/jdk1.5.0_03/lib/tools.jar:/usr/local/WSClients/OAIBatch:/usr/local/exist-client/bin:/bin:/usr/bin:.') 85 89 #os.putenv ('CLASSPATH','.:/usr/java/jdk1.5.0_03/lib/tools.jar') 86 90 … … 115 119 116 120 if harvest_home == "": 117 sys.exit(" Failed at getting harvested records directory stage. datacentre config file tried = %s" %datacentre_config_filename)121 sys.exit("ERROR: Failed at getting harvested records directory stage. datacentre config file tried = %s" %datacentre_config_filename) 118 122 else: 119 123 print "INFO: harvested records are in %s" %harvest_home … … 125 129 126 130 if datacentre_format == "": 127 sys.exit(" Failed at stage: getting datacentre format. datacentre config file tried = %s" %datacentre_config_filename)131 sys.exit("ERROR: Failed at stage: getting datacentre format. datacentre config file tried = %s" %datacentre_config_filename) 128 132 else: 129 133 print "INFO: format being harvested = %s" %datacentre_format 130 134 131 135 if datacentre_namespace == "": 132 sys.exit(" Failed at stage: getting datacentre namespace. datacentre config file tried = %s" %datacentre_config_filename)136 sys.exit("ERROR: Failed at stage: getting datacentre namespace. 
datacentre config file tried = %s" %datacentre_config_filename) 133 137 else: 134 138 print "INFO: datacentre namespace = %s" %datacentre_namespace … … 164 168 165 169 if status != 0: 166 sys.exit(" Failed at creating copy dir stage")170 sys.exit("ERROR: Failed at creating copy dir stage") 167 171 168 172 # make the 'in' pristine copy. Cope with there being lots of files in the directory. … … 172 176 status = os.system(commandline) 173 177 if status !=0: 174 sys.exit(" Failed at making pristine copy stage")178 sys.exit("ERROR: Failed at making pristine copy stage") 175 179 176 180 # Create/clear the directory for the 'out' processed copy of the discovery records. … … 200 204 status = os.system(commandline) 201 205 if status !=0: 202 sys.exit(" Failed at copying config file stage")206 sys.exit("ERROR: Failed at copying config file stage") 203 207 204 208 #Execute the script which processes/renames the files (changed 08/01/07 to get id from inside file) … … 242 246 243 247 # ingest the datacentres records into eXist db (backups of exist happen nightly). 244 commandline = "$EXIST_HOME/bin/client.sh -c /db/discovery/original/"+datacentre_format+ "/" +datacentre_namespace+ " -u admin -P "+db_admin+" -p "+outdir 245 print "INFO: Executing : actual command to ingest into exist db" 246 status = os.system(commandline) 247 if status !=0: 248 sys.exit("Failed at ingesting into exist db. Datacentre = %s. 
Status = %s" %(datacentre,status)) 248 commandline = java_home +" -jar -Dexist.home=/usr/local/tomcat/webapps/exist/WEB-INF /usr/local/tomcat/webapps/exist/WEB-INF/lib/start.jar client -c /db/discovery/original/"+datacentre_format+ "/" +datacentre_namespace+ " -u admin -P "+db_admin+" -p "+outdir+ " -ouri=xmldb:exist://"+existhost+":8080/exist/xmlrpc" 249 #commandline = "$EXIST_HOME/bin/client.sh -c /db/discovery/original/"+datacentre_format+ "/" +datacentre_namespace+ " -u admin -P "+db_admin+" -p "+outdir 250 print "INFO: Executing : actual command to ingest originals into exist db." 251 status = os.system(commandline) 252 if status !=0: 253 sys.exit("ERROR: Failed at ingesting into exist db. Datacentre = %s. Status = %s" %(datacentre,status)) 249 254 250 255 #are there any old moles records hanging around.If so, remove. … … 312 317 status = os.system(commandline) 313 318 if status !=0: 314 sys.exit(" Failed at moving MOLES to FINAL directory")319 sys.exit("ERROR: Failed at moving MOLES to FINAL directory") 315 320 else: 316 321 keywordAdder.main('./DIF2MOLES', './FINALMOLES', datacentre_groups) 317 322 318 323 # ingest the created discovery minimum molesrecords into eXist db. 319 commandline = "$EXIST_HOME/bin/client.sh -c /db/discovery/moles -u admin -P "+db_admin+" -p ./FINALMOLES" 320 print "INFO: Executing : actual command to ingest into exist db." 321 status = os.system(commandline) 322 if status !=0: 323 sys.exit("Failed at ingesting into exist db. Datacentre = %s. Status = %s" %(datacentre,status)) 324 commandline = java_home +" -jar -Dexist.home=/usr/local/tomcat/webapps/exist/WEB-INF /usr/local/tomcat/webapps/exist/WEB-INF/lib/start.jar client -c /db/discovery/moles -u admin -P "+db_admin+" -p ./FINALMOLES -ouri=xmldb:exist://"+existhost+":8080/exist/xmlrpc" 325 #commandline = "$EXIST_HOME/bin/client.sh -c /db/discovery/moles -u admin -P "+db_admin+" -p ./FINALMOLES" 326 print "INFO: Executing : actual command to ingest mini-moles into exist db." 
327 status = os.system(commandline) 328 if status !=0: 329 sys.exit("ERROR: Failed at ingesting into exist db. Datacentre = %s. Status = %s" %(datacentre,status)) 324 330 325 331 #Extract the spatiotemporal info from created moles and put in Postgres db … … 332 338 status = os.system(commandline) 333 339 if status !=0: 334 sys.exit(" Failed at creating backup directory %s" %this_backupdir)340 sys.exit("ERROR: Failed at creating backup directory %s" %this_backupdir) 335 341 336 342 commandline = "ls -1 /usr/local/WSClients/OAIBatch/data/" + datacentre +"/oai/originals/ | xargs -i cp /usr/local/WSClients/OAIBatch/data/" + datacentre +"/oai/originals/{\} " + this_backupdir … … 338 344 status = os.system(commandline) 339 345 if status !=0: 340 sys.exit(" Failed at copying to backup directory %s" %this_backupdir)346 sys.exit("ERROR: Failed at copying to backup directory %s" %this_backupdir) 341 347 342 348 this_backupdir = backupdir + datacentre + "_" + date_string + "_discovery" … … 345 351 status = os.system(commandline) 346 352 if status !=0: 347 sys.exit(" Failed at creating backup directory %s" %this_backupdir)353 sys.exit("ERROR: Failed at creating backup directory %s" %this_backupdir) 348 354 349 355 commandline = "ls -1 /usr/local/WSClients/OAIBatch/data/" + datacentre +"/discovery/ | xargs -i cp /usr/local/WSClients/OAIBatch/data/" + datacentre +"/discovery/{\} " + this_backupdir … … 351 357 status = os.system(commandline) 352 358 if status !=0: 353 sys.exit(" Failed at copying to backup directory %s" %this_backupdir)359 sys.exit("ERROR: Failed at copying to backup directory %s" %this_backupdir) 354 360 355 361 this_backupdir = backupdir + datacentre + "_" + date_string + "_FINALMOLES" … … 358 364 status = os.system(commandline) 359 365 if status !=0: 360 sys.exit(" Failed at creating backup directory %s" %this_backupdir)366 sys.exit("ERROR: Failed at creating backup directory %s" %this_backupdir) 361 367 362 368 commandline = "ls -1 ./FINALMOLES | xargs -i cp 
./FINALMOLES/{\} " + this_backupdir … … 364 370 status = os.system(commandline) 365 371 if status !=0: 366 sys.exit(" Failed at copying to backup directory %s" %this_backupdir)372 sys.exit("ERROR: Failed at copying to backup directory %s" %this_backupdir) 367 373 368 374 #Clear out the original harvest records area and FINALMOLES … … 371 377 status = os.system(commandline) 372 378 if status !=0: 373 sys.exit(" Failed at clearing out FINALMOLES area %s" %harvest_home)379 sys.exit("ERROR: Failed at clearing out FINALMOLES area %s" %harvest_home) 374 380 375 381 commandline = "ls -1 " + harvest_home + " | xargs -i rm " + harvest_home + "/{\}" … … 377 383 status = os.system(commandline) 378 384 if status !=0: 379 sys.exit(" Failed at clearing out original harvest records area %s" %harvest_home)385 sys.exit("ERROR: Failed at clearing out original harvest records area %s" %harvest_home) 380 386 381 387 print "======================================================"
Note: See TracChangeset for help on using the changeset viewer.