Changeset 3803 for TI01-discovery


Ignore:
Timestamp:
18/04/08 15:21:14 (11 years ago)
Author:
selatham
Message:

Use the exist that comes in the .war rather than the old-hat one in exist-client.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • TI01-discovery/trunk/ingestAutomation/OAIBatch/oai_ingest.py

    r3785 r3803  
    11#!/usr/bin/env python 
    2 """ Script oai_ingest.py takes parameters <datacentre> <dbinfofile> <existhost> <backupdir>. 
     2""" Script oai_ingest.py takes parameters <datacentre> <dbinfofile> <existhost> <backupdir> <javahome>. 
     3NOTE:might have to change the location of tomcat if it's not in the usual place (/usr/local/tomcat). 
     4 
    35The /usr/local/WSClients/OAIBatch directory contains:- 
    46 - this python script, plus some other modules eg ndgUtils for parts of the process. 
     
    810 ./data 
    911 - /DATACENTRE/ 
    10                 - discovery/:         Re-named documents. 
    11         - discovery_corrected Documents with schema namespaces corrected, ready to ingest in the discovery service. 
    12                 - oai/difYYYYMMDD/    Documents as harvested from OAI 
     12        - discovery/:          Re-named documents. 
     13        - discovery_corrected  Documents with schema namespaces corrected, ready to ingest in the discovery service. 
     14        - oai/difYYYYMMDD/    Documents as harvested from OAI 
    1315 Where  /DATACENTRE  varies for the different data providers 
    1416""" 
     
    2426#  December 2007 SEL rewrite to use Bryans' python XQuery stuff to create mini-moles instead of java. 
    2527#                    Also extracted hard coded pwds into a file. 
     28# April 2008 SEL use exist interface that comes with the .war rather than the old-hat stuff in exist-client 
    2629 
    2730import os 
     
    6467NDG_dataProvider = False 
    6568 
    66 if (len(sys.argv) < 5): 
    67     print "ERROR: <datacentre> or <db info file> or <existhost> or <backupdir> parameter not supplied." 
     69if (len(sys.argv) < 6): 
     70    print "ERROR: <datacentre> or <db info file> or <existhost> or <backupdir> or <java_home> parameter not supplied." 
    6871    sys.exit() 
    6972else: 
     
    7477    backupdir = sys.argv[4] 
    7578    #backupdir = '/disks/glue1/oaiBackup/' 
     79    java_home = sys.argv[5] 
    7680 
    7781#Change os directory to that with the code in it. 
     
    8084# Other settings and constants 
    8185date_string = commands.getoutput ("date +'%y%m%d_%H%M'") 
    82 os.putenv ('EXIST_HOME', '/usr/local/exist-client') 
     86#os.putenv ('EXIST_HOME', '/usr/local/exist-client') 
    8387#os.putenv ('JAVA_HOME', '/usr/java/jdk1.5.0_03') 
    84 os.putenv ('PATH', ':/usr/java/jdk1.5.0_03/bin:/usr/java/jdk1.5.0_03:/usr/java/jdk1.5.0_03/lib/tools.jar:/usr/local/WSClients/OAIBatch:/usr/local/exist-client/bin:/bin:/usr/bin:.') 
     88#os.putenv ('PATH', ':/usr/java/jdk1.5.0_03/bin:/usr/java/jdk1.5.0_03:/usr/java/jdk1.5.0_03/lib/tools.jar:/usr/local/WSClients/OAIBatch:/usr/local/exist-client/bin:/bin:/usr/bin:.') 
    8589#os.putenv ('CLASSPATH','.:/usr/java/jdk1.5.0_03/lib/tools.jar') 
    8690 
     
    115119 
    116120if harvest_home == "": 
    117     sys.exit("Failed at getting harvested records directory stage. datacentre config file tried = %s" %datacentre_config_filename) 
     121    sys.exit("ERROR: Failed at getting harvested records directory stage. datacentre config file tried = %s" %datacentre_config_filename) 
    118122else: 
    119123    print "INFO: harvested records are in %s" %harvest_home 
     
    125129 
    126130if datacentre_format == "": 
    127     sys.exit("Failed at stage: getting datacentre format. datacentre config file tried = %s" %datacentre_config_filename) 
     131    sys.exit("ERROR: Failed at stage: getting datacentre format. datacentre config file tried = %s" %datacentre_config_filename) 
    128132else: 
    129133    print "INFO: format being harvested = %s" %datacentre_format 
    130134 
    131135if datacentre_namespace == "": 
    132     sys.exit("Failed at stage: getting datacentre namespace. datacentre config file tried = %s" %datacentre_config_filename) 
     136    sys.exit("ERROR: Failed at stage: getting datacentre namespace. datacentre config file tried = %s" %datacentre_config_filename) 
    133137else: 
    134138    print "INFO: datacentre namespace = %s" %datacentre_namespace 
     
    164168 
    165169if status != 0: 
    166     sys.exit("Failed at creating copy dir stage") 
     170    sys.exit("ERROR: Failed at creating copy dir stage") 
    167171 
    168172# make the 'in' pristine copy. Cope with there being lots of files in the directory. 
     
    172176status = os.system(commandline) 
    173177if status !=0: 
    174     sys.exit("Failed at making pristine copy stage") 
     178    sys.exit("ERROR: Failed at making pristine copy stage") 
    175179 
    176180# Create/clear the directory for the 'out' processed copy of the discovery records. 
     
    200204status = os.system(commandline) 
    201205if status !=0: 
    202     sys.exit("Failed at copying config file stage") 
     206    sys.exit("ERROR: Failed at copying config file stage") 
    203207 
    204208#Execute the script which processes/renames the files (changed 08/01/07 to get id from inside file) 
     
    242246 
    243247# ingest the datacentres records into eXist db (backups of exist happen nightly). 
    244 commandline = "$EXIST_HOME/bin/client.sh -c /db/discovery/original/"+datacentre_format+ "/" +datacentre_namespace+ " -u admin -P "+db_admin+" -p "+outdir 
    245 print "INFO: Executing : actual command to ingest into exist db" 
    246 status = os.system(commandline) 
    247 if status !=0: 
    248     sys.exit("Failed at ingesting into exist db. Datacentre =  %s. Status = %s" %(datacentre,status)) 
     248commandline = java_home +" -jar -Dexist.home=/usr/local/tomcat/webapps/exist/WEB-INF /usr/local/tomcat/webapps/exist/WEB-INF/lib/start.jar client -c /db/discovery/original/"+datacentre_format+ "/" +datacentre_namespace+ " -u admin -P "+db_admin+" -p "+outdir+ " -ouri=xmldb:exist://"+existhost+":8080/exist/xmlrpc" 
     249#commandline = "$EXIST_HOME/bin/client.sh -c /db/discovery/original/"+datacentre_format+ "/" +datacentre_namespace+ " -u admin -P "+db_admin+" -p "+outdir 
     250print "INFO: Executing : actual command to ingest originals into exist db." 
     251status = os.system(commandline) 
     252if status !=0: 
     253    sys.exit("ERROR: Failed at ingesting into exist db. Datacentre =  %s. Status = %s" %(datacentre,status)) 
    249254 
    250255#are there any old moles records hanging around.If so, remove. 
     
    312317    status = os.system(commandline) 
    313318    if status !=0: 
    314         sys.exit("Failed at moving MOLES to FINAL directory") 
     319        sys.exit("ERROR: Failed at moving MOLES to FINAL directory") 
    315320else: 
    316321    keywordAdder.main('./DIF2MOLES', './FINALMOLES', datacentre_groups) 
    317322 
    318323# ingest the created discovery minimum molesrecords into eXist db. 
    319 commandline = "$EXIST_HOME/bin/client.sh -c /db/discovery/moles -u admin -P "+db_admin+" -p ./FINALMOLES" 
    320 print "INFO: Executing : actual command to ingest into exist db." 
    321 status = os.system(commandline) 
    322 if status !=0: 
    323     sys.exit("Failed at ingesting into exist db. Datacentre =  %s. Status = %s" %(datacentre,status)) 
     324commandline = java_home +" -jar -Dexist.home=/usr/local/tomcat/webapps/exist/WEB-INF /usr/local/tomcat/webapps/exist/WEB-INF/lib/start.jar client -c /db/discovery/moles -u admin -P "+db_admin+" -p ./FINALMOLES -ouri=xmldb:exist://"+existhost+":8080/exist/xmlrpc" 
     325#commandline = "$EXIST_HOME/bin/client.sh -c /db/discovery/moles -u admin -P "+db_admin+" -p ./FINALMOLES" 
     326print "INFO: Executing : actual command to ingest mini-moles into exist db." 
     327status = os.system(commandline) 
     328if status !=0: 
     329    sys.exit("ERROR: Failed at ingesting into exist db. Datacentre =  %s. Status = %s" %(datacentre,status)) 
    324330 
    325331#Extract the spatiotemporal info from created moles and put in Postgres db 
     
    332338status = os.system(commandline) 
    333339if status !=0: 
    334     sys.exit("Failed at creating backup directory %s" %this_backupdir) 
     340    sys.exit("ERROR: Failed at creating backup directory %s" %this_backupdir) 
    335341 
    336342commandline = "ls -1 /usr/local/WSClients/OAIBatch/data/" + datacentre +"/oai/originals/ | xargs -i cp /usr/local/WSClients/OAIBatch/data/" + datacentre +"/oai/originals/{\} " + this_backupdir 
     
    338344status = os.system(commandline) 
    339345if status !=0: 
    340     sys.exit("Failed at copying to backup directory %s" %this_backupdir) 
     346    sys.exit("ERROR: Failed at copying to backup directory %s" %this_backupdir) 
    341347 
    342348this_backupdir = backupdir + datacentre + "_" + date_string + "_discovery" 
     
    345351status = os.system(commandline) 
    346352if status !=0: 
    347     sys.exit("Failed at creating backup directory %s" %this_backupdir) 
     353    sys.exit("ERROR: Failed at creating backup directory %s" %this_backupdir) 
    348354 
    349355commandline = "ls -1 /usr/local/WSClients/OAIBatch/data/" + datacentre +"/discovery/ | xargs -i cp /usr/local/WSClients/OAIBatch/data/" + datacentre +"/discovery/{\} " + this_backupdir 
     
    351357status = os.system(commandline) 
    352358if status !=0: 
    353     sys.exit("Failed at copying to backup directory %s" %this_backupdir) 
     359    sys.exit("ERROR: Failed at copying to backup directory %s" %this_backupdir) 
    354360 
    355361this_backupdir = backupdir + datacentre + "_" + date_string + "_FINALMOLES" 
     
    358364status = os.system(commandline) 
    359365if status !=0: 
    360     sys.exit("Failed at creating backup directory %s" %this_backupdir) 
     366    sys.exit("ERROR: Failed at creating backup directory %s" %this_backupdir) 
    361367 
    362368commandline = "ls -1 ./FINALMOLES | xargs -i cp ./FINALMOLES/{\} " + this_backupdir 
     
    364370status = os.system(commandline) 
    365371if status !=0: 
    366     sys.exit("Failed at copying to backup directory %s" %this_backupdir) 
     372    sys.exit("ERROR: Failed at copying to backup directory %s" %this_backupdir) 
    367373 
    368374#Clear out the original harvest records area and FINALMOLES 
     
    371377status = os.system(commandline) 
    372378if status !=0: 
    373     sys.exit("Failed at clearing out FINALMOLES area %s" %harvest_home) 
     379    sys.exit("ERROR: Failed at clearing out FINALMOLES area %s" %harvest_home) 
    374380 
    375381commandline = "ls -1 " + harvest_home + " | xargs -i rm " + harvest_home + "/{\}" 
     
    377383status = os.system(commandline) 
    378384if status !=0: 
    379     sys.exit("Failed at clearing out original harvest records area %s" %harvest_home) 
     385    sys.exit("ERROR: Failed at clearing out original harvest records area %s" %harvest_home) 
    380386 
    381387print "======================================================" 
Note: See TracChangeset for help on using the changeset viewer.