Changeset 1889 for TI01-discovery
- Timestamp:
- 18/12/06 19:16:44 (14 years ago)
- Location:
- TI01-discovery/trunk/ingestAutomation/OAIBatch
- Files:
-
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
TI01-discovery/trunk/ingestAutomation/OAIBatch/bodc_config.properties
r1768 r1889 8 8 # 9 9 #Define groups - portal groups for limiting searches by 'group of datacentres'. 10 groups NERC-DDC NERCMDIP10 groups NERC-DDC MDIP 11 11 # 12 12 #Define which format is harvested from the data centre (one only) 13 format dif13 format DIF 14 14 # 15 15 #Define the data providers namespace -
TI01-discovery/trunk/ingestAutomation/OAIBatch/neodc_config.properties
r1769 r1889 8 8 # 9 9 #Define groups - portal groups for limiting searches by 'group of datacentres'. 10 groups NERC NERC-DDC 10 # 11 11 # 12 12 #Define which format is harvested from the data centre (one only) 13 format dif13 format DIF 14 14 # 15 15 #Define the data providers namespace -
TI01-discovery/trunk/ingestAutomation/OAIBatch/nocs_config.properties
r1768 r1889
8  8  #
9  9  #Define groups - portal groups for limiting searches by 'group of datacentres'.
10    groups NERC
   10 #
11 11 #
12 12 #Define which format is harvested from the data centre (one only)
13    format dif
   13 format DIF
14 14 #
15 15 #Define the data providers namespace
-
TI01-discovery/trunk/ingestAutomation/OAIBatch/oai_ingest.py
r1880 r1889 1 1 #!/usr/bin/env python 2 2 """ Script oai_ingest.py takes parameter <datacentre>. 3 The /usr/local/WSClients/OAIBatch directory contains this python script, a DataProvider specific config file4 and the oaiClean.py class which cleans up discovery records after harvesting. 5 The pre-processed files are then ingested to the eXist XML db. 6 7 Under this directory the following structure should be maintained:8 3 The /usr/local/WSClients/OAIBatch directory contains:- 4 - this python script, 5 - a DataProvider specific config file, 6 - the d2b.jar moles creator class which creates moles discovery records, 7 - the python module for extracting spatiotemporal information and adding to postgres db. 8 Under this directory the following structure should be maintained: 9 9 ./data 10 10 - /DATACENTRE/ 11 - discovery/: Records with namespace, schema declaration deleted - after having run 12 the oaiClean script. Ready to ingest in the discovery service. 13 - oai/difYYYYMMDD/ Records as harvested from OAI 14 11 - discovery/: Re-named documents ready to ingest in the discovery service. 
12 - oai/difYYYYMMDD/ Documents as harvested from OAI 15 13 Where /DATACENTRE varies for the different data providers 16 17 14 """ 18 15 #History: … … 48 45 date_string = commands.getoutput ("date +'%y%m%d_%H%M'") 49 46 os.putenv ('EXIST_HOME', '/usr/local/exist-client') 50 os.putenv ('PATH', ':/usr/java/jdk1.5.0_03/jre:/usr/java/jdk1.5.0_03:/usr/java/jdk1.5.0_03/lib/tools.jar:/usr/local/WSClients/OAIBatch:/usr/local/exist-client/bin:/bin:/usr/bin:.') 51 os.putenv ('CLASSPATH','.:/usr/java/j2sdk1.4.2_04/bin:/usr/local/WSClients/OAIBatch') 47 os.putenv ('JAVA_HOME', '/usr/java/jdk1.5.0_03') 48 os.putenv ('PATH', ':/usr/java/jdk1.5.0_03/bin:/usr/java/jdk1.5.0_03:/usr/java/jdk1.5.0_03/lib/tools.jar:/usr/local/WSClients/OAIBatch:/usr/local/exist-client/bin:/bin:/usr/bin:.') 49 os.putenv ('CLASSPATH','.:/usr/java/jdk1.5.0_03/lib/tools.jar') 52 50 53 51 # Get the harvested records directory and groups for this datacentre from the datacentre specific config file … … 140 138 sys.exit("Failed at copying config file stage") 141 139 142 #Change os directory to that with the other code in it. (need this?)140 #Change os directory to that with the code in it. 
143 141 os.chdir('/usr/local/WSClients/OAIBatch') 144 142 … … 156 154 #print "original file = %s, newfile = %s" %(original_filename, new_filename) 157 155 commandline = "cp "+original_filename+ " " +new_filename 158 print "Executing : " + commandline156 #print "Executing : " + commandline 159 157 status = os.system(commandline) 160 158 if status !=0: … … 174 172 # Then run the minimum moles creator which will run over all records in the supplied collection 175 173 # creates a directory ./DIF2MOLES to pass back records with original filename 176 commandline = "java -jar d2b.jar repositoryID " +datacentre_namespace+" repositoryLocalID "+datacentre+" format "+datacentre_format+" repository xmldb:exist://glue.badc.rl.ac.uk:8080/exist/xmlrpc userpw xxxxxx targetCollection /db/discovery/original/"+datacentre_format+"/"+datacentre_namespace177 print commandline174 commandline = "java -jar D2B/d2b.jar repositoryID " +datacentre_namespace+" repositoryLocalID "+datacentre+" format "+datacentre_format+" repository xmldb:exist://glue.badc.rl.ac.uk:8080/exist/xmlrpc userpw xxxxxx targetCollection /db/discovery/original/"+datacentre_format+"/"+datacentre_namespace 175 print "Executing command to run d2b.jar" 178 176 status= os.system(commandline) 179 177 if status!=0: … … 182 180 183 181 # ingest the created discovery minimum molesrecords into eXist db. 
184 commandline = "$EXIST_HOME/bin/client.sh -c ./DIF2MOLES-u admin -P xxxxxx -p ./DIF2MOLES"182 commandline = "$EXIST_HOME/bin/client.sh -c /db/discovery/moles -u admin -P xxxxxx -p ./DIF2MOLES" 185 183 print "Executing : actual command to ingest into exist db" 186 184 status = os.system(commandline) … … 229 227 sys.exit("Failed at creating backup directory %s" %this_backupdir) 230 228 231 commandline = "ls -1 ./DIF2MOLES | xargs -i cp ./DIF2MOLES {\} " + this_backupdir229 commandline = "ls -1 ./DIF2MOLES | xargs -i cp ./DIF2MOLES/{\} " + this_backupdir 232 230 print "Executing : " + commandline 233 231 status = os.system(commandline) -
TI01-discovery/trunk/ingestAutomation/OAIBatch/pml_config.properties
r1768 r1889
10 10 #
11 11 #
12    format dif
   12 format DIF
13 13 #
14 14 #Define the data providers namespace
Note: See TracChangeset
for help on using the changeset viewer.