Changeset 3817 for TI01-discovery
- Timestamp:
- 23/04/08 09:56:50 (13 years ago)
- Location:
- TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch
- Files:
- 1 added
- 4 edited
Legend:
- Unmodified
- Added
- Removed
TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/SchemaNameSpace.py
r3810 r3817 1 import sys 2 from Logger import Logger 1 import sys, logging 3 2 4 3 class SchemaNameSpace: … … 7 6 NB, only currently handles correction of DIF files 8 7 ''' 9 def __init__(self, infile,outfile,format, logger):8 def __init__(self, infile, outfile, format): 10 9 ''' 11 10 Constructor - with the logic to do the namespace change … … 14 13 @param format: Format of file being processed. DIF is the only format which currently is processed. 15 14 ''' 15 logging.info("Correcting namespace in file, " + infile) 16 16 17 self.ff=open(infile,'r') 17 18 self.ww=open(outfile,'w') 18 19 self.format= format 19 20 self.lines=self.ff.readlines() 21 20 22 for self.line in self.lines: 21 23 if self.format== "DIF" and self.line.startswith('<DIF'): 22 message = "INFO: changing line for %s. output to %s" %(infile,outfile) 23 if (logger is None): 24 print message 25 else: 26 logger.printOutput(message) 24 logging.info("changing line for %s. output to %s" %(infile,outfile)) 27 25 self.line='<DIF xmlns="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">\n' 28 26 self.ww.write(self.line) 29 27 self.ff.close() 30 28 self.ww.close() 29 30 logging.info("Corrected file written to, " + outfile) -
TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/db_funcs.py
r3797 r3817 3 3 # functions for use with NDG discovery postgres db 4 4 5 import pgdb 5 import pgdb, logging 6 6 7 7 def db_connect(): 8 # Open a Postgres database connection 8 ''' 9 Open a Postgres database connection 10 ''' 9 11 DATABASE = 'test' 10 12 HOST = 'localhost' … … 12 14 PW = 'pass01word' 13 15 SCHEMA = 'public' 14 # DATABASE = 'xxxx' 15 # HOST = 'xxx.xxx.uk' 16 # USER = 'xxx' 17 # PW = 'xxxxxxx' 18 # SCHEMA = 'xxx' 16 # DATABASE = 'xxxx' 17 # HOST = 'xxx.xxx.uk' 18 # USER = 'xxx' 19 # PW = 'xxxxxxx' 20 # SCHEMA = 'xxx' 21 logging.info("Setting up connection to DB: " + DATABASE + " on " + HOST) 19 22 connection_string = HOST + ':' + DATABASE + ':' + USER + ':' + PW 20 23 connection = pgdb.connect(connection_string) 21 return connection 24 logging.info("DB connection established") 25 return connection 22 26 23 27 … … 28 32 @param sqlCmd: a SQL command to execute with the postgres connection 29 33 ''' 30 print "Running SQL command: %s" %sqlCmd 34 logging.info("Running SQL command") 35 logging.debug("Actual command: %s" %sqlCmd) 31 36 cursor = connection.cursor() 32 37 try: 33 38 cursor.execute(sqlCmd) 34 39 except: 35 print "Error: database error %s %s" %(sys.exc_type, sys.exc_value)40 sys.exit("Error: database error %s %s" %(sys.exc_type, sys.exc_value)) 36 41 connection.commit() 42 logging.info("SQL command completed successfully") 37 43 return cursor.fetchall() 38 44 -
TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/keywordAdder.py
r3797 r3817 1 1 #!/usr/bin/env python 2 # keywordAdder - adds Structered Keywords to moles documents 2 ''' 3 adds Structured Keywords to moles documents 4 ''' 3 5 try: #python 2.5 4 6 from xml.etree import ElementTree as etree -
TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/oai_ingest_new.py
r3810 r3817 27 27 # + remove dependency on eXist DB 28 28 29 import os, sys, commands, string, getopt 29 import os, sys, string, getopt 30 from time import strftime 30 31 import keywordAdder 31 32 from SchemaNameSpace import SchemaNameSpace … … 62 63 else: 63 64 sys.exit("Only handles DIF or MDIP here.") 64 65 65 66 logger.printOutput("Found identifier: " + ID) 66 67 return ID … … 73 74 ''' 74 75 logger.printOutput("Adding file, " + filename + ", to postgres DB") 76 discoveryID = getID(filename) 77 78 # NB, if we're dealing with an NDG data provider, the details are slightly different 79 if NDG_dataProvider: 80 discObj=ndgObject(discoveryID) 81 discoveryID = discObj.localID 82 datacentre_namespace = discObj.repository 75 83 76 84 # first of all create a PostgresRecord - this object represents all the data required 77 85 # for a DB entry 78 record = PostgresRecord(filename, NDG_dataProvider, targetCollection, datacentre_namespace, 'discovery_idTEST', xq, datacentre_format) 86 record = PostgresRecord(filename, targetCollection, datacentre_namespace, discoveryID, xq, datacentre_format, logger) 87 88 # Now create the data access object to interface to the DB 79 89 dao = PostgresDAO(record) 90 91 # Finally, write the new record 80 92 dao.createOrUpdateRecord() 81 93 … … 196 208 # Now do the transform 197 209 print "INFO: Running XQuery transform to create minimoles document" 210 # print xquery 198 211 molesid,s=xmldb.executeQuery(xquery) 212 print "molesid: %s, s: %s" %(molesid, s) 199 213 moles_from_dif=xmldb.retrieve(molesid,0) 200 214 … … 246 260 commandline = "ls -1 " + minimoles_dir + " | xargs -i mv " + minimoles_dir + \ 247 261 "{\} " + finalmoles_dir 262 # commandline = "find " + minimoles_dir + " -type f -print | xargs -i mv {\} " + finalmoles_dir 248 263 logger.printOutput("INFO: Executing : " + commandline) 249 264 status = os.system(commandline) … … 282 297 print lineSeparator 283 298 print "RUNNING: oai_ingest.py" 284 print lineSeparator285 299 286 300 
verboseMode = False … … 296 310 for o, a in opts: 297 311 if o == "-v": 312 print " - Verbose mode ON" 298 313 verboseMode = True 314 print lineSeparator 299 315 300 316 if (len(args) < 1 or len(args) > 2): … … 304 320 305 321 # set the default password file 306 dbinfoname = " ingest.txt"322 dbinfoname = "passwords.txt" 307 323 if (len(args) == 2): 308 324 dbinfoname = args[1] … … 325 341 326 342 # Other settings and constants 327 date_string = commands.getoutput("date +'%y%m%d_%H%M'")328 329 343 #os.putenv('EXIST_HOME', '/usr/local/exist-client') 330 344 os.putenv('EXIST_HOME', '/home/users/cbyrom/opt/eXist') 331 os.putenv('PATH', ':/usr/java/jdk1.5.0_03/bin:/usr/java/jdk1.5.0_03:/usr/java/jdk1.5.0_03/lib/tools.jar:/usr/local/WSClients/OAIBatch:/usr/local/exist-client/bin:/bin:/usr/bin:.') 332 345 #os.putenv('PATH', ':/usr/java/jdk1.5.0_03/bin:/usr/java/jdk1.5.0_03:/usr/java/jdk1.5.0_03/lib/tools.jar:/usr/local/WSClients/OAIBatch:/usr/local/exist-client/bin:/bin:/usr/bin:.:\\opt\\cygwin\\bin') 346 347 # - to run on Windows under cygwin, use the following 348 os.putenv('PATH', 'C:\\opt\\cygwin\\bin') 333 349 # set the global variables to retrieve from the config file 334 350 harvest_home = "" … … 371 387 fileUtils.setUpDir(originals_dir) 372 388 commandline = "ls -1 " + harvest_home + "/ | xargs -i cp " + harvest_home + "/{\} " + originals_dir 389 #commandline = "find " + harvest_home + " -type f -print | xargs -i cp \{\} " + originals_dir 373 390 logger.printOutput("INFO: Executing : " + commandline) 374 391 status = os.system(commandline) … … 427 444 corrected_filename = discovery_corrected_dir + filename 428 445 try: 429 SchemaNameSpace(in_filename, corrected_filename, datacentre_format, logger)446 SchemaNameSpace(in_filename, corrected_filename, datacentre_format, logger) 430 447 except: 431 448 print "ERROR: SchemaNameSpace failed on file %s"%in_filename … … 450 467 fileUtils.setUpDir(minimoles_dir) 451 468 fileUtils.setUpDir(finalmoles_dir) 469 452 470 
createEXistMiniMoles() 453 454 # TODO: need to skip the eXist steps below and just populate the postGres DB455 # 2. use the correct xquery to transform the original doc directly and store t456 471 457 472 … … 462 477 463 478 #Make copies of discovery and oai/originals and DIF2MOLES areas to backup area for tape backups 464 backupdir_base = backupdir + datacentre + "_" + date_string 479 backupdir_base = backupdir + datacentre + "_" + strftime("%y%m%d_%H%M") 480 465 481 this_backupdir = backupdir_base + "_originals" 466 482 fileUtils.makeBackUp(originals_dir, this_backupdir)
Note: See TracChangeset
for help on using the changeset viewer.