Ignore:
Timestamp:
23/04/08 09:56:50 (12 years ago)
Author:
cbyrom
Message:

Add default logging support; create a new version of the ingest script that
removes all traces of the eXist DB; improve documentation and output.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/oai_ingest_new.py

    r3810 r3817  
    2727# + remove dependency on eXist DB 
    2828 
    29 import os, sys, commands, string, getopt 
     29import os, sys, string, getopt 
     30from time import strftime 
    3031import keywordAdder 
    3132from SchemaNameSpace import SchemaNameSpace 
     
    6263        else: 
    6364            sys.exit("Only handles DIF or MDIP here.") 
    64              
     65 
    6566        logger.printOutput("Found identifier: " + ID) 
    6667        return ID 
     
    7374        ''' 
    7475        logger.printOutput("Adding file, " + filename + ", to postgres DB") 
     76        discoveryID = getID(filename) 
     77         
     78        # NB, if we're dealing with an NDG data provider, the details are slightly different 
     79        if NDG_dataProvider: 
     80                discObj=ndgObject(discoveryID) 
     81        discoveryID = discObj.localID 
     82        datacentre_namespace = discObj.repository 
    7583         
    7684        # first of all create a PostgresRecord - this object represents all the data required 
    7785        # for a DB entry 
    78         record = PostgresRecord(filename, NDG_dataProvider, targetCollection, datacentre_namespace, 'discovery_idTEST', xq, datacentre_format) 
     86        record = PostgresRecord(filename, targetCollection, datacentre_namespace, discoveryID, xq, datacentre_format, logger) 
     87 
     88        # Now create the data access object to interface to the DB 
    7989        dao = PostgresDAO(record) 
     90         
     91        # Finally, write the new record 
    8092        dao.createOrUpdateRecord() 
    8193 
     
    196208            # Now do the transform 
    197209            print "INFO: Running XQuery transform to create minimoles document" 
     210#           print xquery 
    198211            molesid,s=xmldb.executeQuery(xquery) 
     212            print "molesid: %s, s: %s" %(molesid, s) 
    199213            moles_from_dif=xmldb.retrieve(molesid,0) 
    200214 
     
    246260            commandline = "ls -1 " + minimoles_dir + " | xargs -i mv " + minimoles_dir + \ 
    247261                "{\} " + finalmoles_dir 
     262#           commandline = "find " + minimoles_dir + " -type f -print | xargs -i mv {\} " + finalmoles_dir 
    248263            logger.printOutput("INFO: Executing : " + commandline) 
    249264            status = os.system(commandline) 
     
    282297print lineSeparator 
    283298print "RUNNING: oai_ingest.py"           
    284 print lineSeparator 
    285299 
    286300verboseMode = False 
     
    296310for o, a in opts: 
    297311    if o == "-v": 
     312        print " - Verbose mode ON" 
    298313        verboseMode = True 
     314print lineSeparator 
    299315 
    300316if (len(args) < 1 or len(args) > 2): 
     
    304320 
    305321# set the default password file 
    306 dbinfoname = "ingest.txt" 
     322dbinfoname = "passwords.txt" 
    307323if (len(args) == 2): 
    308324        dbinfoname = args[1] 
     
    325341 
    326342# Other settings and constants 
    327 date_string = commands.getoutput("date +'%y%m%d_%H%M'") 
    328   
    329343#os.putenv('EXIST_HOME', '/usr/local/exist-client') 
    330344os.putenv('EXIST_HOME', '/home/users/cbyrom/opt/eXist') 
    331 os.putenv('PATH', ':/usr/java/jdk1.5.0_03/bin:/usr/java/jdk1.5.0_03:/usr/java/jdk1.5.0_03/lib/tools.jar:/usr/local/WSClients/OAIBatch:/usr/local/exist-client/bin:/bin:/usr/bin:.') 
    332  
     345#os.putenv('PATH', ':/usr/java/jdk1.5.0_03/bin:/usr/java/jdk1.5.0_03:/usr/java/jdk1.5.0_03/lib/tools.jar:/usr/local/WSClients/OAIBatch:/usr/local/exist-client/bin:/bin:/usr/bin:.:\\opt\\cygwin\\bin') 
     346 
     347# - to run on Windows under cygwin, use the following 
     348os.putenv('PATH', 'C:\\opt\\cygwin\\bin') 
    333349# set the global variables to retrieve from the config file 
    334350harvest_home = "" 
     
    371387fileUtils.setUpDir(originals_dir) 
    372388commandline = "ls -1 " + harvest_home + "/ | xargs -i cp " + harvest_home + "/{\} " + originals_dir 
     389#commandline = "find " + harvest_home + " -type f -print | xargs -i cp \{\} " + originals_dir 
    373390logger.printOutput("INFO: Executing : " + commandline) 
    374391status = os.system(commandline) 
     
    427444                    corrected_filename = discovery_corrected_dir + filename 
    428445                    try: 
    429                        SchemaNameSpace(in_filename, corrected_filename,datacentre_format, logger) 
     446                       SchemaNameSpace(in_filename, corrected_filename, datacentre_format, logger) 
    430447                    except: 
    431448                       print "ERROR: SchemaNameSpace failed on file %s"%in_filename 
     
    450467fileUtils.setUpDir(minimoles_dir) 
    451468fileUtils.setUpDir(finalmoles_dir) 
     469 
    452470createEXistMiniMoles() 
    453  
    454 # TODO: need to skip the eXist steps below and just populate the postGres DB 
    455 # 2. use the correct xquery to transform the original doc directly and store t 
    456471 
    457472 
     
    462477 
    463478#Make copies of discovery and oai/originals and DIF2MOLES areas to backup area for tape backups 
    464 backupdir_base = backupdir + datacentre + "_" + date_string 
     479backupdir_base = backupdir + datacentre + "_" + strftime("%y%m%d_%H%M") 
     480 
    465481this_backupdir = backupdir_base + "_originals" 
    466482fileUtils.makeBackUp(originals_dir, this_backupdir) 
Note: See TracChangeset for help on using the changeset viewer.