Changeset 3817


Ignore:
Timestamp:
23/04/08 09:56:50 (12 years ago)
Author:
cbyrom
Message:

Add default logging support; create a new version of the ingest script that
removes all traces of the eXist DB; improve documentation and output.

Location:
TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch
Files:
1 added
4 edited

Legend:

Unmodified
Added
Removed
  • TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/SchemaNameSpace.py

    r3810 r3817  
    1 import sys 
    2 from Logger import Logger 
     1import sys, logging 
    32 
    43class SchemaNameSpace: 
     
    76    NB, only currently handles correction of DIF files 
    87    ''' 
    9     def __init__(self,infile,outfile,format, logger): 
     8    def __init__(self, infile, outfile, format): 
    109        ''' 
    1110        Constructor - with the logic to do the namespace change 
     
    1413        @param format: Format of file being processed.  DIF is the only format which currently is processed.    
    1514        ''' 
     15        logging.info("Correcting namespace in file, " + infile) 
     16             
    1617        self.ff=open(infile,'r') 
    1718        self.ww=open(outfile,'w') 
    1819        self.format= format 
    1920        self.lines=self.ff.readlines() 
     21         
    2022        for self.line in self.lines: 
    2123            if self.format== "DIF" and self.line.startswith('<DIF'): 
    22                 message = "INFO: changing line for %s. output to %s" %(infile,outfile) 
    23                 if (logger is None): 
    24                     print message 
    25                 else: 
    26                     logger.printOutput(message) 
     24                logging.info("changing line for %s. output to %s" %(infile,outfile)) 
    2725                self.line='<DIF xmlns="http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">\n' 
    2826            self.ww.write(self.line) 
    2927        self.ff.close() 
    3028        self.ww.close() 
     29 
     30        logging.info("Corrected file written to, " + outfile) 
  • TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/db_funcs.py

    r3797 r3817  
    33# functions for use with NDG discovery postgres db 
    44 
    5 import pgdb 
     5import pgdb, logging 
    66 
    77def db_connect(): 
    8         # Open a Postgres database connection 
     8        ''' 
     9        Open a Postgres database connection 
     10        ''' 
    911        DATABASE = 'test' 
    1012        HOST     = 'localhost' 
     
    1214        PW       = 'pass01word' 
    1315        SCHEMA   = 'public' 
    14 #       DATABASE = 'xxxx' 
    15 #       HOST     = 'xxx.xxx.uk' 
    16 #       USER     = 'xxx' 
    17 #       PW       = 'xxxxxxx' 
    18 #       SCHEMA   = 'xxx' 
     16        #       DATABASE = 'xxxx' 
     17        #       HOST     = 'xxx.xxx.uk' 
     18        #       USER     = 'xxx' 
     19        #       PW       = 'xxxxxxx' 
     20        #       SCHEMA   = 'xxx' 
     21        logging.info("Setting up connection to DB: " + DATABASE + " on " + HOST) 
    1922        connection_string = HOST + ':' + DATABASE + ':' + USER + ':' + PW 
    2023        connection = pgdb.connect(connection_string) 
    21         return connection 
     24        logging.info("DB connection established") 
     25        return connection 
    2226 
    2327 
     
    2832        @param sqlCmd: a SQL command to execute with the postgres connection   
    2933        ''' 
    30         print "Running SQL command: %s" %sqlCmd 
     34        logging.info("Running SQL command") 
     35        logging.debug("Actual command: %s" %sqlCmd) 
    3136        cursor = connection.cursor() 
    3237        try: 
    3338            cursor.execute(sqlCmd) 
    3439        except: 
    35             print "Error: database error %s %s" %(sys.exc_type, sys.exc_value) 
     40            sys.exit("Error: database error %s %s" %(sys.exc_type, sys.exc_value)) 
    3641        connection.commit() 
     42        logging.info("SQL command completed successfully") 
    3743        return cursor.fetchall() 
    3844 
  • TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/keywordAdder.py

    r3797 r3817  
    11#!/usr/bin/env python 
    2 # keywordAdder - adds Structered Keywords to moles documents 
     2''' 
     3adds Structured Keywords to moles documents 
     4''' 
    35try: #python 2.5 
    46    from xml.etree import ElementTree as etree 
  • TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/oai_ingest_new.py

    r3810 r3817  
    2727# + remove dependency on eXist DB 
    2828 
    29 import os, sys, commands, string, getopt 
     29import os, sys, string, getopt 
     30from time import strftime 
    3031import keywordAdder 
    3132from SchemaNameSpace import SchemaNameSpace 
     
    6263        else: 
    6364            sys.exit("Only handles DIF or MDIP here.") 
    64              
     65 
    6566        logger.printOutput("Found identifier: " + ID) 
    6667        return ID 
     
    7374        ''' 
    7475        logger.printOutput("Adding file, " + filename + ", to postgres DB") 
     76        discoveryID = getID(filename) 
     77         
     78        # NB, if we're dealing with an NDG data provider, the details are slightly different 
     79        if NDG_dataProvider: 
     80                discObj=ndgObject(discoveryID) 
     81        discoveryID = discObj.localID 
     82        datacentre_namespace = discObj.repository 
    7583         
    7684        # first of all create a PostgresRecord - this object represents all the data required 
    7785        # for a DB entry 
    78         record = PostgresRecord(filename, NDG_dataProvider, targetCollection, datacentre_namespace, 'discovery_idTEST', xq, datacentre_format) 
     86        record = PostgresRecord(filename, targetCollection, datacentre_namespace, discoveryID, xq, datacentre_format, logger) 
     87 
     88        # Now create the data access object to interface to the DB 
    7989        dao = PostgresDAO(record) 
     90         
     91        # Finally, write the new record 
    8092        dao.createOrUpdateRecord() 
    8193 
     
    196208            # Now do the transform 
    197209            print "INFO: Running XQuery transform to create minimoles document" 
     210#           print xquery 
    198211            molesid,s=xmldb.executeQuery(xquery) 
     212            print "molesid: %s, s: %s" %(molesid, s) 
    199213            moles_from_dif=xmldb.retrieve(molesid,0) 
    200214 
     
    246260            commandline = "ls -1 " + minimoles_dir + " | xargs -i mv " + minimoles_dir + \ 
    247261                "{\} " + finalmoles_dir 
     262#           commandline = "find " + minimoles_dir + " -type f -print | xargs -i mv {\} " + finalmoles_dir 
    248263            logger.printOutput("INFO: Executing : " + commandline) 
    249264            status = os.system(commandline) 
     
    282297print lineSeparator 
    283298print "RUNNING: oai_ingest.py"           
    284 print lineSeparator 
    285299 
    286300verboseMode = False 
     
    296310for o, a in opts: 
    297311    if o == "-v": 
     312        print " - Verbose mode ON" 
    298313        verboseMode = True 
     314print lineSeparator 
    299315 
    300316if (len(args) < 1 or len(args) > 2): 
     
    304320 
    305321# set the default password file 
    306 dbinfoname = "ingest.txt" 
     322dbinfoname = "passwords.txt" 
    307323if (len(args) == 2): 
    308324        dbinfoname = args[1] 
     
    325341 
    326342# Other settings and constants 
    327 date_string = commands.getoutput("date +'%y%m%d_%H%M'") 
    328   
    329343#os.putenv('EXIST_HOME', '/usr/local/exist-client') 
    330344os.putenv('EXIST_HOME', '/home/users/cbyrom/opt/eXist') 
    331 os.putenv('PATH', ':/usr/java/jdk1.5.0_03/bin:/usr/java/jdk1.5.0_03:/usr/java/jdk1.5.0_03/lib/tools.jar:/usr/local/WSClients/OAIBatch:/usr/local/exist-client/bin:/bin:/usr/bin:.') 
    332  
     345#os.putenv('PATH', ':/usr/java/jdk1.5.0_03/bin:/usr/java/jdk1.5.0_03:/usr/java/jdk1.5.0_03/lib/tools.jar:/usr/local/WSClients/OAIBatch:/usr/local/exist-client/bin:/bin:/usr/bin:.:\\opt\\cygwin\\bin') 
     346 
     347# - to run on Windows under cygwin, use the following 
     348os.putenv('PATH', 'C:\\opt\\cygwin\\bin') 
    333349# set the global variables to retrieve from the config file 
    334350harvest_home = "" 
     
    371387fileUtils.setUpDir(originals_dir) 
    372388commandline = "ls -1 " + harvest_home + "/ | xargs -i cp " + harvest_home + "/{\} " + originals_dir 
     389#commandline = "find " + harvest_home + " -type f -print | xargs -i cp \{\} " + originals_dir 
    373390logger.printOutput("INFO: Executing : " + commandline) 
    374391status = os.system(commandline) 
     
    427444                    corrected_filename = discovery_corrected_dir + filename 
    428445                    try: 
    429                        SchemaNameSpace(in_filename, corrected_filename,datacentre_format, logger) 
     446                       SchemaNameSpace(in_filename, corrected_filename, datacentre_format, logger) 
    430447                    except: 
    431448                       print "ERROR: SchemaNameSpace failed on file %s"%in_filename 
     
    450467fileUtils.setUpDir(minimoles_dir) 
    451468fileUtils.setUpDir(finalmoles_dir) 
     469 
    452470createEXistMiniMoles() 
    453  
    454 # TODO: need to skip the eXist steps below and just populate the postGres DB 
    455 # 2. use the correct xquery to transform the original doc directly and store t 
    456471 
    457472 
     
    462477 
    463478#Make copies of discovery and oai/originals and DIF2MOLES areas to backup area for tape backups 
    464 backupdir_base = backupdir + datacentre + "_" + date_string 
     479backupdir_base = backupdir + datacentre + "_" + strftime("%y%m%d_%H%M") 
     480 
    465481this_backupdir = backupdir_base + "_originals" 
    466482fileUtils.makeBackUp(originals_dir, this_backupdir) 
Note: See TracChangeset for help on using the changeset viewer.