Ignore:
Timestamp:
23/04/08 17:24:55 (11 years ago)
Author:
cbyrom
Message:

Fix a few problems, including how the xquery libraries are referenced; these
have now been added to the ndgUtils egg and are extracted locally and
referenced directly. Also add functionality to deal with the moles -> other
transforms, add new utility methods, tidy up the code and add more logging.

Location:
TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/FileUtilities.py

    r3815 r3821  
    11import os, sys, logging 
     2from os import makedirs 
     3from os.path import normpath,dirname,exists,abspath 
    24 
    35class FileUtilities: 
     
    1012        Constructor 
    1113        ''' 
     14 
     15    def createFile(self, fileName, content): 
     16        ''' 
     17        Create a file with the specified name and content 
     18        @param fileName: name of file to create 
     19        @param content: content of file to create   
     20        ''' 
     21        try: 
     22            f = open(fileName,'w') 
     23            f.write(content) 
     24            f.close() 
     25        except: 
     26            sys.exit("ERROR: Problem encountered when creating file, %s" %fileName) 
    1227 
    1328 
     
    2540        from holger@trillke.net 2002/03/18 
    2641        ''' 
    27         from os import makedirs 
    28         from os.path import normpath,dirname,exists,abspath 
    29      
    3042        logging.info("Creating dir: " + path)  
    3143        dpath = normpath(dirname(path)) 
     
    4557        else: 
    4658            try: 
    47                 makepath(dir) 
     59                self.makepath(dir) 
    4860            except: 
    4961                sys.exit("Failed at setting up directory, %s" %dir) 
  • TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/PostgresRecord.py

    r3816 r3821  
    1919from ETxmlView import loadET, nsdumb 
    2020import molesReadWrite as MRW 
     21from ndgUtils.ndgObject import ndgObject 
     22from FileUtilities import FileUtilities 
    2123 
    2224class PostgresRecord: 
     
    2830    documentTypes = ['MOLES', 'DIF', 'DC', 'MDIP', 'ISO19139'] 
    2931         
    30     def __init__(self, filename, datacentre_groups, datacentre_namespace, discovery_id, xq, docType): 
     32    def __init__(self, filename, ndg_dataprovider, datacentre_groups, datacentre_namespace, discovery_id, xq, docType): 
    3133        logging.info("Setting up Postgres record for file, " + filename) 
    3234        self.filename = filename 
     35     
     36        # NB, if we're dealing with an NDG data provider, the details are slightly different 
     37        if ndg_dataprovider: 
     38            discObj=ndgObject(discovery_id) 
     39            self._local_id = discObj.localID 
     40            self._repository_local_id = discObj.repository 
     41        else: 
     42            self._local_id = discovery_id 
     43            self._repository_local_id = datacentre_namespace 
     44             
    3345        self._datacentre_groups = datacentre_groups 
    3446        self._repository = datacentre_namespace 
     
    4052        self._allDocs = []  # array to store all the transformed docs - for easy retrieval by the DAO 
    4153 
     54        self._fileUtils = FileUtilities() 
     55 
     56        # get the dir of the file - needed by the xquery to use as the target collection 
     57        tmp = filename.split('/') 
     58        self._dir = '/'.join(tmp[0:len(tmp)-1]) 
     59        self._shortFilename = tmp[len(tmp)-1] 
     60         
     61        # dir to store a temp copy of the moles file, when produced - for use by other transforms 
     62        self._molesDir = None 
     63 
    4264        # firstly load contents of file 
    4365        self.originalFormat = file(filename).read() 
     
    4668        # DO WE NEED TO DO THIS?? 
    4769        self.correctedFormat = loadET(self.originalFormat) 
    48          
    49  
    50         #debugging stuff 
    51 #        self.logger.printOutput("vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv") 
    52 #        print self.correctedFormat 
    53 #        print self.originalFormat.keys() 
    54 #        for i in self.originalFormat: print i.tag 
    55 #        print dir(self.originalFormat) 
    56 #        self.logger.printOutput("vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv") 
    57         #we use nsdumb in case the namespace causes difficulties ... 
    58 #        helper=nsdumb(self.originalFormat) 
    59         #print helper 
    60 #        self.id=helper.getText(self.originalFormat,'DatasetIdentifier') 
    6170 
    6271         
     
    6978         
    7079        # do some initial setting up of record 
    71 #        self.setUpRecord() 
    7280        self.doRecordTransforms() 
    7381        self.getSpatioTemporalData() 
     
    8290        for docType in self.documentTypes: 
    8391            self.getDocumentFormat(docType) 
     92             
    8493        logging.info("Transforms complete") 
    8594 
     95 
     96    def createMolesFile(self): 
     97        ''' 
     98        Check if a moles file exists on the system; if not, assume the moles transform has not 
     99        been run and then produce this file - to allow for use in the various xqueries 
     100        ''' 
     101        logging.info("Creating moles file on system - for use with other xquery transforms") 
     102        self._molesDir = self._dir + "/moles/" 
     103        self._fileUtils.setUpDir(self._molesDir) 
     104         
     105        if self._molesFormat is None: 
     106            self.doMolesTransform() 
     107             
     108        self._fileUtils.createFile(self._molesDir + self._shortFilename, self._molesFormat) 
     109        logging.info("Moles file created - at %s" %self._molesDir) 
     110             
    86111 
    87112    def doTransform(self, xQueryType): 
     
    93118        logging.info("Running XQuery transform, " + xQueryType + " to create transformed document") 
    94119 
     120        # firstly, check if this is a moles -> something else query; if so, ensure there is a valid 
     121        # moles file available for the transform - and use the correct dir for the xquery collection 
     122        dir = self._dir 
     123        if xQueryType.find('moles2') > -1: 
     124            if self._molesDir is None: 
     125                self.createMolesFile() 
     126                 
     127            dir = self._molesDir 
     128             
    95129        # get the query and set this up to use properly 
    96         xquery = self._xq.actual(xQueryType, self.filename, self._repository, self.discovery_id) 
     130        xquery = self._xq.actual(xQueryType, dir, self._repository_local_id, self._local_id) 
    97131 
    98132        # sort out the input ID stuff 
     
    100134        xquery=xquery.replace('repository_localid', self._repository) 
    101135 
     136        # strip out the eXist reference to the libraries; these files should be available in the 
     137        # running dir - as set up by oai_ingest.py 
     138        xquery=xquery.replace('xmldb:exist:///db/xqueryLib/Vocabs/', '') 
     139        xquery=xquery.replace('xmldb:exist:///db/xqueryLib/Utilities/', '') 
     140 
    102141        # write the query to file, to make it easier to input 
    103142        # NB, running directly at the command line leads to problems with the interpretation of $ characters 
    104143        xqFile = "currentQuery.xq" 
    105         f=open(xqFile,'w') 
    106         f.write(xquery) 
    107         f.close() 
     144        self._fileUtils.createFile(xqFile, xquery) 
    108145 
    109146        # Now do the transform 
    110 #        xqCommand = "java -cp /home/users/cbyrom/opt/saxonsa/saxon9sa.jar:/home/users/cbyrom/opt/saxonsa net.sf.saxon.Query " + \ 
    111147        os.putenv ('PATH', ':/usr/java/jdk1.5.0_03/bin:/usr/java/jdk1.5.0_03:/usr/java/jdk1.5.0_03/lib/tools.jar:/usr/local/WSClients/OAIBatch:/usr/local/exist-client/bin:/bin:/usr/bin:.') 
    112148        xqCommand = "java -cp saxon9.jar net.sf.saxon.Query " + xqFile 
     
    117153 
    118154        print output 
    119         print "ss,", status 
    120155        if status is not None: 
    121156            sys.exit("Failed at running the XQuery") 
     
    163198        tmpDir = os.getcwd() + "/" 
    164199        tmpKeywordsDir = os.getcwd() + "/kewordsAdded/" 
    165         fileUtils = FileUtilities(self.logger) 
    166         fileUtils.setUpDir(tmpDir) 
    167         fileUtils.setUpDir(tmpKeywordsDir) 
     200        self._fileUtils.setUpDir(tmpDir) 
     201        self._fileUtils.setUpDir(tmpKeywordsDir) 
    168202        tmpFile = 'tmpFile.xml' 
    169         f=open(tmpDir + "/" + tmpFile,'w') 
    170         f.write(self._molesFormat) 
    171         f.close() 
     203        self._fileUtils.createFile(tmpDir + "/" + tmpFile, self._molesFormat) 
    172204 
    173205        keywordAdder.main(tmpDir, tmpKeywordsDir, self.datacentre_groups) 
     
    179211         
    180212        # Finally, tidy up temp dirs 
    181         fileUtils.cleanDir(tmpDir) 
    182         fileUtils.clearDir(tmpKeywordsDir) 
     213        self._fileUtils.cleanDir(tmpDir) 
     214        self._fileUtils.clearDir(tmpKeywordsDir) 
    183215        logging.info("Completed adding keywords") 
    184216         
     
    197229        if self._molesFormat is None: 
    198230            self.doMolesTransform() 
     231            self.createMolesFile() 
    199232         
    200233        # check the document isn't already defined 
     
    205238                return doc 
    206239        except: 
    207             logging.info("Creating new transformed document") 
     240            logging.info("Document not available - creating new transformed document") 
    208241 
    209242        # the doc type doesn't exist - so run the xquery 
  • TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/SpaceTimeIngestFromMOLES.py

    r3797 r3821  
    2525import db_funcs 
    2626import os 
    27 import PostgresDBUtils 
    2827 
    2928#connect to db (in separate db functions module) 
  • TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/oai_ingest_new2.py

    r3817 r3821  
    3434import ndgUtils 
    3535from ndgUtils.ndgXqueries import ndgXqueries 
    36 from ndgUtils.ndgObject import ndgObject 
    3736from FileUtilities import FileUtilities 
    3837from PostgresRecord import PostgresRecord 
     
    6564        Add a file to the postgres DB - extracting and storing all the required 
    6665        data in the process 
     66        @param filename: full path of file to add to postgres DB  
    6767        ''' 
    6868        logging.info("Adding file, " + filename + ", to postgres DB") 
    6969        discoveryID = getID(filename) 
    7070         
    71         # NB, if we're dealing with an NDG data provider, the details are slightly different 
    72         if NDG_dataProvider: 
    73                 discObj=ndgObject(discoveryID) 
    74         discoveryID = discObj.localID 
    75         datacentre_namespace = discObj.repository 
    76          
    7771        # first of all create a PostgresRecord - this object represents all the data required 
    7872        # for a DB entry 
    79         record = PostgresRecord(filename, datacentre_groups, datacentre_namespace, discoveryID, xq, datacentre_format) 
     73        record = PostgresRecord(filename, NDG_dataProvider, datacentre_groups, datacentre_namespace, discoveryID, xq, datacentre_format) 
    8074 
    8175        # Now create the data access object to interface to the DB 
     
    145139         
    146140        logging.info("datacentre namespace: " + datacentre_namespace) 
     141         
     142        if NDG_dataProvider: 
     143                logging.info("Datacentre classified as an NDG data provider") 
     144        else: 
     145                logging.info("Datacentre is not classificied as an NDG data provider") 
    147146        print lineSeparator 
    148147         
     
    160159print lineSeparator 
    161160print "RUNNING: oai_ingest.py"           
    162  
    163 verboseMode = False 
    164  
    165161 
    166162# check for verbose option 
     
    276272 
    277273# now set up the required XQueries 
     274# - NB, extract the xquery libraries locally for easy reference 
    278275xq=ndgXqueries() 
     276for libFile in xq.xqlib: 
     277        fileUtils.createFile(libFile, xq.xqlib[libFile]) 
    279278 
    280279# Process the resulting files and put the data into the postgres DB 
Note: See TracChangeset for help on using the changeset viewer.