Changeset 3821 for TI01-discovery
- Timestamp:
- 23/04/08 17:24:55 (13 years ago)
- Location:
- TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch
- Files:
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/FileUtilities.py
r3815 r3821 1 1 import os, sys, logging 2 from os import makedirs 3 from os.path import normpath,dirname,exists,abspath 2 4 3 5 class FileUtilities: … … 10 12 Constructor 11 13 ''' 14 15 def createFile(self, fileName, content): 16 ''' 17 Create a file with the specified name and content 18 @param fileName: name of file to create 19 @param content: content of file to create 20 ''' 21 try: 22 f = open(fileName,'w') 23 f.write(content) 24 f.close() 25 except: 26 sys.exit("ERROR: Problem encountered when creating file, %s" %fileName) 12 27 13 28 … … 25 40 from holger@trillke.net 2002/03/18 26 41 ''' 27 from os import makedirs28 from os.path import normpath,dirname,exists,abspath29 30 42 logging.info("Creating dir: " + path) 31 43 dpath = normpath(dirname(path)) … … 45 57 else: 46 58 try: 47 makepath(dir)59 self.makepath(dir) 48 60 except: 49 61 sys.exit("Failed at setting up directory, %s" %dir) -
TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/PostgresRecord.py
r3816 r3821 19 19 from ETxmlView import loadET, nsdumb 20 20 import molesReadWrite as MRW 21 from ndgUtils.ndgObject import ndgObject 22 from FileUtilities import FileUtilities 21 23 22 24 class PostgresRecord: … … 28 30 documentTypes = ['MOLES', 'DIF', 'DC', 'MDIP', 'ISO19139'] 29 31 30 def __init__(self, filename, datacentre_groups, datacentre_namespace, discovery_id, xq, docType):32 def __init__(self, filename, ndg_dataprovider, datacentre_groups, datacentre_namespace, discovery_id, xq, docType): 31 33 logging.info("Setting up Postgres record for file, " + filename) 32 34 self.filename = filename 35 36 # NB, if we're dealing with an NDG data provider, the details are slightly different 37 if ndg_dataprovider: 38 discObj=ndgObject(discovery_id) 39 self._local_id = discObj.localID 40 self._repository_local_id = discObj.repository 41 else: 42 self._local_id = discovery_id 43 self._repository_local_id = datacentre_namespace 44 33 45 self._datacentre_groups = datacentre_groups 34 46 self._repository = datacentre_namespace … … 40 52 self._allDocs = [] # array to store all the transformed docs - for easy retrieval by the DAO 41 53 54 self._fileUtils = FileUtilities() 55 56 # get the dir of the file - needed by the xquery to use as the target collection 57 tmp = filename.split('/') 58 self._dir = '/'.join(tmp[0:len(tmp)-1]) 59 self._shortFilename = tmp[len(tmp)-1] 60 61 # dir to store a temp copy of the moles file, when produced - for use by other transforms 62 self._molesDir = None 63 42 64 # firstly load contents of file 43 65 self.originalFormat = file(filename).read() … … 46 68 # DO WE NEED TO DO THIS?? 
47 69 self.correctedFormat = loadET(self.originalFormat) 48 49 50 #debugging stuff51 # self.logger.printOutput("vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv")52 # print self.correctedFormat53 # print self.originalFormat.keys()54 # for i in self.originalFormat: print i.tag55 # print dir(self.originalFormat)56 # self.logger.printOutput("vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv")57 #we use nsdumb in case the namespace causes difficulties ...58 # helper=nsdumb(self.originalFormat)59 #print helper60 # self.id=helper.getText(self.originalFormat,'DatasetIdentifier')61 70 62 71 … … 69 78 70 79 # do some initial setting up of record 71 # self.setUpRecord()72 80 self.doRecordTransforms() 73 81 self.getSpatioTemporalData() … … 82 90 for docType in self.documentTypes: 83 91 self.getDocumentFormat(docType) 92 84 93 logging.info("Transforms complete") 85 94 95 96 def createMolesFile(self): 97 ''' 98 Check if a moles file exists on the system; if not, assume the moles transform has not 99 been ran and then produce this file - to allow for use in the various xqueries 100 ''' 101 logging.info("Creating moles file on system - for use with other xquery transforms") 102 self._molesDir = self._dir + "/moles/" 103 self._fileUtils.setUpDir(self._molesDir) 104 105 if self._molesFormat is None: 106 self.doMolesTransform() 107 108 self._fileUtils.createFile(self._molesDir + self._shortFilename, self._molesFormat) 109 logging.info("Moles file created - at %s" %self._molesDir) 110 86 111 87 112 def doTransform(self, xQueryType): … … 93 118 logging.info("Running XQuery transform, " + xQueryType + " to create transformed document") 94 119 120 # firstly, check if this is a moles -> something else query; if so, ensure there is a valid 121 # moles file available for the transform - and use the correct dir for the xquery collection 122 dir = self._dir 123 if xQueryType.find('moles2') > -1: 124 if self._molesDir is None: 125 self.createMolesFile() 126 127 dir = self._molesDir 128 95 129 # get the query and set this up to 
use properly 96 xquery = self._xq.actual(xQueryType, self.filename, self._repository, self.discovery_id)130 xquery = self._xq.actual(xQueryType, dir, self._repository_local_id, self._local_id) 97 131 98 132 # sort out the input ID stuff … … 100 134 xquery=xquery.replace('repository_localid', self._repository) 101 135 136 # strip out the eXist reference to the libraries; these files should be available in the 137 # running dir - as set up by oai_ingest.py 138 xquery=xquery.replace('xmldb:exist:///db/xqueryLib/Vocabs/', '') 139 xquery=xquery.replace('xmldb:exist:///db/xqueryLib/Utilities/', '') 140 102 141 # write the query to file, to make it easier to input 103 142 # NB, running directly at the command line leads to problems with the interpretation of $ characters 104 143 xqFile = "currentQuery.xq" 105 f=open(xqFile,'w') 106 f.write(xquery) 107 f.close() 144 self._fileUtils.createFile(xqFile, xquery) 108 145 109 146 # Now do the transform 110 # xqCommand = "java -cp /home/users/cbyrom/opt/saxonsa/saxon9sa.jar:/home/users/cbyrom/opt/saxonsa net.sf.saxon.Query " + \111 147 os.putenv ('PATH', ':/usr/java/jdk1.5.0_03/bin:/usr/java/jdk1.5.0_03:/usr/java/jdk1.5.0_03/lib/tools.jar:/usr/local/WSClients/OAIBatch:/usr/local/exist-client/bin:/bin:/usr/bin:.') 112 148 xqCommand = "java -cp saxon9.jar net.sf.saxon.Query " + xqFile … … 117 153 118 154 print output 119 print "ss,", status120 155 if status is not None: 121 156 sys.exit("Failed at running the XQuery") … … 163 198 tmpDir = os.getcwd() + "/" 164 199 tmpKeywordsDir = os.getcwd() + "/kewordsAdded/" 165 fileUtils = FileUtilities(self.logger) 166 fileUtils.setUpDir(tmpDir) 167 fileUtils.setUpDir(tmpKeywordsDir) 200 self._fileUtils.setUpDir(tmpDir) 201 self._fileUtils.setUpDir(tmpKeywordsDir) 168 202 tmpFile = 'tmpFile.xml' 169 f=open(tmpDir + "/" + tmpFile,'w') 170 f.write(self._molesFormat) 171 f.close() 203 self._fileUtils.createFile(tmpDir + "/" + tmpFile, self._molesFormat) 172 204 173 205 keywordAdder.main(tmpDir, 
tmpKeywordsDir, self.datacentre_groups) … … 179 211 180 212 # Finally, tidy up temp dirs 181 fileUtils.cleanDir(tmpDir)182 fileUtils.clearDir(tmpKeywordsDir)213 self._fileUtils.cleanDir(tmpDir) 214 self._fileUtils.clearDir(tmpKeywordsDir) 183 215 logging.info("Completed adding keywords") 184 216 … … 197 229 if self._molesFormat is None: 198 230 self.doMolesTransform() 231 self.createMolesFile() 199 232 200 233 # check the document isn't already defined … … 205 238 return doc 206 239 except: 207 logging.info(" Creating new transformed document")240 logging.info("Document not available - creating new transformed document") 208 241 209 242 # the doc type doesn't exist - so run the xquery -
TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/SpaceTimeIngestFromMOLES.py
r3797 r3821 25 25 import db_funcs 26 26 import os 27 import PostgresDBUtils28 27 29 28 #connect to db (in separate db functions module) -
TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/oai_ingest_new2.py
r3817 r3821 34 34 import ndgUtils 35 35 from ndgUtils.ndgXqueries import ndgXqueries 36 from ndgUtils.ndgObject import ndgObject37 36 from FileUtilities import FileUtilities 38 37 from PostgresRecord import PostgresRecord … … 65 64 Add a file to the postgres DB - extracting and storing all the required 66 65 data in the process 66 @param filename: full path of file to add to postgres DB 67 67 ''' 68 68 logging.info("Adding file, " + filename + ", to postgres DB") 69 69 discoveryID = getID(filename) 70 70 71 # NB, if we're dealing with an NDG data provider, the details are slightly different72 if NDG_dataProvider:73 discObj=ndgObject(discoveryID)74 discoveryID = discObj.localID75 datacentre_namespace = discObj.repository76 77 71 # first of all create a PostgresRecord - this object represents all the data required 78 72 # for a DB entry 79 record = PostgresRecord(filename, datacentre_groups, datacentre_namespace, discoveryID, xq, datacentre_format)73 record = PostgresRecord(filename, NDG_dataProvider, datacentre_groups, datacentre_namespace, discoveryID, xq, datacentre_format) 80 74 81 75 # Now create the data access object to interface to the DB … … 145 139 146 140 logging.info("datacentre namespace: " + datacentre_namespace) 141 142 if NDG_dataProvider: 143 logging.info("Datacentre classified as an NDG data provider") 144 else: 145 logging.info("Datacentre is not classificied as an NDG data provider") 147 146 print lineSeparator 148 147 … … 160 159 print lineSeparator 161 160 print "RUNNING: oai_ingest.py" 162 163 verboseMode = False164 165 161 166 162 # check for verbose option … … 276 272 277 273 # now set up the required XQueries 274 # - NB, extract the xquery libraries locally for easy reference 278 275 xq=ndgXqueries() 276 for libFile in xq.xqlib: 277 fileUtils.createFile(libFile, xq.xqlib[libFile]) 279 278 280 279 # Process the resulting files and put the data into the postgres DB
Note: See TracChangeset for help on using the changeset viewer.