Changeset 3816 for TI01-discovery
- Timestamp: 23/04/08 09:55:42 (13 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/PostgresRecord.py
r3809 r3816 16 16 #ET._namespace_map.update({'http://www.oceannet.org/mdip/xml': 'mdip', 'http://www.w3.org/1999/xlink':'xlink'}) 17 17 18 import os, sys, logging 18 19 from ETxmlView import loadET, nsdumb 19 20 import molesReadWrite as MRW 20 from ndgUtils.ndgXqueries import ndgXqueries21 import os, sys22 from ndgUtils.eXistInterface import ndg_eXist23 from ndgUtils.ndgObject import ndgObject24 21 25 22 class PostgresRecord: … … 27 24 Class representing the contents of a row in the metadata_record postgres DB table 28 25 @param filename: Name of file to use a metadata record 29 @param ndgDataProvider: If True, data has come from a NDG dataprovider, otherwise False26 @param 30 27 ''' 31 documentTypes = ['DIF', 'MOLES', 'DC', 'MDIP', 'ISO19139'] 32 33 def __init__(self, filename, ndgDataProvider, targetCollection, datacentre_namespace, discovery_id, xq, docType): 34 self.filename = filename # TODO: not sure we need to store this? 35 self._targetColection = targetCollection 28 documentTypes = ['MOLES', 'DIF', 'DC', 'MDIP', 'ISO19139'] 29 30 def __init__(self, filename, datacentre_groups, datacentre_namespace, discovery_id, xq, docType): 31 logging.info("Setting up Postgres record for file, " + filename) 32 self.filename = filename 33 self._datacentre_groups = datacentre_groups 36 34 self._repository = datacentre_namespace 37 35 self.discovery_id = discovery_id 38 36 self._xq = xq 39 37 self.docType = docType 40 38 39 self._molesFormat = None # initialise this, so we can guarantee a value - to avoid using getattr 41 40 self._allDocs = [] # array to store all the transformed docs - for easy retrieval by the DAO 42 43 # if ndgDataProvider:44 # discObj=ndgObject(discovery_id)45 # self._repository = discObj.repository46 # self.discovery_id = discObj.localID47 41 48 42 # firstly load contents of file … … 55 49 56 50 #debugging stuff 57 print "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv" 58 print self.correctedFormat51 # self.logger.printOutput("vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv") 52 # 
print self.correctedFormat 59 53 # print self.originalFormat.keys() 60 54 # for i in self.originalFormat: print i.tag 61 55 # print dir(self.originalFormat) 62 print "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv" 56 # self.logger.printOutput("vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv") 63 57 #we use nsdumb in case the namespace causes difficulties ... 64 58 # helper=nsdumb(self.originalFormat) … … 74 68 self.iso19139Format = None 75 69 70 # do some initial setting up of record 71 # self.setUpRecord() 72 self.doRecordTransforms() 76 73 self.getSpatioTemporalData() 77 self.doRecordTransforms() 78 79 80 def doTransform(xQueryType): 74 75 76 def doRecordTransforms(self): 77 ''' 78 Run various transforms on the original doc, to populate the record with 79 the other types of doc used elsewhere 80 ''' 81 logging.info("Running transforms for all document types") 82 for docType in self.documentTypes: 83 self.getDocumentFormat(docType) 84 logging.info("Transforms complete") 85 86 87 def doTransform(self, xQueryType): 81 88 ''' 82 89 Transform the record according to the specified XQuery type … … 84 91 @return: the metadata record in the required transformed format 85 92 ''' 86 xquery = self._xq.actual(xQueryType, self._targetCollection, self._datacentre_namespace, self.discovery_id) 87 88 # and then sort out the input ID stuff 93 logging.info("Running XQuery transform, " + xQueryType + " to create transformed document") 94 95 # get the query and set this up to use properly 96 xquery = self._xq.actual(xQueryType, self.filename, self._repository, self.discovery_id) 97 98 # sort out the input ID stuff 89 99 xquery=xquery.replace('Input_Entry_ID', self.discovery_id) 90 xquery=xquery.replace('repository_localid', self._datacentre_namespace) 100 xquery=xquery.replace('repository_localid', self._repository) 101 102 # write the query to file, to make it easier to input 103 # NB, running directly at the command line leads to problems with the interpretation of $ characters 104 xqFile = "currentQuery.xq" 
105 f=open(xqFile,'w') 106 f.write(xquery) 107 f.close() 91 108 92 109 # Now do the transform 93 xqCommand = "java -cp /home/users/cbyrom/opt/saxonsa/saxon9sa.jar:/home/users/cbyrom/opt/saxonsa net.sf.saxon.Query -sa -t -s " + \ 94 self._filename + " /home/users/cbyrom/tmp/ingestAutomation/OAIBatch/xquery/dif2moles.xq" #\"{" + xquery + "}\"" 95 print 'HHHHHHHHH' 96 print xqCommand 97 status = os.system(xqCommand) 98 if status !=0: 99 sys.exit("Failed at running the XQuery")#, %s" %xqCommand) 100 101 print "INFO: Running XQuery transform to create minimoles document" 102 molesid, s = xmldb.executeQuery(xquery) 103 return xmldb.retrieve(molesid,0) 104 105 106 def getDocumentFormat(docType): 110 # xqCommand = "java -cp /home/users/cbyrom/opt/saxonsa/saxon9sa.jar:/home/users/cbyrom/opt/saxonsa net.sf.saxon.Query " + \ 111 os.putenv ('PATH', ':/usr/java/jdk1.5.0_03/bin:/usr/java/jdk1.5.0_03:/usr/java/jdk1.5.0_03/lib/tools.jar:/usr/local/WSClients/OAIBatch:/usr/local/exist-client/bin:/bin:/usr/bin:.') 112 xqCommand = "java -cp saxon9.jar net.sf.saxon.Query " + xqFile 113 logging.debug("Running saxon command: " + xqCommand) 114 pipe = os.popen(xqCommand + " 2>&1") 115 output = pipe.read() 116 status = pipe.close() 117 118 print output 119 print "ss,", status 120 if status is not None: 121 sys.exit("Failed at running the XQuery") 122 123 # now remove the temp xquery file 124 status = os.unlink(xqFile) 125 if status is not None: 126 sys.exit("Failed to remove the temporary xquery file, " + xqFile) 127 128 logging.info("Transform completed successfully") 129 130 return output 131 132 133 def doMolesTransform(self): 134 ''' 135 Set up the basic moles doc - according to the type of document first ingested 136 ''' 137 logging.info("Creating moles document - for use with other transforms") 138 xqName = None 139 if self.docType == "DIF": 140 xqName = "dif2moles" 141 elif self.docType == "MDIP": 142 xqName = "mdip2moles" 143 else: 144 sys.exit("ERROR: No XQuery exists to transform 
input document type, %s, into moles format" \ 145 %self.docType) 146 147 # add keywords, if required 148 if self._datacentre_groups != "": 149 addKeywords() 150 151 # now run the appropriate transform and set the attribute 152 setattr(self, "_molesFormat", self.doTransform(xqName)) 153 logging.info("moles document created") 154 155 156 def addKeywords(self): 157 ''' 158 If datacentre groups have been specified, these need to be added as keywords 159 - NB, this is rather clumsy approach but uses old code to achieve the result 160 ''' 161 logging.info("Adding datacentre keywords to moles file") 162 # NB, use temporary directories to do the keyword additions 163 tmpDir = os.getcwd() + "/" 164 tmpKeywordsDir = os.getcwd() + "/kewordsAdded/" 165 fileUtils = FileUtilities(self.logger) 166 fileUtils.setUpDir(tmpDir) 167 fileUtils.setUpDir(tmpKeywordsDir) 168 tmpFile = 'tmpFile.xml' 169 f=open(tmpDir + "/" + tmpFile,'w') 170 f.write(self._molesFormat) 171 f.close() 172 173 keywordAdder.main(tmpDir, tmpKeywordsDir, self.datacentre_groups) 174 175 # Now load in the converted file 176 f=open(tmpKeywordsDir + "/" + tmpFile, 'r') 177 self._molesFormat = f.read() 178 f.close 179 180 # Finally, tidy up temp dirs 181 fileUtils.cleanDir(tmpDir) 182 fileUtils.clearDir(tmpKeywordsDir) 183 logging.info("Completed adding keywords") 184 185 186 def getDocumentFormat(self, docType): 107 187 ''' 108 188 Lookup document format; if it is already defined then return it, else do the required XQuery 109 transform 189 transform. 
NB, transforms are ran on the molesFormat document - so ensure this is available 110 190 @param docType: format of document to return 111 191 ''' 112 print "INFO: Retrieving document type, %s" %docType192 logging.info("Retrieving document type, " + docType) 113 193 xqName = {'DIF':'moles2dif', 'MOLES':'moles', 'DC':'moles2DC', 'MDIP':'moles2mdip', 'ISO19139':'moles2iso19139'}[docType] 114 194 attributeName = {'DIF':'_difFormat', 'MOLES':'_molesFormat', 'DC':'_dcFormat', 'MDIP':'_mdipFormat', 'ISO19139':'_iso19139Format'}[docType] 115 195 196 # check we have the moles format available; if not create it 197 if self._molesFormat is None: 198 self.doMolesTransform() 199 116 200 # check the document isn't already defined 117 doc = getattr(self, attributeName) 118 if doc is not None: 119 return doc 201 try: 202 doc = getattr(self, attributeName) 203 if doc is not None: 204 logging.info("Found existing document - returning this now") 205 return doc 206 except: 207 logging.info("Creating new transformed document") 120 208 121 209 # the doc type doesn't exist - so run the xquery 122 setattr(self, attributeName, doTransform(xqName))210 setattr(self, attributeName, self.doTransform(xqName)) 123 211 124 212 125 def getAllDocs( ):213 def getAllDocs(self): 126 214 ''' 127 215 Return a list of all the available doc types in the record … … 133 221 self._allDocs.append([docType, getDocumentFormat(docType)]) 134 222 return self._allDocs 135 136 def doRecordTransforms(self):137 '''138 Run various transforms on the original doc, to populate the record with139 the other types of doc used elsewhere140 '''141 for docType in documentTypes:142 getDocumentFormat(docType)143 223 144 224 … … 174 254 dgMeta.fromXML(cElementTree.ElementTree(file=self.filename).getroot()) 175 255 except: 176 print "WARNING: Cannot parse the XML moles document %s. Will not process" %self.filename256 logging.warning("WARNING: Cannot parse the XML moles document %s. 
Will not process" %self.filename) 177 257 return 178 258 try: 179 259 bbox_list=listify(dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage.dgSpatialCoverage.BoundingBox) 180 260 except: 181 print "INFO: XML moles document %s does not contain a bounding box." %self.filename 261 logging.info("XML moles document " + self.filename + \ 262 " does not contain a bounding box.") 182 263 no_bbox=True 183 264 … … 187 268 print "enddate = %s" %dates.DateRangeEnd 188 269 except: 189 print "INFO: XML moles document %s does not contain temporal info." %self.filename270 logging.info("XML moles document " + self.filename + " does not contain temporal info.") 190 271 no_dates=True 191 272 192 273 if no_bbox and no_dates: 193 print "INFO: XML moles document %s does not contain any spatiotemporal info." %self.filename274 logging.info("XML moles document " + self.filename + " does not contain any spatiotemporal info.") 194 275 return 195 276 … … 283 364 self.south = south 284 365 285 print "Spatial info: west= %s,south %s, east %s, north %s"\286 %(self.west,self.south,self.east,self.north)287 print "Temporal info: startdate %s, enddate %s" %(self.startdate, self.enddate)366 logging.info("Spatial info: west= " + self.west + ",south " + self.south + ", east " + \ 367 self.east + ", north " + self.north + "") 368 logging.info("Temporal info: startdate " + self.startdate + ", enddate " + self.enddate) 288 369 289 370
Note: See TracChangeset for help on using the changeset viewer.