Changeset 3809 for TI01-discovery
- Timestamp:
- 21/04/08 13:07:36 (13 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/PostgresRecord.py
r3800 r3809 29 29 @param ndgDataProvider: If True, data has come from a NDG dataprovider, otherwise False 30 30 ''' 31 def __init__(self,filename, ndgDataProvider): 32 self._filename = filename # TODO: not sure we need to store this? 33 self._ndgDataProvider = ndgDataProvider 31 documentTypes = ['DIF', 'MOLES', 'DC', 'MDIP', 'ISO19139'] 32 33 def __init__(self, filename, ndgDataProvider, targetCollection, datacentre_namespace, discovery_id, xq, docType): 34 self.filename = filename # TODO: not sure we need to store this? 35 self._targetColection = targetCollection 36 self._repository = datacentre_namespace 37 self.discovery_id = discovery_id 38 self._xq = xq 39 self.docType = docType 40 41 self._allDocs = [] # array to store all the transformed docs - for easy retrieval by the DAO 42 43 # if ndgDataProvider: 44 # discObj=ndgObject(discovery_id) 45 # self._repository = discObj.repository 46 # self.discovery_id = discObj.localID 47 34 48 # firstly load contents of file 35 xml = file(filename).read() 49 self.originalFormat = file(filename).read() 50 36 51 # we use loadET to protect ourselves from scummy characters and unicode problems 37 self._originalFormat = loadET(xml) 52 # DO WE NEED TO DO THIS?? 53 self.correctedFormat = loadET(self.originalFormat) 54 55 38 56 #debugging stuff 39 57 print "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv" 40 # print xml 41 # print self. _originalFormat.keys()42 # for i in self. _originalFormat: print i.tag43 # print dir(self. _originalFormat)58 print self.correctedFormat 59 # print self.originalFormat.keys() 60 # for i in self.originalFormat: print i.tag 61 # print dir(self.originalFormat) 44 62 print "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv" 45 63 #we use nsdumb in case the namespace causes difficulties ... 46 helper=nsdumb(self._originalFormat)64 # helper=nsdumb(self.originalFormat) 47 65 #print helper 48 # self.id=helper.getText(self. _originalFormat,'DatasetIdentifier')66 # self.id=helper.getText(self.originalFormat,'DatasetIdentifier') 49 67 50 68 51 69 # initialise the various record fields 52 70 self.db_id = None # the DB ID of the record, for easy reference when it is created 53 self._molesFormat = None 54 self._dcFormat = None 71 self.molesFormat = None 72 self.dcFormat = None 73 self.mdipFormat = None 74 self.iso19139Format = None 75 55 76 self.getSpatioTemporalData() 56 77 self.doRecordTransforms() 57 58 59 def listify(item): 60 ''' 61 listify checks if an item is a list, if it isn't it puts it 62 inside a list and returns it. Always returns a list object. 63 @param item: object to check 64 @return: item as a list object 65 ''' 66 if type(item) is list: 67 return item 68 else: 69 return [item] 70 71 def getMolesFormat(self): 72 ''' 73 Return the document in MOLES format 74 ''' 75 # Initially, check the transform hasn't already been done; if it has, 76 # return this directly 77 if (self._molesFormat) is not None: 78 return self._molesFormat 79 80 # Now, do the transform 81 xq=ndgXqueries() 82 host = 'localhost' 83 xmldb=ndg_eXist(db='' + host + '') # NB, test this works 84 if self._ndgDataProvider: 85 #discObj=ndgObject('disc_idaaaaaaaa') 86 xquery=xq.actual('dif2moles', self._filename,'blah1', 'blah_id1')#discObj.repository,discObj.localID) 87 else: 88 xquery=xq.actual('dif2moles', self._filename, 'blah', 'blah_id')#datacentre_namespace,disc_id) 89 90 #create the mini-moles for each Discovery record in the collection 91 #for member in ndgDir.members: 92 # #print member 93 filename= 'ffff'# member['fileName'] 94 disc_id = 'discccc'#member['EntryID'] 95 # print "INFO: internal id = %s" %disc_id 96 # print "INFO: discovery filename = %s" %filename 97 # now create the xquery 98 # sort out the output ID stuff ... 99 # and then sort out the input ID stuff 100 xquery=xquery.replace('Input_Entry_ID',disc_id) 101 xquery=xquery.replace('repository_localid', 'datacentreeeeeee')#datacentre_namespace ) 102 103 print "WWWWWWWWWWWWWWWW", xquery 78 79 80 def doTransform(xQueryType): 81 ''' 82 Transform the record according to the specified XQuery type 83 @param xQueryType: XQuery doc to use to do the transform 84 @return: the metadata record in the required transformed format 85 ''' 86 xquery = self._xq.actual(xQueryType, self._targetCollection, self._datacentre_namespace, self.discovery_id) 87 88 # and then sort out the input ID stuff 89 xquery=xquery.replace('Input_Entry_ID', self.discovery_id) 90 xquery=xquery.replace('repository_localid', self._datacentre_namespace) 91 92 # Now do the transform 104 93 xqCommand = "java -cp /home/users/cbyrom/opt/saxonsa/saxon9sa.jar:/home/users/cbyrom/opt/saxonsa net.sf.saxon.Query -sa -t -s " + \ 105 94 self._filename + " /home/users/cbyrom/tmp/ingestAutomation/OAIBatch/xquery/dif2moles.xq" #\"{" + xquery + "}\"" … … 109 98 if status !=0: 110 99 sys.exit("Failed at running the XQuery")#, %s" %xqCommand) 100 101 print "INFO: Running XQuery transform to create minimoles document" 102 molesid, s = xmldb.executeQuery(xquery) 103 return xmldb.retrieve(molesid,0) 104 105 106 def getDocumentFormat(docType): 107 ''' 108 Lookup document format; if it is already defined then return it, else do the required XQuery 109 transform 110 @param docType: format of document to return 111 ''' 112 print "INFO: Retrieving document type, %s" %docType 113 xqName = {'DIF':'moles2dif', 'MOLES':'moles', 'DC':'moles2DC', 'MDIP':'moles2mdip', 'ISO19139':'moles2iso19139'}[docType] 114 attributeName = {'DIF':'_difFormat', 'MOLES':'_molesFormat', 'DC':'_dcFormat', 'MDIP':'_mdipFormat', 'ISO19139':'_iso19139Format'}[docType] 115 116 # check the document isn't already defined 117 doc = getattr(self, attributeName) 118 if doc is not None: 119 return doc 120 121 # the doc type doesn't exist - so run the xquery 122 setattr(self, attributeName, doTransform(xqName)) 123 111 124 112 #print xq.help('dif2moles') 113 # molesid,s=xmldb.executeQuery(xquery) 114 # moles_from_dif=xmldb.retrieve(molesid,0) 115 #print moles_from_dif 116 # now write out xml to file 117 # f=open(out_dir+"/"+filename,'w') 118 # f.write(moles_from_dif) 119 # f.close() 120 121 def getDCFormat(self): 122 ''' 123 Return the document in Dublin Core format 124 ''' 125 # Initially, check the transform hasn't already been done; if it has, 126 # return this directly 127 if (self._dcFormat) is not None: 128 return self._dcFormat 129 130 # Now, do the transform 131 132 133 def getOriginalFormat(self): 134 ''' 135 Return the original document 136 ''' 137 return self._originalFormat 138 125 def getAllDocs(): 126 ''' 127 Return a list of all the available doc types in the record 128 ''' 129 if len(self._allDocs) > 0: 130 return self._allDocs 131 132 for docType in documentTypes: 133 self._allDocs.append([docType, getDocumentFormat(docType)]) 134 return self._allDocs 139 135 140 136 def doRecordTransforms(self): … … 143 139 the other types of doc used elsewhere 144 140 ''' 145 self.getMolesFormat() 146 self.getDCFormat() 141 for docType in documentTypes: 142 getDocumentFormat(docType) 143 144 145 def listify(item): 146 ''' 147 listify checks if an item is a list, if it isn't it puts it 148 inside a list and returns it. Always returns a list object. 149 @param item: object to check 150 @return: item as a list object 151 ''' 152 if type(item) is list: 153 return item 154 else: 155 return [item] 147 156 148 157 … … 163 172 dgMeta=MRW.dgMetadata() 164 173 try: 165 dgMeta.fromXML(cElementTree.ElementTree(file=self. _filename).getroot())174 dgMeta.fromXML(cElementTree.ElementTree(file=self.filename).getroot()) 166 175 except: 167 print "WARNING: Cannot parse the XML moles document %s. Will not process" %self. _filename176 print "WARNING: Cannot parse the XML moles document %s. Will not process" %self.filename 168 177 return 169 178 try: 170 179 bbox_list=listify(dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage.dgSpatialCoverage.BoundingBox) 171 180 except: 172 print "INFO: XML moles document %s does not contain a bounding box." %self. _filename181 print "INFO: XML moles document %s does not contain a bounding box." %self.filename 173 182 no_bbox=True 174 183 … … 178 187 print "enddate = %s" %dates.DateRangeEnd 179 188 except: 180 print "INFO: XML moles document %s does not contain temporal info." %self. _filename189 print "INFO: XML moles document %s does not contain temporal info." %self.filename 181 190 no_dates=True 182 191 183 192 if no_bbox and no_dates: 184 print "INFO: XML moles document %s does not contain any spatiotemporal info." %self. _filename193 print "INFO: XML moles document %s does not contain any spatiotemporal info." %self.filename 185 194 return 186 195 … … 201 210 west = bbox.LimitWest.strip() 202 211 except: 203 print "ERROR: Will not process File %s. Contains incorrect West bounding box limit." %self. _filename212 print "ERROR: Will not process File %s. Contains incorrect West bounding box limit." %self.filename 204 213 return 205 214 if west.endswith('E'): … … 213 222 float(west) 214 223 except: 215 print "ERROR: Will not process File %s. Contains incorrect West bounding box limit." %self. _filename224 print "ERROR: Will not process File %s. Contains incorrect West bounding box limit." %self.filename 216 225 return 217 226 self.west = west … … 220 229 east = bbox.LimitEast.strip() 221 230 except: 222 print "ERROR: Will not process File %s. Contains incorrect East bounding box limit." %self. _filename231 print "ERROR: Will not process File %s. Contains incorrect East bounding box limit." %self.filename 223 232 return 224 233 if east.endswith('E'): … … 232 241 float(east) 233 242 except: 234 print "ERROR: Will not process File %s. Contains incorrect East bounding box limit." %self. _filename243 print "ERROR: Will not process File %s. Contains incorrect East bounding box limit." %self.filename 235 244 return 236 245 self.east = east … … 239 248 north = bbox.LimitNorth.strip() 240 249 except: 241 print "ERROR: Will not process File %s. Contains incorrect North bounding box limit." %self. _filename250 print "ERROR: Will not process File %s. Contains incorrect North bounding box limit." %self.filename 242 251 return 243 252 if north.endswith('N'): … … 251 260 float(north) 252 261 except: 253 print "ERROR: Will not process File %s. Contains incorrect North bounding box limit." %self. _filename262 print "ERROR: Will not process File %s. Contains incorrect North bounding box limit." %self.filename 254 263 return 255 264 self.north = north … … 258 267 south = bbox.LimitSouth.strip() 259 268 except: 260 print "ERROR: Will not process File %s. Contains incorrect South bounding box limit." %self. _filename269 print "ERROR: Will not process File %s. Contains incorrect South bounding box limit." %self.filename 261 270 return 262 271 if south.endswith('N'): … … 270 279 float(south) 271 280 except: 272 print "ERROR: Will not process File %s. Contains incorrect North bounding box limit." %self. _filename281 print "ERROR: Will not process File %s. Contains incorrect North bounding box limit." %self.filename 273 282 return 274 283 self.south = south … … 279 288 280 289 281 282 283 284 285 286 287 288 289 290 def hasNullCoords(): 291 if str(self.west)=='null' or \ 292 str(self.south)=='null' or \ 293 str(self.east)=='null' or \ 294 str(self.north)=='null': 295 return True; 296 else: 297 return False; 298
Note: See TracChangeset
for help on using the changeset viewer.