Changeset 3847 for TI01-discovery
- Timestamp: 02/05/08 15:17:46 (13 years ago)
- Location: TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch
- Files: 1 added, 1 edited
Legend:
- Unmodified
- Added
- Removed
-
TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/PostgresRecord.py
r3846 r3847 19 19 from ndgUtils.ndgObject import ndgObject 20 20 from FileUtilities import FileUtilities 21 from SpatioTemporalData import SpatioTemporalData 21 22 22 23 class PostgresRecord: … … 269 270 Extract spatio temporal data from the original document 270 271 ''' 271 #this is a fix to the ElementTree namespace problem that namespaces are usually 272 # represented as ns0, ns1, ns2 etc. 273 #cElementTree._namespace_map.update({'http://ndg.nerc.ac.uk/moles': 'moles', 'http://www.w3.org/1999/xlink':'xlink'}) 274 self.east = 'null' 275 self.west = [] 276 self.north = 'null' 277 self.south = 'null' 278 self.startdate='nostartdate' 279 self.enddate='noenddate' 272 # initialise the various spatiotemporal arrays used to extract data to 273 self.stData = SpatioTemporalData() 280 274 281 275 molesFile = self._molesDir + self._shortFilename 282 276 logging.info('Retrieving spatiotemporal info from moles file, %s' %molesFile) 277 278 # load in the moles file and put this into an object for direct access to the xml elements 283 279 dgMeta=MRW.dgMetadata() 284 280 try: … … 286 282 except Exception, detail: 287 283 raise SystemError, 'Cannot parse the XML moles document %s. 
Detail:\n%s' %(molesFile, detail) 288 289 bbox_list = [] 284 285 # do quick checks to see if the relevant data exists 286 if not dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary: 287 logging.info("No data summary elements found - assuming no spatiotemporal data available") 288 return 289 290 if not dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage: 291 logging.info("No data coverage elements found - assuming no spatiotemporal data available") 292 return 293 294 if not dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage.dgSpatialCoverage: 295 logging.info("No spatial coverage elements found - assuming no spatial data available") 296 else: 297 self.getCoordData(dgMeta) 298 299 if not dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage.dgTemporalCoverage: 300 logging.info("No temporal coverage elements found - assuming no temporal data available") 301 else: 302 self.getTimeRangeData(dgMeta) 303 304 305 def getTimeRangeData(self, dgMeta): 306 ''' 307 Parse an xml tree and add any time range data found 308 @param dgMeta: xml fragment for the time range 309 ''' 310 logging.info("Extracting time range info") 290 311 try: 291 logging.info("Extracting bounding box info") 292 bbox_list=self.listify(dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage.dgSpatialCoverage.BoundingBox) 312 dates = dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage.dgTemporalCoverage.DateRange 313 314 if not dates: 315 logging.info("No temporal info found for document") 316 317 dates_list = self.listify(dates) 318 for date in dates_list: 319 startdate=date.DateRangeStart 320 enddate= date.DateRangeEnd 321 if startdate==None or startdate=='None': 322 startdate="nostartdate" 323 if enddate==None or enddate=='None': 324 enddate="noenddate" 325 326 self.stData.addTimeRange(startdate, enddate) 327 logging.info("Temporal info: startdate " + \ 328 startdate + ", enddate " + enddate) 329 except Exception, detail: 330 
logging.info("Document does not contain temporal info.") 331 logging.info(detail) 332 333 334 def getCoordData(self, dgMeta): 335 ''' 336 Parse an xml tree and add any coord data found 337 @param dgMeta: xml fragment for the bounding boxes 338 ''' 339 logging.info("Extracting bounding box info") 340 try: 341 342 bboxes = dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage.dgSpatialCoverage.BoundingBox 343 344 if not bboxes: 345 logging.info("No bounding box info found for document") 346 return 347 348 bbox_list=self.listify(bboxes) 293 349 #parse the list of coordinates 294 350 for bbox in bbox_list: 295 self.west.append(self.parseCoord(bbox.LimitWest, 'W', 'E')) 296 self.east = self.parseCoord(bbox.LimitEast, 'W', 'E') 297 self.north = self.parseCoord(bbox.LimitNorth, 'S', 'N') 298 self.south = self.parseCoord(bbox.LimitSouth, 'S', 'N') 351 north = self.parseCoord(bbox.LimitNorth, 'S', 'N') 352 south = self.parseCoord(bbox.LimitSouth, 'S', 'N') 353 east = self.parseCoord(bbox.LimitEast, 'W', 'E') 354 west = self.parseCoord(bbox.LimitWest, 'W', 'E') 355 self.stData.addCoords(north, south, east, west) 356 logging.info("Spatial info: west= " + west + ",south " + south + ", east " + \ 357 east + ", north " + north + "") 358 299 359 except Exception, detail: 300 logging.info("XML moles document " + molesFile + " does not contain a bounding box.") 301 logging.debug(detail) 302 303 try: 304 dates=dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage.dgTemporalCoverage.DateRange 305 print "startdate = %s" %dates.DateRangeStart 306 print "enddate = %s" %dates.DateRangeEnd 307 except: 308 logging.info("XML moles document " + molesFile + " does not contain temporal info.") 309 no_dates=True 310 311 if no_bbox and no_dates: 312 logging.info("XML moles document " + molesFile + " does not contain any spatiotemporal info.") 313 return 314 315 if not no_dates: 316 startdate=dates.DateRangeStart 317 enddate= dates.DateRangeEnd 318 if startdate==None or 
startdate=='None': 319 startdate="nostartdate" 320 if enddate==None or enddate=='None': 321 enddate="noenddate" 322 self.startdate = startdate 323 self.enddate = enddate 324 325 326 logging.info("Spatial info: west= " + self.west + ",south " + self.south + ", east " + \ 327 self.east + ", north " + self.north + "") 328 logging.info("Temporal info: startdate " + self.startdate + ", enddate " + self.enddate) 329 360 logging.warning("Problem encountered whilst parsing bounding box info - this may lead \n" + \ 361 "to an incomplete set of metadata being ingested. \nDetail: %s" %detail) 330 362 331 363 … … 339 371 @return: coord - the value of the coordinate as a string 340 372 ''' 341 logging. info("Parsing document coordinates")373 logging.debug("Parsing document coordinates") 342 374 try: 343 375 coord = coordValue.strip()
Note: See TracChangeset for help on using the changeset viewer.