Changeset 3847


Ignore:
Timestamp:
02/05/08 15:17:46 (11 years ago)
Author:
cbyrom
Message:

Create new object, SpatioTemporalData, to wrap temporal and spatial data
+ change PostgresRecord to include all spatiotemporal data from a moles file
using this new object.

Location:
TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch
Files:
1 added
1 edited

Legend:

Unmodified
Added
Removed
  • TI01-discovery/branches/ingestAutomation-upgrade/OAIBatch/PostgresRecord.py

    r3846 r3847  
    1919from ndgUtils.ndgObject import ndgObject 
    2020from FileUtilities import FileUtilities 
     21from SpatioTemporalData import SpatioTemporalData 
    2122 
    2223class PostgresRecord: 
     
    269270        Extract spatio temporal data from the original document 
    270271        ''' 
    271         #this is a fix to the  ElementTree namespace problem that namespaces are usually  
    272         # represented as ns0, ns1, ns2 etc. 
    273         #cElementTree._namespace_map.update({'http://ndg.nerc.ac.uk/moles': 'moles', 'http://www.w3.org/1999/xlink':'xlink'}) 
    274         self.east = 'null' 
    275         self.west = [] 
    276         self.north = 'null' 
    277         self.south = 'null' 
    278         self.startdate='nostartdate' 
    279         self.enddate='noenddate' 
     272        # initialise the various spatiotemporal arrays used to extract data to 
     273        self.stData = SpatioTemporalData() 
    280274         
    281275        molesFile = self._molesDir + self._shortFilename 
    282          
     276        logging.info('Retrieving spatiotemporal info from moles file, %s' %molesFile) 
     277         
     278        # load in the moles file and put this into an object for direct access to the xml elements 
    283279        dgMeta=MRW.dgMetadata() 
    284280        try: 
     
    286282        except Exception, detail: 
    287283            raise SystemError, 'Cannot parse the XML moles document %s. Detail:\n%s' %(molesFile, detail) 
    288          
    289         bbox_list = [] 
     284 
     285        # do quick checks to see if the relevant data exists 
     286        if not dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary: 
     287            logging.info("No data summary elements found - assuming no spatiotemporal data available") 
     288            return 
     289         
     290        if not dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage: 
     291            logging.info("No data coverage elements found - assuming no spatiotemporal data available") 
     292            return 
     293         
     294        if not dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage.dgSpatialCoverage: 
     295            logging.info("No spatial coverage elements found - assuming no spatial data available") 
     296        else: 
     297            self.getCoordData(dgMeta) 
     298 
     299        if not dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage.dgTemporalCoverage: 
     300            logging.info("No temporal coverage elements found - assuming no temporal data available") 
     301        else: 
     302            self.getTimeRangeData(dgMeta) 
     303 
     304 
     305    def getTimeRangeData(self, dgMeta): 
     306        ''' 
     307        Parse an xml tree and add any time range data found 
     308        @param dgMeta: xml fragment for the time range 
     309        ''' 
     310        logging.info("Extracting time range info") 
    290311        try: 
    291             logging.info("Extracting bounding box info") 
    292             bbox_list=self.listify(dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage.dgSpatialCoverage.BoundingBox) 
     312            dates = dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage.dgTemporalCoverage.DateRange 
     313             
     314            if not dates: 
     315                logging.info("No temporal info found for document") 
     316                 
     317            dates_list = self.listify(dates) 
     318            for date in dates_list: 
     319                startdate=date.DateRangeStart 
     320                enddate= date.DateRangeEnd 
     321                if startdate==None or startdate=='None': 
     322                    startdate="nostartdate" 
     323                if enddate==None or enddate=='None': 
     324                    enddate="noenddate" 
     325                     
     326                self.stData.addTimeRange(startdate, enddate) 
     327                logging.info("Temporal info: startdate " + \ 
     328                             startdate + ", enddate " + enddate)  
     329        except Exception, detail: 
     330            logging.info("Document does not contain temporal info.") 
     331            logging.info(detail) 
     332 
     333         
     334    def getCoordData(self, dgMeta): 
     335        ''' 
     336        Parse an xml tree and add any coord data found 
     337        @param dgMeta: xml fragment for the bounding boxes 
     338        ''' 
     339        logging.info("Extracting bounding box info") 
     340        try: 
     341 
     342            bboxes = dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage.dgSpatialCoverage.BoundingBox 
     343             
     344            if not bboxes: 
     345                logging.info("No bounding box info found for document") 
     346                return 
     347                 
     348            bbox_list=self.listify(bboxes) 
    293349            #parse the list of coordinates 
    294350            for bbox in bbox_list: 
    295                 self.west.append(self.parseCoord(bbox.LimitWest, 'W', 'E')) 
    296                 self.east = self.parseCoord(bbox.LimitEast, 'W', 'E') 
    297                 self.north = self.parseCoord(bbox.LimitNorth, 'S', 'N') 
    298                 self.south = self.parseCoord(bbox.LimitSouth, 'S', 'N') 
     351                north = self.parseCoord(bbox.LimitNorth, 'S', 'N') 
     352                south = self.parseCoord(bbox.LimitSouth, 'S', 'N') 
     353                east = self.parseCoord(bbox.LimitEast, 'W', 'E') 
     354                west = self.parseCoord(bbox.LimitWest, 'W', 'E') 
     355                self.stData.addCoords(north, south, east, west) 
     356                logging.info("Spatial info: west= " + west + ",south " + south + ", east " + \ 
     357                    east + ", north " + north + "") 
     358                 
    299359        except Exception, detail: 
    300             logging.info("XML moles document " + molesFile + " does not contain a bounding box.") 
    301             logging.debug(detail) 
    302  
    303         try: 
    304             dates=dgMeta.dgMetadataRecord.dgDataEntity.dgDataSummary.dgDataCoverage.dgTemporalCoverage.DateRange 
    305             print "startdate = %s" %dates.DateRangeStart 
    306             print "enddate = %s" %dates.DateRangeEnd 
    307         except: 
    308             logging.info("XML moles document " + molesFile + " does not contain temporal info.") 
    309             no_dates=True 
    310  
    311         if no_bbox and no_dates: 
    312             logging.info("XML moles document " + molesFile + " does not contain any spatiotemporal info.") 
    313             return 
    314  
    315         if not no_dates: 
    316             startdate=dates.DateRangeStart 
    317             enddate= dates.DateRangeEnd 
    318             if startdate==None or startdate=='None': 
    319                 startdate="nostartdate" 
    320             if enddate==None or enddate=='None': 
    321                 enddate="noenddate" 
    322             self.startdate = startdate 
    323             self.enddate = enddate 
    324  
    325  
    326         logging.info("Spatial info: west= " + self.west + ",south " + self.south + ", east " + \ 
    327                     self.east + ", north " + self.north + "") 
    328         logging.info("Temporal info: startdate " + self.startdate + ", enddate " + self.enddate)  
    329  
     360            logging.warning("Problem encountered whilst parsing bounding box info - this may lead \n" + \ 
     361                            "to an incomplete set of metadata being ingested. \nDetail: %s" %detail) 
    330362 
    331363 
     
    339371        @return: coord - the value of the coordinate as a string    
    340372        ''' 
    341         logging.info("Parsing document coordinates") 
     373        logging.debug("Parsing document coordinates") 
    342374        try: 
    343375            coord = coordValue.strip() 
Note: See TracChangeset for help on using the changeset viewer.