source: ndgCommon/trunk/ndg/common/src/models/vocabtermdata.py @ 5129

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/models/vocabtermdata.py@5136
Revision 5129, 28.2 KB checked in by cbyrom, 11 years ago (diff)

Update browse service reference.

Line 
1#!/usr/bin/env python
2'''
3 Class to store and access the various vocab term data
4 
5 @author: C Byrom, Tessella Jul 2008
6'''
7import logging, time, re, urllib
8from ndg.common.src.lib.utilities import openURLWithDefaultProxy
9   
def isValidTermURI(uri):
    '''
    Determine whether a specific vocab term uri is valid, by retrieving it
    and checking that the returned data contains an '<rdf:RDF' marker.

    NB, using the utilities.checkURL method won't typically work since the
    RDF data is exposed directly - i.e. without HEAD information which is
    usually looked for
    @param uri: vocab term uri to check
    @return: True if the retrieved data contains '<rdf:RDF'; False otherwise,
    including when the retrieval or the check raises an exception
    '''
    logging.debug("Checking vocab term uri, '%s'", uri)
    try:
        pageData = openURLWithDefaultProxy(uri)
        if '<rdf:RDF' in pageData:
            logging.debug("- found valid term")
            return True

    except Exception as e:
        # deliberately best-effort: any failure just means the uri is not
        # treated as valid.  Log the exception itself rather than e.message,
        # which is deprecated and empty for multi-argument exceptions
        logging.error("Exception thrown whilst verifying uri: '%s'", e)

    logging.debug("- term appears to be invalid")
    return False
31                     
32
class VocabTermItem(object):
    '''
    Plain record describing a single vocab term
    '''
    def __init__(self, vocabURL, termID, title=None):
        '''
        @param vocabURL: url of the vocab the term belongs to (elsewhere in
        this module a 'NOT YET SET UP' placeholder is used where none exists)
        @param termID: identifier of the term within that vocab
        @keyword title: optional display title for the term
        '''
        self.vocabURL, self.termID, self.title = vocabURL, termID, title
41
42
class VocabTermDataError(ValueError):
    """
    Error raised by the VocabTermData class; a ValueError subclass, so
    handlers catching ValueError also catch it.
    """
    def __init__(self, msg):
        """
        @param msg: error text; it is written to the error log as soon as
        the exception object is created, then stored as the exception message
        """
        logging.error(msg)
        super(VocabTermDataError, self).__init__(msg)
50
51
52class VocabTermData(object):
53    '''
54    Class representing vocab term data - including
55    methods to look these up to ensure they are current
56    '''
57
58    OBJECT_PAGE_TERM = 'ObjectPage'
59    DATA_URL_TERM = 'DataURL'
60    DATA_PAGE_TERM = 'DataPage'
61    CURATOR_PAGE_TERM = 'CuratorPage'
62    EXT_METADATA_TERM = 'ExtMetadata'
63    METADATA_SOURCE_TERM = 'MetadataSource'
64    URI_TERM = 'URI'
65    LOGO_TERM = 'LOGO'
66    NUM_SIM_TERM = 'NumSim'
67    OPENDAP_TERM = 'OPENDAP'
68    THREDDS_TERM = 'THREDDS'
69    WMS_TERM = 'WMS'
70    WCS_TERM = 'WCS'
71    WFS_TERM = 'WFS'
72    LAS_TERM = 'LAS'
73    DATA_EXTRACTOR_TERM = 'DataExtractor' 
74    FILE_BROWSER_TERM = 'FileBrowser'   
75    CSML_TERM = 'CSML'
76   
77    ACTIVITY_TERM = 'ACTIVITY'
78    DPT_TERM = 'DPT'
79    OBS_TERM = 'OBS'
80    GRANULE_TERM = 'GRANULE'
81    DE_TERM = "DE"
82   
83    # dpt subtypes
84    LIDAR_TERM = "dgLidar"
85    RADAR_TERM = "dgRadar"
86    SONDE_TERM = "dgSonde"
87    NAVIGATION_TERM = "dgNavigation"
88    GAS_CHROMATOGRAPH_TERM = "dgGasChromatograph"
89    SPECTROMETER_TERM = "dgSpectrometer"
90    MASS_SPECTROMETER_TERM = "dgMassSpectrometer"
91    MET_SENSOR_TERM = "dgMetSensor"
92    DOAS_TERM = "dgDOAS"
93    ASOZ_TERM = "dgASOZ"
94    RADIOMETER_TERM = "dgRadiometer"
95    FAGE_TERM = "dgFAGE"
96    IMAGER_TERM = "dgImager"
97    FILTER_TERM = "dgFilter"
98    PARTICLE_COUNTER_TERM = "dgParticleCounter"
99    SAMPLER_TERM = "dgSampler"
100    OTHER_INSTRUMENT_TYPE_TERM = "dgOtherInstrumentType"
101    MODEL_TERM = "dgModel"
102    INSTRUMENT_TERM = "dgInstrument"
103   
104    # de subtypes
105    SIMULATION_TERM = "dgSimulation"
106    ANALYSIS_TERM = "dgAnalysis"
107    MEASUREMENT_TERM = "dgMeasurement"
108   
109    # activity subtypes
110    DATA_COLLECTION_TERM = "dgActivityDataCollection"
111    DATA_PROJECT_TERM = "dgActivityDataProject"
112    DATA_CAMPAIGN_TERM = "dgActivityDataCampaign"
113    DATA_INVESTIGATION_TERM = "dgActivityDataInvestigation"
114    FLIGHT_TERM = "dgFlight"
115    CRUISE_TERM = "dgCruise"
116    FUNDING_PROGRAM_TERM = "dgFundingProgram"
117    DEPLOYMENT_TERM = "Deployment"
118    # NB, this is a specialised activity with the subtype deployment pre-set
119    ACTIVITY_DEPLOYMENT_TERM = "ActivityDeployment"
120   
121    # obs subtypes
122    STATIONARY_PLATFORM_TERM = "dgStationaryPlatform"
123    MOVING_PLATFORM_TERM = "dgMovingPlatform"
124    LAND_STATION_TERM = "dgLandStation"
125    MOORING_TERM = "dgMooring"
126    STATION_GROUP_TERM = "dgStationGroup"
127    SHIP_TERM = "dgShip"
128    AIRCRAFT_TERM = "dgAircraft"
129    SATELLITE_TERM = "dgSatellite"
130    COMPUTER_TERM = "dgComputer"
131
132    # provider types
133    BADC_TERM = 'badc.nerc.ac.uk'
134    NEODC_TERM = 'neodc.nerc.ac.uk'
135   
136    TERM_DATA = {
137                 OBJECT_PAGE_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '1', title = 'Object Home Page'),
138                 DATA_URL_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '2', title = 'Data URL'),
139                 DATA_PAGE_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '3', title = 'Data Home Page'),
140                 CURATOR_PAGE_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '4', title = 'Curator Home Page'),
141                 EXT_METADATA_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '5', title = 'Extended Metadata'),
142                 METADATA_SOURCE_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '6', title = 'Original Metadata Source'),
143                 #URI_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '3URI', title = 'Data Home Page'),
144                 
145                 BADC_TERM:VocabTermItem('NOT YET SET UP', BADC_TERM, title = 'British Atmospheric Data Centre'),
146                 NEODC_TERM:VocabTermItem('NOT YET SET UP', NEODC_TERM, title = 'NERC Earth Observation Data Centre'),
147                 
148                 LOGO_TERM:VocabTermItem('NOT YET SET UP', LOGO_TERM, title = 'Logo'),
149                 NUM_SIM_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '7', title = 'NumSim description'),
150                 OPENDAP_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU010', title = 'GET DATA &gt; OPENDAP DATA (DODS)'),
151                 THREDDS_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU013', title = 'GET DATA &gt; THREDDS DATA'),
152                 WMS_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU026', title = 'GET SERVICE &gt; GET WEB MAP SERVICE (WMS)'),
153                 WCS_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU024', title = 'GET SERVICE &gt; GET WEB COVERAGE SERVICE (WCS)'),
154                 WFS_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU025', title = 'GET SERVICE &gt; GET WEB FEATURE SERVICE (WFS)'),
155                 LAS_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU006', title = 'GET DATA &gt; LAS'),
156                 DATA_EXTRACTOR_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '8', title = 'NDG DataExtractor'),
157                 FILE_BROWSER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '9', title = 'File Browser'),
158                 CSML_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N021', '1'),
159                 ACTIVITY_TERM:VocabTermItem('Activity - NOT YET SET UP', ACTIVITY_TERM, title = 'Activity'),
160                 ACTIVITY_DEPLOYMENT_TERM:VocabTermItem('Activity Deployment - NOT YET SET UP', ACTIVITY_DEPLOYMENT_TERM, title = DEPLOYMENT_TERM),
161                 DPT_TERM:VocabTermItem('DPT - NOT YET SET UP', DPT_TERM, title = 'Data Production Tool'),
162                 OBS_TERM:VocabTermItem('OBS - NOT YET SET UP', OBS_TERM, title = 'Observation Station'),
163                 GRANULE_TERM:VocabTermItem('GRAN - NOT YET SET UP', GRANULE_TERM, title = 'Data Granule'),
164                 DE_TERM:VocabTermItem('DE - NOT YET SET UP', DE_TERM, title = 'Data Entity'),
165                 
166                 LIDAR_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG02', title = "Lidar"),
167                 RADAR_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG03', title = "Radar"),
168                 SONDE_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG04', title = "Sonde"),
169                 NAVIGATION_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG05', title = "Navigation"),
170                 GAS_CHROMATOGRAPH_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG06', title = "Gas Chromatograph"),
171                 SPECTROMETER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG07', title = SPECTROMETER_TERM),
172                 MASS_SPECTROMETER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG08', title = MASS_SPECTROMETER_TERM),
173                 MET_SENSOR_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG09', title = MET_SENSOR_TERM),
174                 DOAS_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG10', title = DOAS_TERM),
175                 ASOZ_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG11', title = ASOZ_TERM),
176                 RADIOMETER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG12', title = RADIOMETER_TERM),
177                 FAGE_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG13', title = FAGE_TERM),
178                 IMAGER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG14', title = IMAGER_TERM),
179                 FILTER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG15', title = FILTER_TERM),
180                 PARTICLE_COUNTER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG16', title = PARTICLE_COUNTER_TERM),
181                 SAMPLER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG17', title = SAMPLER_TERM),
182                 OTHER_INSTRUMENT_TYPE_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG99', title = OTHER_INSTRUMENT_TYPE_TERM),
183                 MODEL_TERM:VocabTermItem('dgModel - NOT YET SET UP', MODEL_TERM, title = MODEL_TERM),
184                 INSTRUMENT_TERM:VocabTermItem('dgInstrument - NOT YET SET UP', INSTRUMENT_TERM, title = INSTRUMENT_TERM),
185                 
186                 SIMULATION_TERM:VocabTermItem('NOT YET SET UP', SIMULATION_TERM, title = SIMULATION_TERM),
187                 ANALYSIS_TERM:VocabTermItem('NOT YET SET UP', ANALYSIS_TERM, title = ANALYSIS_TERM),
188                 MEASUREMENT_TERM:VocabTermItem('NOT YET SET UP', MEASUREMENT_TERM, title = MEASUREMENT_TERM),
189                 DATA_COLLECTION_TERM:VocabTermItem('NOT YET SET UP', DATA_COLLECTION_TERM, title = DATA_COLLECTION_TERM),
190                 DATA_PROJECT_TERM:VocabTermItem('NOT YET SET UP', DATA_PROJECT_TERM, title = DATA_PROJECT_TERM),
191                 DATA_CAMPAIGN_TERM:VocabTermItem('NOT YET SET UP', DATA_CAMPAIGN_TERM, title = DATA_CAMPAIGN_TERM),
192                 DATA_INVESTIGATION_TERM:VocabTermItem('NOT YET SET UP', DATA_INVESTIGATION_TERM, title = DATA_INVESTIGATION_TERM),
193                 FLIGHT_TERM:VocabTermItem('NOT YET SET UP', FLIGHT_TERM, title = FLIGHT_TERM),
194                 CRUISE_TERM:VocabTermItem('NOT YET SET UP', CRUISE_TERM, title = CRUISE_TERM),
195                 FUNDING_PROGRAM_TERM:VocabTermItem('NOT YET SET UP', FUNDING_PROGRAM_TERM, title = FUNDING_PROGRAM_TERM),
196                 DEPLOYMENT_TERM:VocabTermItem('NOT YET SET UP', DEPLOYMENT_TERM, title = DEPLOYMENT_TERM),
197                 
198                 STATIONARY_PLATFORM_TERM:VocabTermItem('NOT YET SET UP', STATIONARY_PLATFORM_TERM, title = STATIONARY_PLATFORM_TERM),
199                 MOVING_PLATFORM_TERM:VocabTermItem('NOT YET SET UP', MOVING_PLATFORM_TERM, title = MOVING_PLATFORM_TERM),
200                 LAND_STATION_TERM:VocabTermItem('NOT YET SET UP', LAND_STATION_TERM, title = LAND_STATION_TERM),
201                 MOORING_TERM:VocabTermItem('NOT YET SET UP', MOORING_TERM, title = MOORING_TERM),
202                 STATION_GROUP_TERM:VocabTermItem('NOT YET SET UP', STATION_GROUP_TERM, title = STATION_GROUP_TERM),
203                 SHIP_TERM:VocabTermItem('NOT YET SET UP', SHIP_TERM, title = SHIP_TERM),
204                 AIRCRAFT_TERM:VocabTermItem('NOT YET SET UP', AIRCRAFT_TERM, title = AIRCRAFT_TERM),
205                 SATELLITE_TERM:VocabTermItem('NOT YET SET UP', SATELLITE_TERM, title = SATELLITE_TERM),
206                 COMPUTER_TERM:VocabTermItem('NOT YET SET UP', COMPUTER_TERM, title = COMPUTER_TERM)
207                 }
208
209    ATOM_TYPES = [DE_TERM, GRANULE_TERM, ACTIVITY_DEPLOYMENT_TERM, ACTIVITY_TERM, DPT_TERM, OBS_TERM]
210   
211    DEPLOYABLE_ATOM_TYPES = [ACTIVITY_TERM, DPT_TERM, OBS_TERM]
212
213    PROVIDER_TYPES = [BADC_TERM, NEODC_TERM]
214
215    ONLINE_REF_TYPES = [ OBJECT_PAGE_TERM, DATA_URL_TERM, DATA_PAGE_TERM, \
216                         CURATOR_PAGE_TERM, EXT_METADATA_TERM, METADATA_SOURCE_TERM, \
217                         NUM_SIM_TERM, \
218                         OPENDAP_TERM, THREDDS_TERM, WMS_TERM, WCS_TERM, WFS_TERM, \
219                         DATA_EXTRACTOR_TERM, FILE_BROWSER_TERM, LAS_TERM, LOGO_TERM]
220#URI_TERM, \
221
222    ATOM_CATEGORY = "atom"
223    DEPLOYABLE_ATOM_CATEGORY = "deployableatom"
224    ONLINE_REF_CATEGORY = "onlineref"
225    PROVIDER_CATEGORY = "provider"
226   
227    # A dictionary to group the various valid subtypes of atoms - grouped by their
228    # main type
229    SUBTYPE_TERMS = {
230                     DPT_TERM: [
231                        LIDAR_TERM, RADAR_TERM, \
232                        SONDE_TERM, NAVIGATION_TERM, \
233                        GAS_CHROMATOGRAPH_TERM, SPECTROMETER_TERM, \
234                        MASS_SPECTROMETER_TERM,
235                        MET_SENSOR_TERM, DOAS_TERM,
236                        ASOZ_TERM, RADIOMETER_TERM,
237                        FAGE_TERM, IMAGER_TERM,
238                        FILTER_TERM, PARTICLE_COUNTER_TERM,
239                        SAMPLER_TERM, OTHER_INSTRUMENT_TYPE_TERM,
240                        MODEL_TERM, INSTRUMENT_TERM
241                        ],
242                     DE_TERM: [
243                        SIMULATION_TERM, ANALYSIS_TERM, \
244                        MEASUREMENT_TERM       
245                        ],
246                     ACTIVITY_TERM: [
247                        DATA_COLLECTION_TERM, DATA_PROJECT_TERM, \
248                        DATA_CAMPAIGN_TERM, DATA_INVESTIGATION_TERM, \
249                        FLIGHT_TERM, CRUISE_TERM, \
250                        FUNDING_PROGRAM_TERM, DEPLOYMENT_TERM
251                        ],
252                     OBS_TERM: [
253                        STATIONARY_PLATFORM_TERM, MOVING_PLATFORM_TERM, \
254                        LAND_STATION_TERM, MOORING_TERM, \
255                        STATION_GROUP_TERM, SHIP_TERM, \
256                        AIRCRAFT_TERM, SATELLITE_TERM, \
257                        COMPUTER_TERM
258                        ]
259                     }
260   
261    ONE_HOUR = 3600.0
262   
263    VOCAB_SERVER_URL = 'http://vocab.ndg.nerc.ac.uk/clients/whatLists'
264
265    # TODO: this should be more configurable, really
266    BROWSE_SERVER_URL = "http://badc.nerc.ac.uk:8082"
267    BROWSE_STEM_URL = "/view/"
268    BROWSE_ROOT_URL = BROWSE_SERVER_URL + BROWSE_STEM_URL
269   
270    BADC_BROWSE_ROOT = 'http://badc.nerc.ac.uk/browse'
271
272    def __init__(self):
273        logging.info("Setting up VocabTermData object")
274        self.VOCAB_DATA_PAGE = None
275        # introduce slight delay here
276        self.REFRESH_TIME = time.time() - 2.0
277        self.latestTermVersion = {}
278        logging.info("VocabTermData object set up")
279
280
281    def isValidSubType(self, mainType, subType):
282        '''
283        Determine whether a specified subtype is valid for a particular
284        main type
285        @param mainType: term ID of the main type of the data
286        @param subType: term ID of the subtype of the data
287        @return: True if the subtype is valid
288        @raise VocabTermDataError: if the mainType is not recognised or the subType is not valid
289        '''
290        if not self.SUBTYPE_TERMS.has_key(mainType):
291            errorMessage = "Error: unrecognised data type: '%s'" %mainType
292            logging.error(errorMessage)
293            raise ValueError(errorMessage)
294
295        if subType in self.SUBTYPE_TERMS[mainType]:
296            return True
297           
298        errorMessage = "Error: subtype, '%s' is not valid for data type, '%s'" \
299            %(subType, mainType)
300        raise VocabTermDataError(errorMessage)
301
302
303    def getValidTypes(self, category):
304        '''
305        Return a list of the valid types available for a particular data category
306        @param category: type of term info to look up - e.g. atoms or providers
307        - this should be specified using the ..._CATEGORY instance variables
308        defined above
309        @return list of VocabTermInfo objects for the specified category
310        @raise VocabTermDataError: if unrecognised data category
311        '''
312        logging.debug("Lookup up list of valid %s types" %category)
313        catList = []
314        if category == self.ATOM_CATEGORY:
315            catList = self.ATOM_TYPES
316        elif category == self.DEPLOYABLE_ATOM_CATEGORY:
317            catList = self.DEPLOYABLE_ATOM_TYPES
318        elif category == self.PROVIDER_CATEGORY:
319            catList = self.PROVIDER_TYPES
320        elif category == self.ONLINE_REF_CATEGORY:
321            catList = self.ONLINE_REF_TYPES
322        else:
323            errorMessage = "Unrecognised data category, '%s'" %category
324            raise VocabTermDataError(errorMessage)
325           
326        types = []
327        for st in catList:
328            types.append(self.TERM_DATA[st])
329        return types
330
331
332    def tidySubTypeTitle(self, title):
333        '''
334        Tidy up the title of subtypes slightly, if need be
335        - NB, these are inherited from original moles format so are
336        not really 'human readable'
337        '''
338        if title and title.startswith('dg'):
339            title = title[2:]
340            # and fix any camelcase - putting into words
341            title = re.sub('([A-Z])', r" \1", title).strip()
342
343        return title
344
345    def getValidSubTypes(self, atomType):
346        '''
347        Get list of subtypes that are valid wrt a specified atom type
348        @param atomType: term ID for the atom type whose subtypes need to
349        be looked up
350        @return: list of valid subtypes
351        '''
352        logging.debug("Lookup up subtypes for atom type, '%s'" %atomType)
353        subTypes = self.SUBTYPE_TERMS.get(atomType) or []
354        types = []
355        for st in subTypes:
356            item = self.TERM_DATA[st]
357            item.title = self.tidySubTypeTitle(item.title)
358            types.append(item)
359        logging.debug("Found subtypes: %s" %subTypes)
360        return types
361
362       
363    def getTermFromTitle(self, title):
364        '''
365        Given a term title/label, get back the related term id
366        @param title: title/label of term id to retrieve
367        @raise VocabTermDataError: if more than one title or no title is returned 
368        '''
369        termID = []
370        for val in self.TERM_DATA.itervalues():
371            if val.title == title:
372                termID.append(val.termID)
373       
374        if len(termID) != 1:
375            errorMessage = "Error: could not accurately determine the vocab term \
376                ID for the label, '%s' - %s values returned" %(title, len(termID))
377            raise VocabTermDataError(errorMessage)
378
379        return termID[0]
380       
381
382    def _getVocabDataPage(self):
383        '''
384        Getter method to allow regular refreshing of data
385        '''
386        if self.REFRESH_TIME < time.time():
387            self.getVocabServerData()
388       
389        return self.VOCAB_DATA_PAGE
390
391   
392    def getVocabServerData(self):
393        '''
394        Retrieve the web page containing the versioning info for the various vocab terms
395        '''
396        logging.info("Retrieving vocab server data page")
397        self.VOCAB_DATA_PAGE = openURLWithDefaultProxy(self.VOCAB_SERVER_URL)
398        self.REFRESH_TIME = time.time() + self.ONE_HOUR
399        logging.info("Vocab server data retrieved")
400
401
402    def getLatestTermVersions(self):
403        '''
404        Retrieve the latest versions of the vocab terms required
405        - NB, refreshes the data on an hourly basis, if necessary
406        '''
407        # now need to parse the returned data to get the current version number
408        for termName in self.TERM_DATA:
409            if termName not in self.latestTermVersion:
410                self.getLatestTermVersion(termName)
411
412                   
413    def getLatestTermVersion(self, termName):
414        '''
415        Parse the vocab server data and determine the latest version number of the term with the specified name
416        @param termName: name of term whose current version needs to be established
417        '''
418        logging.info("Getting latest term version for term, '%s'" %termName)
419        # check for recent data - NB, the term will probably share a base url with other terms
420        # so may have already have the correct URL set up
421        if termName not in self.latestTermVersion or self.REFRESH_TIME < time.time():
422   
423            uri = self.TERM_DATA[termName].vocabURL
424            currentVersion = self._getURIVersion(uri)
425
426            self.latestTermVersion[termName] = str(currentVersion)
427            #if currentVersion is not None:
428            #    if currentVersion:
429            #        self.latestTermVersion[termName] = str(currentVersion)
430            #    else:
431            #        self.latestTermVersion[termName] = str(uri)
432        logging.info("Latest term version for term, '%s' retrieved" %termName)
433
434
435    def _getURIVersion(self, uri):
436        '''
437        Parse the server data for a specified uri and return the latest version number of it
438        @param uri: uri to look for
439        @raise VocabTermDataError: if specified uri not found in vocab server
440        @return version of vocab uri, '' if uri contains latest version, None if uri not found
441        '''
442        logging.debug("Getting current version number of uri, '%s'" %uri)
443        # allow the data to be refreshed on an hourly basis
444        pageData = self._getVocabDataPage()
445       
446        uri = uri.replace('/term/','/list/')
447        if uri.endswith('/'):
448            uri = uri.rstrip('/')
449        # NB, the uri may already have a version number included in it - so allow this in the search
450        regExp = re.compile(r'(' + uri + '(/(\d{1,}))?)')
451        currentVersion = None
452        foundTerm = False
453        for termVals in regExp.findall(pageData):
454            foundTerm = True
455            version = termVals[2]
456            # version number must be included in original uri, if match found without a version number
457            # being split out - so ignore this - since the original uri is already specified in full
458            if not version:
459                logging.debug("Specified URI already contains current version")
460                return ''
461
462            # TODO: check how the versioning system will be done - i.e. 3.2.4 type versioning will cause this to break
463            if not currentVersion:
464                currentVersion = int(version)
465            elif int(version) > currentVersion:
466                currentVersion = int(version)
467
468        if currentVersion == None:
469            errorMessage = "Could not find information in vocab server for uri, '%s' - exiting" %uri
470            logging.error(errorMessage)
471            # TODO: uncomment the ValueError once all the vocab terms have been defined
472            #raise VocabTermDataError(errorMessage)
473        else:
474            logging.debug("Version number found: '%s'" %currentVersion)
475
476        return currentVersion
477
478       
479    def getTermCurrentVocabURL(self, termName):
480        '''
481        Get the current URL on the vocab server to the specified term
482        @param termName: name of term whose URL to return
483        @raise VocabTermDataError: if term not found
484        '''
485        logging.debug("Looking up vocab data for term: '%s'" %termName)
486       
487        baseURL = None
488        if termName not in self.TERM_DATA:
489           
490            # NB, some item term IDs are different from their item name; this was
491            # to allow the import of historical data and should be fixed at some
492            # point
493            foundTerm = False
494            for key, item in self.TERM_DATA.items():
495                if termName == item.termID:
496                    termName = key
497                    foundTerm = True
498                    baseURL = item.vocabURL
499                    break
500                   
501            if not foundTerm:
502                raise VocabTermDataError("Could not find term, '%s' in \
503                    defined list of valid vocab terms - exiting" %termName)
504        else:
505            baseURL = self.TERM_DATA[termName].vocabURL
506           
507        if termName not in self.latestTermVersion:
508            self.getLatestTermVersion(termName)
509
510        uri = None
511        if termName in self.latestTermVersion:
512            uri = baseURL + \
513                "/" + self.latestTermVersion[termName] + "/" + \
514                self.TERM_DATA[termName].termID
515            logging.debug("Returning vocab URL: '%s'" %uri)
516        else:
517            logging.debug("URL not found - returning base URL, '%s'" %baseURL)
518            uri = baseURL
519           
520        return  uri
521   
522   
523    def getCurrentVocabURI(self, uri):
524        '''
525        Look up a specified URI and return the current version of it
526        '''
527        logging.debug("Looking up current version of uri: '%s'" %uri)
528        currentVersion = self._getURIVersion(uri)
529        logging.debug("URI version looked up")
530        # NB, this can validly return None or '' - ignore both cases
531        if currentVersion:
532            if not uri.endswith('/'):
533                uri += '/'
534            return uri + str(currentVersion)
535        return uri
536   
537    def getVTI(self, term):
538        '''
539        Return the vocab term item for the specified term
540        @param term: term ID - one of the constants defined above
541        @return VocabTermItem corresponding to term
542        @raise VocabTermDataError if term not found
543        '''
544        vti = self.TERM_DATA.get(term)
545        if not vti:
546            raise VocabTermDataError("No info for term, '%s', found" %term)
547   
548        return vti
549   
550   
551    def getTermItemfromFullVocabURI(self, uri):
552        '''
553        Given a full term id - i.e. with vocab uri + version + term ID
554        determine the correct vocab data term item
555        @param uri: full term ID with version + vocab uri
556        @return vocab term item corresponding to the input uri
557        @raise VocabTermDataError if term ID not found
558        '''
559        logging.debug("Determining term ID for uri, '%s'" %uri)
560        termID = uri.split('/')[-1]
561
562        return self.getTermItemfromURIAndTerm(uri, termID)
563   
564   
565    def getTermItemfromURIAndTerm(self, uri, termID):
566        '''
567        Given a term id and its uri
568        determine the correct vocab data term item
569        @param uri: base uri for vocab term - i.e. without versioning info
570        @param termID: term ID to look up
571        @return vocab term item corresponding to the input uri/term
572        @raise VocabTermDataError if term ID not found
573        '''
574        logging.debug("Determining term item for uri, '%s', term name, '%s'" %(uri, termID))
575        vti = None
576        if self.TERM_DATA.get(termID) and uri.startswith(self.TERM_DATA[termID].vocabURL):
577            vti = self.TERM_DATA[termID]
578        else:
579            for item in self.TERM_DATA.itervalues():
580                if item.termID == termID and uri.startswith(item.vocabURL):
581                    vti = item
582                    break
583       
584        if not vti:
585            raise VocabTermDataError("Unrecognised term URI: '%s'" %uri)
586
587        logging.debug("Found matching term - '%s'" %vti.termID)
588        return vti
589   
590   
591    def isDeployable(self, vti):
592        '''
593        Determines whether a vocab term item can be used as part of a deployment
594        @param vti: vocab term item to check
595        @return True if allowed as part of deployment, false otherwise
596        '''
597        logging.debug("Checking if item is deployable")
598        if vti:
599            if vti.termID == self.ACTIVITY_TERM or \
600                vti.termID == self.DPT_TERM or \
601                vti.termID == self.OBS_TERM:
602                logging.debug("- item is deployable")
603                return True
604       
605        logging.debug("- item is not deployable")
606        return False
607   
608   
609    def isDeployment(self, vti):
610        '''
611        Determines whether a vocab term item is a deployment item
612        @param vti: vocab term item to check
613        @return True if a deployment, false otherwise
614        '''
615        logging.debug("Checking if item is a deployment")
616        if vti:
617            if vti.termID == self.DEPLOYMENT_TERM:
618                logging.debug("- item is a deployment")
619                return True
620       
621        logging.debug("- item is not a deployment")
622        return False
623   
624   
625    def isGranule(self, vti):
626        '''
627        Determines whether a vocab term item is a granule item
628        @param vti: vocab term item to check
629        @return True if a granule, false otherwise
630        '''
631        logging.debug("Checking if item is a granule")
632        if vti:
633            if vti.termID == self.GRANULE_TERM:
634                logging.debug("- item is a granule")
635                return True
636       
637        logging.debug("- item is not a granule")
638        return False
Note: See TracBrowser for help on using the repository browser.