source: exist/trunk/python/ndgUtils/vocabtermdata.py @ 4505

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/vocabtermdata.py@4505
Revision 4505, 28.0 KB checked in by cbyrom, 12 years ago (diff)

Simplify lookup of current vocab term url.

Line 
1#!/usr/bin/env python
2'''
3 Class to store and access the various vocab term data
4 
5 @author: C Byrom, Tessella Jul 2008
6'''
7import sys, logging, commands, string, os, time, re, urllib
8   
def isValidTermURI(uri):
    '''
    Determine whether a specific vocab term uri is valid - NB, using the
    utilities.checkURL method won't typically work since the RDF data is
    exposed directly - i.e. without HEAD information which is usually looked
    for.  Instead the page is retrieved and checked for an '<rdf:RDF' marker.

    This is a best-effort check: any error raised whilst retrieving the page
    is logged and treated as 'not valid' rather than being propagated.

    @param uri: vocab term uri to check
    @return: True if the retrieved page contains '<rdf:RDF', False otherwise
    (including when the page could not be retrieved)
    '''
    logging.debug("Checking vocab term uri, '%s'" %uri)
    page = None
    try:
        page = urllib.urlopen(uri)
        if '<rdf:RDF' in page.read():
            logging.debug("- found valid term")
            return True

    except Exception as e:
        # NB, log the exception itself rather than 'e.message' - the latter is
        # deprecated and can be empty for errors raised with several arguments
        logging.error("Exception thrown whilst verifying uri: '%s'" %e)
    finally:
        # always release the connection, whatever the outcome of the check
        if page is not None:
            page.close()

    logging.debug("- term appears to be invalid")
    return False
31                     
32
class VocabTermItem(object):
    '''
    Simple value holder for a single vocab term item

    Attributes set on construction:
    - vocabURL: url of the vocab that the term belongs to
    - termID: identifier of the term
    - title: label for the term (None if not specified)
    '''
    def __init__(self, vocabURL, termID, title=None):
        # plain attribute storage only - no validation is done here
        self.vocabURL, self.termID, self.title = vocabURL, termID, title
41
42
class VocabTermDataError(ValueError):
    """
    Error raised by the VocabTermData class.

    NB, the message is written to the error log as soon as the exception
    object is created - so code raising this does not need to log it as well.
    """
    def __init__(self, msg):
        # log first, then initialise as a standard ValueError
        logging.error(msg)
        super(VocabTermDataError, self).__init__(msg)
50
51
class VocabTermData(object):
    '''
    Class representing vocab term data - including
    methods to look these up to ensure they are current

    The class level data is made up of:
    - the ..._TERM constants: the names used to refer to the known terms
    - TERM_DATA: dictionary mapping each term name to its VocabTermItem
    - the ..._TYPES lists and ..._CATEGORY constants: groupings of terms which
      can be retrieved via getValidTypes()
    - SUBTYPE_TERMS: the subtypes which are valid for each main atom type

    NB, all of this is defined at class level and so is shared between all
    instances of the class
    '''

    # terms used for online references, services and related links
    OBJECT_PAGE_TERM = 'ObjectPage'
    DATA_URL_TERM = 'DataURL'
    DATA_PAGE_TERM = 'DataPage'
    CURATOR_PAGE_TERM = 'CuratorPage'
    EXT_METADATA_TERM = 'ExtMetadata'
    METADATA_SOURCE_TERM = 'MetadataSource'
    URI_TERM = 'URI'
    LOGO_TERM = 'LOGO'
    NUM_SIM_TERM = 'NumSim'
    OPENDAP_TERM = 'OPENDAP'
    THREDDS_TERM = 'THREDDS'
    WMS_TERM = 'WMS'
    WCS_TERM = 'WCS'
    WFS_TERM = 'WFS'
    LAS_TERM = 'LAS'
    DATA_EXTRACTOR_TERM = 'DataExtractor'
    FILE_BROWSER_TERM = 'FileBrowser'
    CSML_TERM = 'CSML'

    # main data types (see also ATOM_TYPES, below)
    ACTIVITY_TERM = 'ACTIVITY'
    DPT_TERM = 'DPT'
    OBS_TERM = 'OBS'
    GRANULE_TERM = 'GRANULE'
    DE_TERM = "DE"

    # dpt subtypes
    LIDAR_TERM = "dgLidar"
    RADAR_TERM = "dgRadar"
    SONDE_TERM = "dgSonde"
    NAVIGATION_TERM = "dgNavigation"
    GAS_CHROMATOGRAPH_TERM = "dgGasChromatograph"
    SPECTROMETER_TERM = "dgSpectrometer"
    MASS_SPECTROMETER_TERM = "dgMassSpectrometer"
    MET_SENSOR_TERM = "dgMetSensor"
    DOAS_TERM = "dgDOAS"
    ASOZ_TERM = "dgASOZ"
    RADIOMETER_TERM = "dgRadiometer"
    FAGE_TERM = "dgFAGE"
    IMAGER_TERM = "dgImager"
    FILTER_TERM = "dgFilter"
    PARTICLE_COUNTER_TERM = "dgParticleCounter"
    SAMPLER_TERM = "dgSampler"
    OTHER_INSTRUMENT_TYPE_TERM = "dgOtherInstrumentType"
    MODEL_TERM = "dgModel"
    INSTRUMENT_TERM = "dgInstrument"

    # de subtypes
    SIMULATION_TERM = "dgSimulation"
    ANALYSIS_TERM = "dgAnalysis"
    MEASUREMENT_TERM = "dgMeasurement"

    # activity subtypes
    DATA_COLLECTION_TERM = "dgActivityDataCollection"
    DATA_PROJECT_TERM = "dgActivityDataProject"
    DATA_CAMPAIGN_TERM = "dgActivityDataCampaign"
    DATA_INVESTIGATION_TERM = "dgActivityDataInvestigation"
    FLIGHT_TERM = "dgFlight"
    CRUISE_TERM = "dgCruise"
    FUNDING_PROGRAM_TERM = "dgFundingProgram"
    DEPLOYMENT_TERM = "Deployment"
    # NB, this is a specialised activity with the subtype deployment pre-set
    ACTIVITY_DEPLOYMENT_TERM = "ActivityDeployment"

    # obs subtypes
    STATIONARY_PLATFORM_TERM = "dgStationaryPlatform"
    MOVING_PLATFORM_TERM = "dgMovingPlatform"
    LAND_STATION_TERM = "dgLandStation"
    MOORING_TERM = "dgMooring"
    STATION_GROUP_TERM = "dgStationGroup"
    SHIP_TERM = "dgShip"
    AIRCRAFT_TERM = "dgAircraft"
    SATELLITE_TERM = "dgSatellite"
    COMPUTER_TERM = "dgComputer"

    # provider types
    BADC_TERM = 'badc.nerc.ac.uk'
    NEODC_TERM = 'neodc.nerc.ac.uk'

    # Lookup of term name -> VocabTermItem(vocabURL, termID, title)
    # NB, entries whose vocabURL contains 'NOT YET SET UP' are placeholders - they
    # use their own term name as the term ID rather than a vocab server ID.
    # NB, the term ID is not unique across the dictionary (e.g. '1' is used by
    # both OBJECT_PAGE_TERM and CSML_TERM, under different vocab URLs)
    TERM_DATA = {
                 OBJECT_PAGE_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '1', title = 'Object Home Page'),
                 DATA_URL_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '2', title = 'Data URL'),
                 DATA_PAGE_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '3', title = 'Data Home Page'),
                 CURATOR_PAGE_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '4', title = 'Curator Home Page'),
                 EXT_METADATA_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '5', title = 'Extended Metadata'),
                 METADATA_SOURCE_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '6', title = 'Original Metadata Source'),
                 # NB, URI_TERM currently has no entry in the lookup:
                 #URI_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '3URI', title = 'Data Home Page'),

                 BADC_TERM:VocabTermItem('NOT YET SET UP', BADC_TERM, title = 'British Atmospheric Data Centre'),
                 NEODC_TERM:VocabTermItem('NOT YET SET UP', NEODC_TERM, title = 'NERC Earth Observation Data Centre'),

                 LOGO_TERM:VocabTermItem('NOT YET SET UP', LOGO_TERM, title = 'Logo'),
                 NUM_SIM_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '7', title = 'NumSim description'),
                 OPENDAP_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU010', title = 'GET DATA &gt; OPENDAP DATA (DODS)'),
                 THREDDS_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU013', title = 'GET DATA &gt; THREDDS DATA'),
                 WMS_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU026', title = 'GET SERVICE &gt; GET WEB MAP SERVICE (WMS)'),
                 WCS_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU024', title = 'GET SERVICE &gt; GET WEB COVERAGE SERVICE (WCS)'),
                 WFS_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU025', title = 'GET SERVICE &gt; GET WEB FEATURE SERVICE (WFS)'),
                 LAS_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU006', title = 'GET DATA &gt; LAS'),
                 DATA_EXTRACTOR_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '8', title = 'NDG DataExtractor'),
                 FILE_BROWSER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '9', title = 'File Browser'),
                 CSML_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N021', '1'),
                 ACTIVITY_TERM:VocabTermItem('Activity - NOT YET SET UP', ACTIVITY_TERM, title = 'Activity'),
                 ACTIVITY_DEPLOYMENT_TERM:VocabTermItem('Activity Deployment - NOT YET SET UP', ACTIVITY_DEPLOYMENT_TERM, title = 'Activity Deployment'),
                 DPT_TERM:VocabTermItem('DPT - NOT YET SET UP', DPT_TERM, title = 'Data Production Tool'),
                 OBS_TERM:VocabTermItem('OBS - NOT YET SET UP', OBS_TERM, title = 'Observation Station'),
                 GRANULE_TERM:VocabTermItem('GRAN - NOT YET SET UP', GRANULE_TERM, title = 'Data Granule'),
                 DE_TERM:VocabTermItem('DE - NOT YET SET UP', DE_TERM, title = 'Data Entity'),

                 LIDAR_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG02', title = "Lidar"),
                 RADAR_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG03', title = "Radar"),
                 SONDE_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG04', title = "Sonde"),
                 NAVIGATION_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG05', title = "Navigation"),
                 GAS_CHROMATOGRAPH_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG06', title = "Gas Chromatograph"),
                 # NB, from here on the titles are just the raw term names - these
                 # can be tidied up for display via tidySubTypeTitle()
                 SPECTROMETER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG07', title = SPECTROMETER_TERM),
                 MASS_SPECTROMETER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG08', title = MASS_SPECTROMETER_TERM),
                 MET_SENSOR_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG09', title = MET_SENSOR_TERM),
                 DOAS_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG10', title = DOAS_TERM),
                 ASOZ_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG11', title = ASOZ_TERM),
                 RADIOMETER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG12', title = RADIOMETER_TERM),
                 FAGE_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG13', title = FAGE_TERM),
                 IMAGER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG14', title = IMAGER_TERM),
                 FILTER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG15', title = FILTER_TERM),
                 PARTICLE_COUNTER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG16', title = PARTICLE_COUNTER_TERM),
                 SAMPLER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG17', title = SAMPLER_TERM),
                 OTHER_INSTRUMENT_TYPE_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG99', title = OTHER_INSTRUMENT_TYPE_TERM),
                 MODEL_TERM:VocabTermItem('dgModel - NOT YET SET UP', MODEL_TERM, title = MODEL_TERM),
                 INSTRUMENT_TERM:VocabTermItem('dgInstrument - NOT YET SET UP', INSTRUMENT_TERM, title = INSTRUMENT_TERM),

                 SIMULATION_TERM:VocabTermItem('NOT YET SET UP', SIMULATION_TERM, title = SIMULATION_TERM),
                 ANALYSIS_TERM:VocabTermItem('NOT YET SET UP', ANALYSIS_TERM, title = ANALYSIS_TERM),
                 MEASUREMENT_TERM:VocabTermItem('NOT YET SET UP', MEASUREMENT_TERM, title = MEASUREMENT_TERM),
                 DATA_COLLECTION_TERM:VocabTermItem('NOT YET SET UP', DATA_COLLECTION_TERM, title = DATA_COLLECTION_TERM),
                 DATA_PROJECT_TERM:VocabTermItem('NOT YET SET UP', DATA_PROJECT_TERM, title = DATA_PROJECT_TERM),
                 DATA_CAMPAIGN_TERM:VocabTermItem('NOT YET SET UP', DATA_CAMPAIGN_TERM, title = DATA_CAMPAIGN_TERM),
                 DATA_INVESTIGATION_TERM:VocabTermItem('NOT YET SET UP', DATA_INVESTIGATION_TERM, title = DATA_INVESTIGATION_TERM),
                 FLIGHT_TERM:VocabTermItem('NOT YET SET UP', FLIGHT_TERM, title = FLIGHT_TERM),
                 CRUISE_TERM:VocabTermItem('NOT YET SET UP', CRUISE_TERM, title = CRUISE_TERM),
                 FUNDING_PROGRAM_TERM:VocabTermItem('NOT YET SET UP', FUNDING_PROGRAM_TERM, title = FUNDING_PROGRAM_TERM),
                 DEPLOYMENT_TERM:VocabTermItem('NOT YET SET UP', DEPLOYMENT_TERM, title = DEPLOYMENT_TERM),

                 STATIONARY_PLATFORM_TERM:VocabTermItem('NOT YET SET UP', STATIONARY_PLATFORM_TERM, title = STATIONARY_PLATFORM_TERM),
                 MOVING_PLATFORM_TERM:VocabTermItem('NOT YET SET UP', MOVING_PLATFORM_TERM, title = MOVING_PLATFORM_TERM),
                 LAND_STATION_TERM:VocabTermItem('NOT YET SET UP', LAND_STATION_TERM, title = LAND_STATION_TERM),
                 MOORING_TERM:VocabTermItem('NOT YET SET UP', MOORING_TERM, title = MOORING_TERM),
                 STATION_GROUP_TERM:VocabTermItem('NOT YET SET UP', STATION_GROUP_TERM, title = STATION_GROUP_TERM),
                 SHIP_TERM:VocabTermItem('NOT YET SET UP', SHIP_TERM, title = SHIP_TERM),
                 AIRCRAFT_TERM:VocabTermItem('NOT YET SET UP', AIRCRAFT_TERM, title = AIRCRAFT_TERM),
                 SATELLITE_TERM:VocabTermItem('NOT YET SET UP', SATELLITE_TERM, title = SATELLITE_TERM),
                 COMPUTER_TERM:VocabTermItem('NOT YET SET UP', COMPUTER_TERM, title = COMPUTER_TERM)
                 }

    # Groupings of the term names - these are the lists returned (as
    # VocabTermItems) by getValidTypes() for the categories defined below
    ATOM_TYPES = [ACTIVITY_TERM, DE_TERM, DEPLOYMENT_TERM, DPT_TERM, GRANULE_TERM, OBS_TERM]

    DEPLOYABLE_ATOM_TYPES = [ACTIVITY_TERM, DPT_TERM, OBS_TERM]

    PROVIDER_TYPES = [BADC_TERM, NEODC_TERM]

    ONLINE_REF_TYPES = [ OBJECT_PAGE_TERM, DATA_URL_TERM, DATA_PAGE_TERM, \
                         CURATOR_PAGE_TERM, EXT_METADATA_TERM, METADATA_SOURCE_TERM, \
                         NUM_SIM_TERM, \
                         OPENDAP_TERM, THREDDS_TERM, WMS_TERM, WCS_TERM, WFS_TERM, \
                         DATA_EXTRACTOR_TERM, FILE_BROWSER_TERM, LAS_TERM, LOGO_TERM]
    # NB, URI_TERM is currently left out of the online ref types

    # Category names - to be used as the input to getValidTypes()
    ATOM_CATEGORY = "atom"
    DEPLOYABLE_ATOM_CATEGORY = "deployableatom"
    ONLINE_REF_CATEGORY = "onlineref"
    PROVIDER_CATEGORY = "provider"

    # A dictionary to group the various valid subtypes of atoms - grouped by their
    # main type
    SUBTYPE_TERMS = {
                     DPT_TERM: [
                        LIDAR_TERM, RADAR_TERM, \
                        SONDE_TERM, NAVIGATION_TERM, \
                        GAS_CHROMATOGRAPH_TERM, SPECTROMETER_TERM, \
                        MASS_SPECTROMETER_TERM,
                        MET_SENSOR_TERM, DOAS_TERM,
                        ASOZ_TERM, RADIOMETER_TERM,
                        FAGE_TERM, IMAGER_TERM,
                        FILTER_TERM, PARTICLE_COUNTER_TERM,
                        SAMPLER_TERM, OTHER_INSTRUMENT_TYPE_TERM,
                        MODEL_TERM, INSTRUMENT_TERM
                        ],
                     DE_TERM: [
                        SIMULATION_TERM, ANALYSIS_TERM, \
                        MEASUREMENT_TERM
                        ],
                     ACTIVITY_TERM: [
                        DATA_COLLECTION_TERM, DATA_PROJECT_TERM, \
                        DATA_CAMPAIGN_TERM, DATA_INVESTIGATION_TERM, \
                        FLIGHT_TERM, CRUISE_TERM, \
                        FUNDING_PROGRAM_TERM, DEPLOYMENT_TERM
                        ],
                     OBS_TERM: [
                        STATIONARY_PLATFORM_TERM, MOVING_PLATFORM_TERM, \
                        LAND_STATION_TERM, MOORING_TERM, \
                        STATION_GROUP_TERM, SHIP_TERM, \
                        AIRCRAFT_TERM, SATELLITE_TERM, \
                        COMPUTER_TERM
                        ]
                     }

    # time, in seconds, for which the retrieved vocab server data is kept before
    # being refreshed - see getVocabServerData()
    ONE_HOUR = 3600.0

    # page retrieved by getVocabServerData() - searched for the versions of the vocab uris
    VOCAB_SERVER_URL = 'http://vocab.ndg.nerc.ac.uk/clients/whatLists'

    # NOTE(review): not referenced elsewhere in this class - presumably used by
    # client code to build browse links; confirm before changing
    BROWSE_ROOT_URL = "http://localhost:5000/view/"
266
267    def __init__(self):
268        logging.info("Setting up VocabTermData object")
269        self.VOCAB_DATA_PAGE = None
270        # introduce slight delay here
271        self.REFRESH_TIME = time.time() - 2.0
272        self.latestTermVersion = {}
273        logging.info("VocabTermData object set up")
274
275
276    def isValidSubType(self, mainType, subType):
277        '''
278        Determine whether a specified subtype is valid for a particular
279        main type
280        @param mainType: term ID of the main type of the data
281        @param subType: term ID of the subtype of the data
282        @return: True if the subtype is valid
283        @raise VocabTermDataError: if the mainType is not recognised or the subType is not valid
284        '''
285        if not self.SUBTYPE_TERMS.has_key(mainType):
286            errorMessage = "Error: unrecognised data type: '%s'" %mainType
287            logging.error(errorMessage)
288            raise ValueError(errorMessage)
289
290        if subType in self.SUBTYPE_TERMS[mainType]:
291            return True
292           
293        errorMessage = "Error: subtype, '%s' is not valid for data type, '%s'" \
294            %(subType, mainType)
295        raise VocabTermDataError(errorMessage)
296
297
298    def getValidTypes(self, category):
299        '''
300        Return a list of the valid types available for a particular data category
301        @param category: type of term info to look up - e.g. atoms or providers
302        - this should be specified using the ..._CATEGORY instance variables
303        defined above
304        @return list of VocabTermInfo objects for the specified category
305        @raise VocabTermDataError: if unrecognised data category
306        '''
307        logging.debug("Lookup up list of valid %s types" %category)
308        catList = []
309        if category == self.ATOM_CATEGORY:
310            catList = self.ATOM_TYPES
311        elif category == self.DEPLOYABLE_ATOM_CATEGORY:
312            catList = self.DEPLOYABLE_ATOM_TYPES
313        elif category == self.PROVIDER_CATEGORY:
314            catList = self.PROVIDER_TYPES
315        elif category == self.ONLINE_REF_CATEGORY:
316            catList = self.ONLINE_REF_TYPES
317        else:
318            errorMessage = "Unrecognised data category, '%s'" %category
319            raise VocabTermDataError(errorMessage)
320           
321        types = []
322        for st in catList:
323            types.append(self.TERM_DATA[st])
324        return types
325
326
327    def tidySubTypeTitle(self, title):
328        '''
329        Tidy up the title of subtypes slightly, if need be
330        - NB, these are inherited from original moles format so are
331        not really 'human readable'
332        '''
333        if title and title.startswith('dg'):
334            title = title[2:]
335            # and fix any camelcase - putting into words
336            title = re.sub('([A-Z])', r" \1", title).strip()
337
338        return title
339
340    def getValidSubTypes(self, atomType):
341        '''
342        Get list of subtypes that are valid wrt a specified atom type
343        @param atomType: term ID for the atom type whose subtypes need to
344        be looked up
345        @return: list of valid subtypes
346        '''
347        logging.debug("Lookup up subtypes for atom type, '%s'" %atomType)
348        subTypes = self.SUBTYPE_TERMS.get(atomType) or []
349        types = []
350        for st in subTypes:
351            item = self.TERM_DATA[st]
352            item.title = self.tidySubTypeTitle(item.title)
353            types.append(item)
354        logging.debug("Found subtypes: %s" %subTypes)
355        return types
356
357       
358    def getTermFromTitle(self, title):
359        '''
360        Given a term title/label, get back the related term id
361        @param title: title/label of term id to retrieve
362        @raise VocabTermDataError: if more than one title or no title is returned 
363        '''
364        termID = []
365        for val in self.TERM_DATA.itervalues():
366            if val.title == title:
367                termID.append(val.termID)
368       
369        if len(termID) != 1:
370            errorMessage = "Error: could not accurately determine the vocab term \
371                ID for the label, '%s' - %s values returned" %(title, len(termID))
372            raise VocabTermDataError(errorMessage)
373
374        return termID[0]
375       
376
377    def _getVocabDataPage(self):
378        '''
379        Getter method to allow regular refreshing of data
380        '''
381        if self.REFRESH_TIME < time.time():
382            self.getVocabServerData()
383       
384        return self.VOCAB_DATA_PAGE
385
386   
387    def getVocabServerData(self):
388        '''
389        Retrieve the web page containing the versioning info for the various vocab terms
390        '''
391        logging.info("Retrieving vocab server data page")
392        f = urllib.urlopen(self.VOCAB_SERVER_URL)
393        self.VOCAB_DATA_PAGE = f.read()
394        f.close()
395        self.REFRESH_TIME = time.time() + self.ONE_HOUR
396        logging.info("Vocab server data retrieved")
397
398
399    def getLatestTermVersions(self):
400        '''
401        Retrieve the latest versions of the vocab terms required
402        - NB, refreshes the data on an hourly basis, if necessary
403        '''
404        # now need to parse the returned data to get the current version number
405        for termName in self.TERM_DATA:
406            if termName not in self.latestTermVersion:
407                self.getLatestTermVersion(termName)
408
409                   
410    def getLatestTermVersion(self, termName):
411        '''
412        Parse the vocab server data and determine the latest version number of the term with the specified name
413        @param termName: name of term whose current version needs to be established
414        '''
415        logging.info("Getting latest term version for term, '%s'" %termName)
416        # check for recent data - NB, the term will probably share a base url with other terms
417        # so may have already have the correct URL set up
418        if termName not in self.latestTermVersion or self.REFRESH_TIME < time.time():
419   
420            uri = self.TERM_DATA[termName].vocabURL
421            currentVersion = self._getURIVersion(uri)
422
423            self.latestTermVersion[termName] = str(currentVersion)
424            #if currentVersion is not None:
425            #    if currentVersion:
426            #        self.latestTermVersion[termName] = str(currentVersion)
427            #    else:
428            #        self.latestTermVersion[termName] = str(uri)
429        logging.info("Latest term version for term, '%s' retrieved" %termName)
430
431
432    def _getURIVersion(self, uri):
433        '''
434        Parse the server data for a specified uri and return the latest version number of it
435        @param uri: uri to look for
436        @raise VocabTermDataError: if specified uri not found in vocab server
437        @return version of vocab uri, '' if uri contains latest version, None if uri not found
438        '''
439        logging.debug("Getting current version number of uri, '%s'" %uri)
440        # allow the data to be refreshed on an hourly basis
441        pageData = self._getVocabDataPage()
442       
443        uri = uri.replace('/term/','/list/')
444        if uri.endswith('/'):
445            uri = uri.rstrip('/')
446        # NB, the uri may already have a version number included in it - so allow this in the search
447        regExp = re.compile(r'(' + uri + '(/(\d{1,}))?)')
448        currentVersion = None
449        foundTerm = False
450        for termVals in regExp.findall(pageData):
451            foundTerm = True
452            version = termVals[2]
453            # version number must be included in original uri, if match found without a version number
454            # being split out - so ignore this - since the original uri is already specified in full
455            if not version:
456                logging.debug("Specified URI already contains current version")
457                return ''
458
459            # TODO: check how the versioning system will be done - i.e. 3.2.4 type versioning will cause this to break
460            if not currentVersion:
461                currentVersion = int(version)
462            elif int(version) > currentVersion:
463                currentVersion = int(version)
464
465        if currentVersion == None:
466            errorMessage = "Could not find information in vocab server for uri, '%s' - exiting" %uri
467            logging.error(errorMessage)
468            # TODO: uncomment the ValueError once all the vocab terms have been defined
469            #raise VocabTermDataError(errorMessage)
470        else:
471            logging.debug("Version number found: '%s'" %currentVersion)
472
473        return currentVersion
474
475       
476    def getTermCurrentVocabURL(self, termName):
477        '''
478        Get the current URL on the vocab server to the specified term
479        @param termName: name of term whose URL to return
480        @raise VocabTermDataError: if term not found
481        '''
482        logging.debug("Looking up vocab data for term: '%s'" %termName)
483       
484        baseURL = None
485        if termName not in self.TERM_DATA:
486           
487            # NB, some item term IDs are different from their item name; this was
488            # to allow the import of historical data and should be fixed at some
489            # point
490            foundTerm = False
491            for key, item in self.TERM_DATA.items():
492                if termName == item.termID:
493                    termName = key
494                    foundTerm = True
495                    baseURL = item.vocabURL
496                    break
497                   
498            if not foundTerm:
499                raise VocabTermDataError("Could not find term, '%s' in \
500                    defined list of valid vocab terms - exiting" %termName)
501        else:
502            baseURL = self.TERM_DATA[termName].vocabURL
503           
504        if termName not in self.latestTermVersion:
505            self.getLatestTermVersion(termName)
506
507        uri = None
508        if termName in self.latestTermVersion:
509            uri = baseURL + \
510                "/" + self.latestTermVersion[termName] + "/" + \
511                self.TERM_DATA[termName].termID
512            logging.debug("Returning vocab URL: '%s'" %uri)
513        else:
514            logging.debug("URL not found - returning base URL, '%s'" %baseURL)
515            uri = baseURL
516           
517        return  uri
518   
519   
520    def getCurrentVocabURI(self, uri):
521        '''
522        Look up a specified URI and return the current version of it
523        '''
524        logging.debug("Looking up current version of uri: '%s'" %uri)
525        currentVersion = self._getURIVersion(uri)
526        logging.debug("URI version looked up")
527        # NB, this can validly return None or '' - ignore both cases
528        if currentVersion:
529            if not uri.endswith('/'):
530                uri += '/'
531            return uri + str(currentVersion)
532        return uri
533   
534    def getVTI(self, term):
535        '''
536        Return the vocab term item for the specified term
537        @param term: term ID - one of the constants defined above
538        @return VocabTermItem corresponding to term
539        @raise VocabTermDataError if term not found
540        '''
541        vti = self.TERM_DATA.get(term)
542        if not vti:
543            raise VocabTermDataError("No info for term, '%s', found" %term)
544   
545        return vti
546   
547   
548    def getTermItemfromFullVocabURI(self, uri):
549        '''
550        Given a full term id - i.e. with vocab uri + version + term ID
551        determine the correct vocab data term item
552        @param uri: full term ID with version + vocab uri
553        @return vocab term item corresponding to the input uri
554        @raise VocabTermDataError if term ID not found
555        '''
556        logging.debug("Determining term ID for uri, '%s'" %uri)
557        termID = uri.split('/')[-1]
558
559        return self.getTermItemfromURIAndTerm(uri, termID)
560   
561   
562    def getTermItemfromURIAndTerm(self, uri, termID):
563        '''
564        Given a term id and its uri
565        determine the correct vocab data term item
566        @param uri: base uri for vocab term - i.e. without versioning info
567        @param termID: term ID to look up
568        @return vocab term item corresponding to the input uri/term
569        @raise VocabTermDataError if term ID not found
570        '''
571        logging.debug("Determining term item for uri, '%s', term name, '%s'" %(uri, termID))
572        vti = None
573        if self.TERM_DATA.get(termID) and uri.startswith(self.TERM_DATA[termID].vocabURL):
574            vti = self.TERM_DATA[termID]
575        else:
576            for item in self.TERM_DATA.itervalues():
577                if item.termID == termID and uri.startswith(item.vocabURL):
578                    vti = item
579                    break
580       
581        if not vti:
582            raise VocabTermDataError("Unrecognised term URI: '%s'" %uri)
583
584        logging.debug("Found matching term - '%s'" %vti.termID)
585        return vti
586   
587   
588    def isDeployable(self, vti):
589        '''
590        Determines whether a vocab term item can be used as part of a deployment
591        @param vti: vocab term item to check
592        @return True if allowed as part of deployment, false otherwise
593        '''
594        logging.debug("Checking if item is deployable")
595        if vti:
596            if vti.termID == self.ACTIVITY_TERM or \
597                vti.termID == self.DPT_TERM or \
598                vti.termID == self.OBS_TERM:
599                logging.debug("- item is deployable")
600                return True
601       
602        logging.debug("- item is not deployable")
603        return False
604   
605   
606    def isDeployment(self, vti):
607        '''
608        Determines whether a vocab term item is a deployment item
609        @param vti: vocab term item to check
610        @return True if a deployment, false otherwise
611        '''
612        logging.debug("Checking if item is a deployment")
613        if vti:
614            if vti.termID == self.DEPLOYMENT_TERM:
615                logging.debug("- item is a deployment")
616                return True
617       
618        logging.debug("- item is not a deployment")
619        return False
620   
621   
622    def isGranule(self, vti):
623        '''
624        Determines whether a vocab term item is a granule item
625        @param vti: vocab term item to check
626        @return True if a granule, false otherwise
627        '''
628        logging.debug("Checking if item is a granule")
629        if vti:
630            if vti.termID == self.GRANULE_TERM:
631                logging.debug("- item is a granule")
632                return True
633       
634        logging.debug("- item is not a granule")
635        return False
Note: See TracBrowser for help on using the repository browser.