source: exist/trunk/python/ndgUtils/vocabtermdata.py @ 4494

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/vocabtermdata.py@4494
Revision 4494, 27.7 KB checked in by cbyrom, 12 years ago (diff)

Add new methods to look up simple URLs and vocab term URLs. NB: problems were encountered using the checkURL method, which uses httplib, when running behind a proxy server. Implement usage of the new methods, fix a small bug with keeping too many related links, and tidy up unused imports.

Line 
1#!/usr/bin/env python
2'''
3 Class to store and access the various vocab term data
4 
5 @author: C Byrom, Tessella Jul 2008
6'''
7import sys, logging, commands, string, os, time, re, urllib
8   
def isValidTermURI(uri):
    '''
    Determines whether a specific vocab term uri is valid, by retrieving it
    and checking that the returned data contains an '<rdf:RDF' element - NB,
    using the utilities.checkURL method won't typically work since the RDF
    data is exposed directly - i.e. without HEAD information which is usually
    looked for

    Any exception thrown whilst retrieving the data is logged and treated
    as an invalid uri - i.e. this function does not raise
    @param uri: vocab term uri to check
    @return: True if valid, False otherwise
    '''
    logging.debug("Checking vocab term uri, '%s'" %uri)
    try:
        page = urllib.urlopen(uri)
        try:
            pageData = page.read()
        finally:
            # always release the connection - even if the read fails
            page.close()

        if pageData.find('<rdf:RDF') > -1:
            logging.debug("- found valid term")
            return True

    except Exception:
        # NB, log the exception itself rather than its 'message' attribute;
        # the latter is empty for multi-argument errors such as IOError
        logging.error("Exception thrown whilst verifying uri: '%s'" \
            %(sys.exc_info()[1],))

    logging.debug("- term appears to be invalid")
    return False
31                     
32
class VocabTermItem(object):
    '''
    Simple holder for the data describing a single vocab term item
    - the values are stored as given, with no checking done
    '''
    def __init__(self, vocabURL, termID, title=None):
        '''
        @param vocabURL: vocab url to store for the term
        @param termID: ID of the term
        @keyword title: title/label of the term - defaults to None
        '''
        self.vocabURL, self.termID, self.title = vocabURL, termID, title
41
42
class VocabTermDataError(ValueError):
    """
    Error raised by the VocabTermData class - NB, creating the exception
    also writes its message to the error log
    """
    def __init__(self, msg):
        # log on construction, so every raise is recorded
        logging.error(msg)
        super(VocabTermDataError, self).__init__(msg)
50
51
class VocabTermData(object):
    '''
    Class holding the vocab term data - i.e. the term names, the vocab term
    items they map to and the groupings of these - together with methods to
    look the terms up on the vocab server, to ensure they are current
    '''

    # names of the individual terms - used as the keys into TERM_DATA
    OBJECT_PAGE_TERM = 'ObjectPage'
    DATA_URL_TERM = 'DataURL'
    DATA_PAGE_TERM = 'DataPage'
    CURATOR_PAGE_TERM = 'CuratorPage'
    EXT_METADATA_TERM = 'ExtMetadata'
    METADATA_SOURCE_TERM = 'MetadataSource'
    URI_TERM = 'URI'
    LOGO_TERM = 'LOGO'
    NUM_SIM_TERM = 'NumSim'
    OPENDAP_TERM = 'OPENDAP'
    THREDDS_TERM = 'THREDDS'
    WMS_TERM = 'WMS'
    WCS_TERM = 'WCS'
    WFS_TERM = 'WFS'
    LAS_TERM = 'LAS'
    DATA_EXTRACTOR_TERM = 'DataExtractor'
    FILE_BROWSER_TERM = 'FileBrowser'
    CSML_TERM = 'CSML'

    ACTIVITY_TERM = 'ACTIVITY'
    DPT_TERM = 'DPT'
    OBS_TERM = 'OBS'
    GRANULE_TERM = 'GRANULE'
    DE_TERM = "DE"

    # dpt subtypes
    LIDAR_TERM = "dgLidar"
    RADAR_TERM = "dgRadar"
    SONDE_TERM = "dgSonde"
    NAVIGATION_TERM = "dgNavigation"
    GAS_CHROMATOGRAPH_TERM = "dgGasChromatograph"
    SPECTROMETER_TERM = "dgSpectrometer"
    MASS_SPECTROMETER_TERM = "dgMassSpectrometer"
    MET_SENSOR_TERM = "dgMetSensor"
    DOAS_TERM = "dgDOAS"
    ASOZ_TERM = "dgASOZ"
    RADIOMETER_TERM = "dgRadiometer"
    FAGE_TERM = "dgFAGE"
    IMAGER_TERM = "dgImager"
    FILTER_TERM = "dgFilter"
    PARTICLE_COUNTER_TERM = "dgParticleCounter"
    SAMPLER_TERM = "dgSampler"
    OTHER_INSTRUMENT_TYPE_TERM = "dgOtherInstrumentType"
    MODEL_TERM = "dgModel"
    INSTRUMENT_TERM = "dgInstrument"

    # de subtypes
    SIMULATION_TERM = "dgSimulation"
    ANALYSIS_TERM = "dgAnalysis"
    MEASUREMENT_TERM = "dgMeasurement"

    # activity subtypes
    DATA_COLLECTION_TERM = "dgActivityDataCollection"
    DATA_PROJECT_TERM = "dgActivityDataProject"
    DATA_CAMPAIGN_TERM = "dgActivityDataCampaign"
    DATA_INVESTIGATION_TERM = "dgActivityDataInvestigation"
    FLIGHT_TERM = "dgFlight"
    CRUISE_TERM = "dgCruise"
    FUNDING_PROGRAM_TERM = "dgFundingProgram"
    DEPLOYMENT_TERM = "Deployment"
    # NB, this is a specialised activity with the subtype deployment pre-set
    ACTIVITY_DEPLOYMENT_TERM = "ActivityDeployment"

    # obs subtypes
    STATIONARY_PLATFORM_TERM = "dgStationaryPlatform"
    MOVING_PLATFORM_TERM = "dgMovingPlatform"
    LAND_STATION_TERM = "dgLandStation"
    MOORING_TERM = "dgMooring"
    STATION_GROUP_TERM = "dgStationGroup"
    SHIP_TERM = "dgShip"
    AIRCRAFT_TERM = "dgAircraft"
    SATELLITE_TERM = "dgSatellite"
    COMPUTER_TERM = "dgComputer"

    # provider types
    BADC_TERM = 'badc.nerc.ac.uk'
    NEODC_TERM = 'neodc.nerc.ac.uk'

    # The vocab term item for each term name - NB, terms with no vocab list
    # defined as yet use a 'NOT YET SET UP' placeholder in place of the url
    TERM_DATA = {
        OBJECT_PAGE_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '1', title='Object Home Page'),
        DATA_URL_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '2', title='Data URL'),
        DATA_PAGE_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '3', title='Data Home Page'),
        CURATOR_PAGE_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '4', title='Curator Home Page'),
        EXT_METADATA_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '5', title='Extended Metadata'),
        METADATA_SOURCE_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '6', title='Original Metadata Source'),
        #URI_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '3URI', title = 'Data Home Page'),

        BADC_TERM: VocabTermItem('NOT YET SET UP', BADC_TERM, title='British Atmospheric Data Centre'),
        NEODC_TERM: VocabTermItem('NOT YET SET UP', NEODC_TERM, title='NERC Earth Observation Data Centre'),

        LOGO_TERM: VocabTermItem('NOT YET SET UP', LOGO_TERM, title='Logo'),
        NUM_SIM_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '7', title='NumSim description'),
        OPENDAP_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU010', title='GET DATA &gt; OPENDAP DATA (DODS)'),
        THREDDS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU013', title='GET DATA &gt; THREDDS DATA'),
        WMS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU026', title='GET SERVICE &gt; GET WEB MAP SERVICE (WMS)'),
        WCS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU024', title='GET SERVICE &gt; GET WEB COVERAGE SERVICE (WCS)'),
        WFS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU025', title='GET SERVICE &gt; GET WEB FEATURE SERVICE (WFS)'),
        LAS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU006', title='GET DATA &gt; LAS'),
        DATA_EXTRACTOR_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '8', title='NDG DataExtractor'),
        FILE_BROWSER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '9', title='File Browser'),
        CSML_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N021', '1'),
        ACTIVITY_TERM: VocabTermItem('Activity - NOT YET SET UP', ACTIVITY_TERM, title='Activity'),
        ACTIVITY_DEPLOYMENT_TERM: VocabTermItem('Activity Deployment - NOT YET SET UP', ACTIVITY_DEPLOYMENT_TERM, title='Activity Deployment'),
        DPT_TERM: VocabTermItem('DPT - NOT YET SET UP', DPT_TERM, title='Data Production Tool'),
        OBS_TERM: VocabTermItem('OBS - NOT YET SET UP', OBS_TERM, title='Observation Station'),
        GRANULE_TERM: VocabTermItem('GRAN - NOT YET SET UP', GRANULE_TERM, title='Data Granule'),
        DE_TERM: VocabTermItem('DE - NOT YET SET UP', DE_TERM, title='Data Entity'),

        LIDAR_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG02', title="Lidar"),
        RADAR_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG03', title="Radar"),
        SONDE_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG04', title="Sonde"),
        NAVIGATION_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG05', title="Navigation"),
        GAS_CHROMATOGRAPH_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG06', title="Gas Chromatograph"),
        SPECTROMETER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG07', title=SPECTROMETER_TERM),
        MASS_SPECTROMETER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG08', title=MASS_SPECTROMETER_TERM),
        MET_SENSOR_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG09', title=MET_SENSOR_TERM),
        DOAS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG10', title=DOAS_TERM),
        ASOZ_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG11', title=ASOZ_TERM),
        RADIOMETER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG12', title=RADIOMETER_TERM),
        FAGE_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG13', title=FAGE_TERM),
        IMAGER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG14', title=IMAGER_TERM),
        FILTER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG15', title=FILTER_TERM),
        PARTICLE_COUNTER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG16', title=PARTICLE_COUNTER_TERM),
        SAMPLER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG17', title=SAMPLER_TERM),
        OTHER_INSTRUMENT_TYPE_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG99', title=OTHER_INSTRUMENT_TYPE_TERM),
        MODEL_TERM: VocabTermItem('dgModel - NOT YET SET UP', MODEL_TERM, title=MODEL_TERM),
        INSTRUMENT_TERM: VocabTermItem('dgInstrument - NOT YET SET UP', INSTRUMENT_TERM, title=INSTRUMENT_TERM),

        SIMULATION_TERM: VocabTermItem('NOT YET SET UP', SIMULATION_TERM, title=SIMULATION_TERM),
        ANALYSIS_TERM: VocabTermItem('NOT YET SET UP', ANALYSIS_TERM, title=ANALYSIS_TERM),
        MEASUREMENT_TERM: VocabTermItem('NOT YET SET UP', MEASUREMENT_TERM, title=MEASUREMENT_TERM),
        DATA_COLLECTION_TERM: VocabTermItem('NOT YET SET UP', DATA_COLLECTION_TERM, title=DATA_COLLECTION_TERM),
        DATA_PROJECT_TERM: VocabTermItem('NOT YET SET UP', DATA_PROJECT_TERM, title=DATA_PROJECT_TERM),
        DATA_CAMPAIGN_TERM: VocabTermItem('NOT YET SET UP', DATA_CAMPAIGN_TERM, title=DATA_CAMPAIGN_TERM),
        DATA_INVESTIGATION_TERM: VocabTermItem('NOT YET SET UP', DATA_INVESTIGATION_TERM, title=DATA_INVESTIGATION_TERM),
        FLIGHT_TERM: VocabTermItem('NOT YET SET UP', FLIGHT_TERM, title=FLIGHT_TERM),
        CRUISE_TERM: VocabTermItem('NOT YET SET UP', CRUISE_TERM, title=CRUISE_TERM),
        FUNDING_PROGRAM_TERM: VocabTermItem('NOT YET SET UP', FUNDING_PROGRAM_TERM, title=FUNDING_PROGRAM_TERM),
        DEPLOYMENT_TERM: VocabTermItem('NOT YET SET UP', DEPLOYMENT_TERM, title=DEPLOYMENT_TERM),

        STATIONARY_PLATFORM_TERM: VocabTermItem('NOT YET SET UP', STATIONARY_PLATFORM_TERM, title=STATIONARY_PLATFORM_TERM),
        MOVING_PLATFORM_TERM: VocabTermItem('NOT YET SET UP', MOVING_PLATFORM_TERM, title=MOVING_PLATFORM_TERM),
        LAND_STATION_TERM: VocabTermItem('NOT YET SET UP', LAND_STATION_TERM, title=LAND_STATION_TERM),
        MOORING_TERM: VocabTermItem('NOT YET SET UP', MOORING_TERM, title=MOORING_TERM),
        STATION_GROUP_TERM: VocabTermItem('NOT YET SET UP', STATION_GROUP_TERM, title=STATION_GROUP_TERM),
        SHIP_TERM: VocabTermItem('NOT YET SET UP', SHIP_TERM, title=SHIP_TERM),
        AIRCRAFT_TERM: VocabTermItem('NOT YET SET UP', AIRCRAFT_TERM, title=AIRCRAFT_TERM),
        SATELLITE_TERM: VocabTermItem('NOT YET SET UP', SATELLITE_TERM, title=SATELLITE_TERM),
        COMPUTER_TERM: VocabTermItem('NOT YET SET UP', COMPUTER_TERM, title=COMPUTER_TERM),
    }

    # the term names making up each data category - NB, the lists are
    # returned, as vocab term items, by getValidTypes
    ATOM_TYPES = [
        ACTIVITY_TERM, DE_TERM, DEPLOYMENT_TERM,
        DPT_TERM, GRANULE_TERM, OBS_TERM,
    ]

    DEPLOYABLE_ATOM_TYPES = [ACTIVITY_TERM, DPT_TERM, OBS_TERM]

    PROVIDER_TYPES = [BADC_TERM, NEODC_TERM]

    ONLINE_REF_TYPES = [
        OBJECT_PAGE_TERM, DATA_URL_TERM, DATA_PAGE_TERM,
        CURATOR_PAGE_TERM, EXT_METADATA_TERM, METADATA_SOURCE_TERM,
        NUM_SIM_TERM,
        OPENDAP_TERM, THREDDS_TERM, WMS_TERM, WCS_TERM, WFS_TERM,
        DATA_EXTRACTOR_TERM, FILE_BROWSER_TERM, LAS_TERM, LOGO_TERM,
    ]
    #URI_TERM, \

    # names of the data categories
    ATOM_CATEGORY = "atom"
    DEPLOYABLE_ATOM_CATEGORY = "deployableatom"
    ONLINE_REF_CATEGORY = "onlineref"
    PROVIDER_CATEGORY = "provider"

    # A dictionary to group the various valid subtypes of atoms - grouped by their
    # main type
    SUBTYPE_TERMS = {
        DPT_TERM: [
            LIDAR_TERM, RADAR_TERM,
            SONDE_TERM, NAVIGATION_TERM,
            GAS_CHROMATOGRAPH_TERM, SPECTROMETER_TERM,
            MASS_SPECTROMETER_TERM,
            MET_SENSOR_TERM, DOAS_TERM,
            ASOZ_TERM, RADIOMETER_TERM,
            FAGE_TERM, IMAGER_TERM,
            FILTER_TERM, PARTICLE_COUNTER_TERM,
            SAMPLER_TERM, OTHER_INSTRUMENT_TYPE_TERM,
            MODEL_TERM, INSTRUMENT_TERM,
        ],
        DE_TERM: [
            SIMULATION_TERM, ANALYSIS_TERM,
            MEASUREMENT_TERM,
        ],
        ACTIVITY_TERM: [
            DATA_COLLECTION_TERM, DATA_PROJECT_TERM,
            DATA_CAMPAIGN_TERM, DATA_INVESTIGATION_TERM,
            FLIGHT_TERM, CRUISE_TERM,
            FUNDING_PROGRAM_TERM, DEPLOYMENT_TERM,
        ],
        OBS_TERM: [
            STATIONARY_PLATFORM_TERM, MOVING_PLATFORM_TERM,
            LAND_STATION_TERM, MOORING_TERM,
            STATION_GROUP_TERM, SHIP_TERM,
            AIRCRAFT_TERM, SATELLITE_TERM,
            COMPUTER_TERM,
        ],
    }

    # time, in seconds, for which the vocab server page is cached
    ONE_HOUR = 3600.0

    # page retrieved from the vocab server to determine the current versions
    VOCAB_SERVER_URL = 'http://vocab.ndg.nerc.ac.uk/clients/whatLists'

    BROWSE_ROOT_URL = "http://localhost:5000/view/"
266
267    def __init__(self):
268        logging.info("Setting up VocabTermData object")
269        self.VOCAB_DATA_PAGE = None
270        # introduce slight delay here
271        self.REFRESH_TIME = time.time() - 2.0
272        self.latestTermVersion = {}
273        logging.info("VocabTermData object set up")
274
275
276    def isValidSubType(self, mainType, subType):
277        '''
278        Determine whether a specified subtype is valid for a particular
279        main type
280        @param mainType: term ID of the main type of the data
281        @param subType: term ID of the subtype of the data
282        @return: True if the subtype is valid
283        @raise VocabTermDataError: if the mainType is not recognised or the subType is not valid
284        '''
285        if not self.SUBTYPE_TERMS.has_key(mainType):
286            errorMessage = "Error: unrecognised data type: '%s'" %mainType
287            logging.error(errorMessage)
288            raise ValueError(errorMessage)
289
290        if subType in self.SUBTYPE_TERMS[mainType]:
291            return True
292           
293        errorMessage = "Error: subtype, '%s' is not valid for data type, '%s'" \
294            %(subType, mainType)
295        raise VocabTermDataError(errorMessage)
296
297
298    def getValidTypes(self, category):
299        '''
300        Return a list of the valid types available for a particular data category
301        @param category: type of term info to look up - e.g. atoms or providers
302        - this should be specified using the ..._CATEGORY instance variables
303        defined above
304        @return list of VocabTermInfo objects for the specified category
305        @raise VocabTermDataError: if unrecognised data category
306        '''
307        logging.debug("Lookup up list of valid %s types" %category)
308        catList = []
309        if category == self.ATOM_CATEGORY:
310            catList = self.ATOM_TYPES
311        elif category == self.DEPLOYABLE_ATOM_CATEGORY:
312            catList = self.DEPLOYABLE_ATOM_TYPES
313        elif category == self.PROVIDER_CATEGORY:
314            catList = self.PROVIDER_TYPES
315        elif category == self.ONLINE_REF_CATEGORY:
316            catList = self.ONLINE_REF_TYPES
317        else:
318            errorMessage = "Unrecognised data category, '%s'" %category
319            raise VocabTermDataError(errorMessage)
320           
321        types = []
322        for st in catList:
323            types.append(self.TERM_DATA[st])
324        return types
325
326
327    def tidySubTypeTitle(self, title):
328        '''
329        Tidy up the title of subtypes slightly, if need be
330        - NB, these are inherited from original moles format so are
331        not really 'human readable'
332        '''
333        if title and title.startswith('dg'):
334            title = title[2:]
335            # and fix any camelcase - putting into words
336            title = re.sub('([A-Z])', r" \1", title).strip()
337
338        return title
339
340    def getValidSubTypes(self, atomType):
341        '''
342        Get list of subtypes that are valid wrt a specified atom type
343        @param atomType: term ID for the atom type whose subtypes need to
344        be looked up
345        @return: list of valid subtypes
346        '''
347        logging.debug("Lookup up subtypes for atom type, '%s'" %atomType)
348        subTypes = self.SUBTYPE_TERMS.get(atomType) or []
349        types = []
350        for st in subTypes:
351            item = self.TERM_DATA[st]
352            item.title = self.tidySubTypeTitle(item.title)
353            types.append(item)
354        logging.debug("Found subtypes: %s" %subTypes)
355        return types
356
357       
358    def getTermFromTitle(self, title):
359        '''
360        Given a term title/label, get back the related term id
361        @param title: title/label of term id to retrieve
362        @raise VocabTermDataError: if more than one title or no title is returned 
363        '''
364        termID = []
365        for val in self.TERM_DATA.itervalues():
366            if val.title == title:
367                termID.append(val.termID)
368       
369        if len(termID) != 1:
370            errorMessage = "Error: could not accurately determine the vocab term \
371                ID for the label, '%s' - %s values returned" %(title, len(termID))
372            raise VocabTermDataError(errorMessage)
373
374        return termID[0]
375       
376
377    def _getVocabDataPage(self):
378        '''
379        Getter method to allow regular refreshing of data
380        '''
381        if self.REFRESH_TIME < time.time():
382            self.getVocabServerData()
383       
384        return self.VOCAB_DATA_PAGE
385
386   
387    def getVocabServerData(self):
388        '''
389        Retrieve the web page containing the versioning info for the various vocab terms
390        '''
391        logging.info("Retrieving vocab server data page")
392        f = urllib.urlopen(self.VOCAB_SERVER_URL)
393        self.VOCAB_DATA_PAGE = f.read()
394        f.close()
395        self.REFRESH_TIME = time.time() + self.ONE_HOUR
396        logging.info("Vocab server data retrieved")
397
398
399    def getLatestTermVersions(self):
400        '''
401        Retrieve the latest versions of the vocab terms required
402        - NB, refreshes the data on an hourly basis, if necessary
403        '''
404        # now need to parse the returned data to get the current version number
405        for termName in self.TERM_DATA:
406            if termName not in self.latestTermVersion:
407                self.getLatestTermVersion(termName)
408
409                   
410    def getLatestTermVersion(self, termName):
411        '''
412        Parse the vocab server data and determine the latest version number of the term with the specified name
413        @param termName: name of term whose current version needs to be established
414        '''
415        logging.info("Getting latest term version for term, '%s'" %termName)
416        # check for recent data - NB, the term will probably share a base url with other terms
417        # so may have already have the correct URL set up
418        if termName not in self.latestTermVersion or self.REFRESH_TIME < time.time():
419   
420            uri = self.TERM_DATA[termName].vocabURL
421            currentVersion = self._getURIVersion(uri)
422
423            if currentVersion is not None:
424                if currentVersion:
425                    self.latestTermVersion[termName] = str(currentVersion)
426                else:
427                    self.latestTermVersion[termName] = str(uri)
428        logging.info("Latest term version for term, '%s' retrieved" %termName)
429
430
431    def _getURIVersion(self, uri):
432        '''
433        Parse the server data for a specified uri and return the latest version number of it
434        @param uri: uri to look for
435        @raise VocabTermDataError: if specified uri not found in vocab server
436        @return version of vocab uri, '' if uri contains latest version, None if uri not found
437        '''
438        logging.debug("Getting current version number of uri, '%s'" %uri)
439        # allow the data to be refreshed on an hourly basis
440        pageData = self._getVocabDataPage()
441       
442        uri = uri.replace('/term/','/list/')
443        if uri.endswith('/'):
444            uri = uri.rstrip('/')
445        # NB, the uri may already have a version number included in it - so allow this in the search
446        regExp = re.compile(r'(' + uri + '(/(\d{1,}))?)')
447        currentVersion = None
448        foundTerm = False
449        for termVals in regExp.findall(pageData):
450            foundTerm = True
451            version = termVals[2]
452            # version number must be included in original uri, if match found without a version number
453            # being split out - so ignore this - since the original uri is already specified in full
454            if not version:
455                logging.debug("Specified URI already contains current version")
456                return ''
457
458            # TODO: check how the versioning system will be done - i.e. 3.2.4 type versioning will cause this to break
459            if not currentVersion:
460                currentVersion = int(version)
461            elif int(version) > currentVersion:
462                currentVersion = int(version)
463
464        if currentVersion == None:
465            errorMessage = "Could not find information in vocab server for uri, '%s' - exiting" %uri
466            logging.error(errorMessage)
467            # TODO: uncomment the ValueError once all the vocab terms have been defined
468            #raise VocabTermDataError(errorMessage)
469        else:
470            logging.debug("Version number found: '%s'" %currentVersion)
471
472        return currentVersion
473
474       
475    def getTermCurrentVocabURL(self, termName):
476        '''
477        Get the current URL on the vocab server to the specified term
478        @param termName: name of term whose URL to return
479        @raise VocabTermDataError: if term not found
480        '''
481        logging.debug("Looking up vocab data for term: '%s'" %termName)
482       
483        if termName not in self.TERM_DATA:
484           
485            # NB, some item term IDs are different from their item name; this was
486            # to allow the import of historical data and should be fixed at some
487            # point
488            foundTerm = False
489            for key, item in self.TERM_DATA.items():
490                if termName == item.termID:
491                    termName = key
492                    foundTerm = True
493                    break
494                   
495            if not foundTerm:
496                raise VocabTermDataError("Could not find term, '%s' in \
497                    defined list of valid vocab terms - exiting" %termName)
498       
499        if termName not in self.latestTermVersion:
500            self.getLatestTermVersion(termName)
501
502        uri = None
503        if termName in self.latestTermVersion:
504            uri = self.TERM_DATA[termName].vocabURL + \
505                "/" + self.latestTermVersion[termName] + "/" + \
506                self.TERM_DATA[termName].termID
507            logging.debug("Returning vocab URL: '%s'" %uri)
508        else:
509            logging.debug("URL not found...")
510        return  uri
511   
512   
513    def getCurrentVocabURI(self, uri):
514        '''
515        Look up a specified URI and return the current version of it
516        '''
517        logging.debug("Looking up current version of uri: '%s'" %uri)
518        currentVersion = self._getURIVersion(uri)
519        logging.debug("URI version looked up")
520        # NB, this can validly return None or '' - ignore both cases
521        if currentVersion:
522            if not uri.endswith('/'):
523                uri += '/'
524            return uri + str(currentVersion)
525        return uri
526   
527    def getVTI(self, term):
528        '''
529        Return the vocab term item for the specified term
530        @param term: term ID - one of the constants defined above
531        @return VocabTermItem corresponding to term
532        @raise VocabTermDataError if term not found
533        '''
534        vti = self.TERM_DATA.get(term)
535        if not vti:
536            raise VocabTermDataError("No info for term, '%s', found" %term)
537   
538        return vti
539   
540   
541    def getTermItemfromFullVocabURI(self, uri):
542        '''
543        Given a full term id - i.e. with vocab uri + version + term ID
544        determine the correct vocab data term item
545        @param uri: full term ID with version + vocab uri
546        @return vocab term item corresponding to the input uri
547        @raise VocabTermDataError if term ID not found
548        '''
549        logging.debug("Determining term ID for uri, '%s'" %uri)
550        termID = uri.split('/')[-1]
551
552        return self.getTermItemfromURIAndTerm(uri, termID)
553   
554   
555    def getTermItemfromURIAndTerm(self, uri, termID):
556        '''
557        Given a term id and its uri
558        determine the correct vocab data term item
559        @param uri: base uri for vocab term - i.e. without versioning info
560        @param termID: term ID to look up
561        @return vocab term item corresponding to the input uri/term
562        @raise VocabTermDataError if term ID not found
563        '''
564        logging.debug("Determining term item for uri, '%s', term name, '%s'" %(uri, termID))
565        vti = None
566        if self.TERM_DATA.get(termID) and uri.startswith(self.TERM_DATA[termID].vocabURL):
567            vti = self.TERM_DATA[termID]
568        else:
569            for item in self.TERM_DATA.itervalues():
570                if item.termID == termID and uri.startswith(item.vocabURL):
571                    vti = item
572                    break
573       
574        if not vti:
575            raise VocabTermDataError("Unrecognised term URI: '%s'" %uri)
576
577        logging.debug("Found matching term - '%s'" %vti.termID)
578        return vti
579   
580   
581    def isDeployable(self, vti):
582        '''
583        Determines whether a vocab term item can be used as part of a deployment
584        @param vti: vocab term item to check
585        @return True if allowed as part of deployment, false otherwise
586        '''
587        logging.debug("Checking if item is deployable")
588        if vti:
589            if vti.termID == self.ACTIVITY_TERM or \
590                vti.termID == self.DPT_TERM or \
591                vti.termID == self.OBS_TERM:
592                logging.debug("- item is deployable")
593                return True
594       
595        logging.debug("- item is not deployable")
596        return False
597   
598   
599    def isDeployment(self, vti):
600        '''
601        Determines whether a vocab term item is a deployment item
602        @param vti: vocab term item to check
603        @return True if a deployment, false otherwise
604        '''
605        logging.debug("Checking if item is a deployment")
606        if vti:
607            if vti.termID == self.DEPLOYMENT_TERM:
608                logging.debug("- item is a deployment")
609                return True
610       
611        logging.debug("- item is not a deployment")
612        return False
613   
614   
615    def isGranule(self, vti):
616        '''
617        Determines whether a vocab term item is a granule item
618        @param vti: vocab term item to check
619        @return True if a granule, false otherwise
620        '''
621        logging.debug("Checking if item is a granule")
622        if vti:
623            if vti.termID == self.GRANULE_TERM:
624                logging.debug("- item is a granule")
625                return True
626       
627        logging.debug("- item is not a granule")
628        return False
Note: See TracBrowser for help on using the repository browser.