source: exist/trunk/python/ndgUtils/vocabtermdata.py @ 4313

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/vocabtermdata.py@4313
Revision 4313, 25.8 KB checked in by cbyrom, 11 years ago (diff)

Improve error handling - using user defined exception + include better
searching for items with matching term IDs.

Line 
1#!/usr/bin/env python
2'''
3 Class to store and access the various vocab term data
4 
5 @author: C Byrom, Tessella Jul 2008
6'''
7import sys, logging, commands, string, os, time, re
8import urllib
9
class VocabTermItem(object):
    '''
    Simple holder for the details of a single vocab term

    @ivar vocabURL: URL of the vocab list that the term belongs to
    @ivar termID: ID of the term within that vocab list
    @ivar title: optional human readable label for the term
    '''
    def __init__(self, vocabURL, termID, title=None):
        # the values are stored as given - no validation is done here
        self.vocabURL, self.termID, self.title = vocabURL, termID, title
18
19
class VocabTermDataError(ValueError):
    """
    Error raised by the VocabTermData class.

    NB, the message is written to the error log when the exception is
    created, so callers need not log it again before raising.
    """
    def __init__(self, msg):
        super(VocabTermDataError, self).__init__(msg)
        logging.error(msg)
27
28
class VocabTermData(object):
    '''
    Holds the known vocab terms, and the groupings of these, together with
    methods to look up their current versions on the vocab server
    '''

    # online reference and service term names
    OBJECT_PAGE_TERM = 'ObjectPage'
    DATA_URL_TERM = 'DataURL'
    DATA_PAGE_TERM = 'DataPage'
    CURATOR_PAGE_TERM = 'CuratorPage'
    EXT_METADATA_TERM = 'ExtMetadata'
    METADATA_SOURCE_TERM = 'MetadataSource'
    # NB, URI_TERM currently has no entry in TERM_DATA or ONLINE_REF_TYPES
    URI_TERM = 'URI'
    LOGO_TERM = 'LOGO'
    NUM_SIM_TERM = 'NumSim'
    OPENDAP_TERM = 'OPENDAP'
    THREDDS_TERM = 'THREDDS'
    WMS_TERM = 'WMS'
    WCS_TERM = 'WCS'
    WFS_TERM = 'WFS'
    LAS_TERM = 'LAS'
    DATA_EXTRACTOR_TERM = 'DataExtractor'
    FILE_BROWSER_TERM = 'FileBrowser'
    CSML_TERM = 'CSML'

    # main atom types
    ACTIVITY_TERM = 'ACTIVITY'
    DPT_TERM = 'DPT'
    OBS_TERM = 'OBS'
    GRANULE_TERM = 'GRANULE'
    DE_TERM = 'DE'

    # dpt subtypes
    LIDAR_TERM = 'dgLidar'
    RADAR_TERM = 'dgRadar'
    SONDE_TERM = 'dgSonde'
    NAVIGATION_TERM = 'dgNavigation'
    GAS_CHROMATOGRAPH_TERM = 'dgGasChromatograph'
    SPECTROMETER_TERM = 'dgSpectrometer'
    MASS_SPECTROMETER_TERM = 'dgMassSpectrometer'
    MET_SENSOR_TERM = 'dgMetSensor'
    DOAS_TERM = 'dgDOAS'
    ASOZ_TERM = 'dgASOZ'
    RADIOMETER_TERM = 'dgRadiometer'
    FAGE_TERM = 'dgFAGE'
    IMAGER_TERM = 'dgImager'
    FILTER_TERM = 'dgFilter'
    PARTICLE_COUNTER_TERM = 'dgParticleCounter'
    SAMPLER_TERM = 'dgSampler'
    OTHER_INSTRUMENT_TYPE_TERM = 'dgOtherInstrumentType'
    MODEL_TERM = 'dgModel'
    INSTRUMENT_TERM = 'dgInstrument'

    # de subtypes
    SIMULATION_TERM = 'dgSimulation'
    ANALYSIS_TERM = 'dgAnalysis'
    MEASUREMENT_TERM = 'dgMeasurement'

    # activity subtypes
    DATA_COLLECTION_TERM = 'dgActivityDataCollection'
    DATA_PROJECT_TERM = 'dgActivityDataProject'
    DATA_CAMPAIGN_TERM = 'dgActivityDataCampaign'
    DATA_INVESTIGATION_TERM = 'dgActivityDataInvestigation'
    FLIGHT_TERM = 'dgFlight'
    CRUISE_TERM = 'dgCruise'
    FUNDING_PROGRAM_TERM = 'dgFundingProgram'
    DEPLOYMENT_TERM = 'Deployment'
    # NB, this is a specialised activity with the subtype deployment pre-set
    ACTIVITY_DEPLOYMENT_TERM = 'ActivityDeployment'

    # obs subtypes
    STATIONARY_PLATFORM_TERM = 'dgStationaryPlatform'
    MOVING_PLATFORM_TERM = 'dgMovingPlatform'
    LAND_STATION_TERM = 'dgLandStation'
    MOORING_TERM = 'dgMooring'
    STATION_GROUP_TERM = 'dgStationGroup'
    SHIP_TERM = 'dgShip'
    AIRCRAFT_TERM = 'dgAircraft'
    SATELLITE_TERM = 'dgSatellite'
    COMPUTER_TERM = 'dgComputer'

    # provider types
    BADC_TERM = 'badc.nerc.ac.uk'
    NEODC_TERM = 'neodc.nerc.ac.uk'

    # Lookup of term name -> VocabTermItem; terms whose vocab list has not
    # been defined on the vocab server yet carry a 'NOT YET SET UP'
    # placeholder in place of the vocab URL
    TERM_DATA = {
        # online reference types
        OBJECT_PAGE_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '1', title='Object Home Page'),
        DATA_URL_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '2', title='Data URL'),
        DATA_PAGE_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '3', title='Data Home Page'),
        CURATOR_PAGE_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '4', title='Curator Home Page'),
        EXT_METADATA_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '5', title='Extended Metadata'),
        METADATA_SOURCE_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '6', title='Original Metadata Source'),
        # NB, the URI_TERM entry (N041, '3URI', 'Data Home Page') is disabled

        # providers
        BADC_TERM: VocabTermItem('NOT YET SET UP', BADC_TERM, title='British Atmospheric Data Centre'),
        NEODC_TERM: VocabTermItem('NOT YET SET UP', NEODC_TERM, title='NERC Earth Observation Data Centre'),

        # services
        LOGO_TERM: VocabTermItem('LOGO', 'LOGO', title='Logo'),
        NUM_SIM_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '7', title='NumSim description'),
        OPENDAP_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU010', title='GET DATA > OPENDAP DATA (DODS)'),
        THREDDS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU013', title='GET DATA > THREDDS DATA'),
        WMS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU026', title='GET SERVICE > GET WEB MAP SERVICE (WMS)'),
        WCS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU024', title='GET SERVICE > GET WEB COVERAGE SERVICE (WCS)'),
        WFS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU025', title='GET SERVICE > GET WEB FEATURE SERVICE (WFS)'),
        LAS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU006', title='GET DATA > LAS'),
        DATA_EXTRACTOR_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '8', title='NDG DataExtractor'),
        FILE_BROWSER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '9', title='File Browser'),
        CSML_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N021', '1'),

        # main atom types
        ACTIVITY_TERM: VocabTermItem('Activity - NOT YET SET UP', ACTIVITY_TERM, title='Activity'),
        ACTIVITY_DEPLOYMENT_TERM: VocabTermItem('Activity Deployment - NOT YET SET UP', ACTIVITY_DEPLOYMENT_TERM, title='Activity Deployment'),
        DPT_TERM: VocabTermItem('DPT - NOT YET SET UP', DPT_TERM, title='Data Production Tool'),
        OBS_TERM: VocabTermItem('OBS - NOT YET SET UP', OBS_TERM, title='Observation Station'),
        GRANULE_TERM: VocabTermItem('GRAN - NOT YET SET UP', GRANULE_TERM, title='Data Granule'),
        DE_TERM: VocabTermItem('DE - NOT YET SET UP', DE_TERM, title='Data Entity'),

        # dpt subtypes
        LIDAR_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG02', title='Lidar'),
        RADAR_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG03', title='Radar'),
        SONDE_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG04', title='Sonde'),
        NAVIGATION_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG05', title='Navigation'),
        GAS_CHROMATOGRAPH_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG06', title='Gas Chromatograph'),
        SPECTROMETER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG07', title=SPECTROMETER_TERM),
        MASS_SPECTROMETER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG08', title=MASS_SPECTROMETER_TERM),
        MET_SENSOR_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG09', title=MET_SENSOR_TERM),
        DOAS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG10', title=DOAS_TERM),
        ASOZ_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG11', title=ASOZ_TERM),
        RADIOMETER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG12', title=RADIOMETER_TERM),
        FAGE_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG13', title=FAGE_TERM),
        IMAGER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG14', title=IMAGER_TERM),
        FILTER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG15', title=FILTER_TERM),
        PARTICLE_COUNTER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG16', title=PARTICLE_COUNTER_TERM),
        SAMPLER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG17', title=SAMPLER_TERM),
        OTHER_INSTRUMENT_TYPE_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG99', title=OTHER_INSTRUMENT_TYPE_TERM),
        MODEL_TERM: VocabTermItem('dgModel - NOT YET SET UP', MODEL_TERM, title=MODEL_TERM),
        INSTRUMENT_TERM: VocabTermItem('dgInstrument - NOT YET SET UP', INSTRUMENT_TERM, title=INSTRUMENT_TERM),

        # de subtypes
        SIMULATION_TERM: VocabTermItem('NOT YET SET UP', SIMULATION_TERM, title=SIMULATION_TERM),
        ANALYSIS_TERM: VocabTermItem('NOT YET SET UP', ANALYSIS_TERM, title=ANALYSIS_TERM),
        MEASUREMENT_TERM: VocabTermItem('NOT YET SET UP', MEASUREMENT_TERM, title=MEASUREMENT_TERM),

        # activity subtypes
        DATA_COLLECTION_TERM: VocabTermItem('NOT YET SET UP', DATA_COLLECTION_TERM, title=DATA_COLLECTION_TERM),
        DATA_PROJECT_TERM: VocabTermItem('NOT YET SET UP', DATA_PROJECT_TERM, title=DATA_PROJECT_TERM),
        DATA_CAMPAIGN_TERM: VocabTermItem('NOT YET SET UP', DATA_CAMPAIGN_TERM, title=DATA_CAMPAIGN_TERM),
        DATA_INVESTIGATION_TERM: VocabTermItem('NOT YET SET UP', DATA_INVESTIGATION_TERM, title=DATA_INVESTIGATION_TERM),
        FLIGHT_TERM: VocabTermItem('NOT YET SET UP', FLIGHT_TERM, title=FLIGHT_TERM),
        CRUISE_TERM: VocabTermItem('NOT YET SET UP', CRUISE_TERM, title=CRUISE_TERM),
        FUNDING_PROGRAM_TERM: VocabTermItem('NOT YET SET UP', FUNDING_PROGRAM_TERM, title=FUNDING_PROGRAM_TERM),
        DEPLOYMENT_TERM: VocabTermItem('NOT YET SET UP', DEPLOYMENT_TERM, title=DEPLOYMENT_TERM),

        # obs subtypes
        STATIONARY_PLATFORM_TERM: VocabTermItem('NOT YET SET UP', STATIONARY_PLATFORM_TERM, title=STATIONARY_PLATFORM_TERM),
        MOVING_PLATFORM_TERM: VocabTermItem('NOT YET SET UP', MOVING_PLATFORM_TERM, title=MOVING_PLATFORM_TERM),
        LAND_STATION_TERM: VocabTermItem('NOT YET SET UP', LAND_STATION_TERM, title=LAND_STATION_TERM),
        MOORING_TERM: VocabTermItem('NOT YET SET UP', MOORING_TERM, title=MOORING_TERM),
        STATION_GROUP_TERM: VocabTermItem('NOT YET SET UP', STATION_GROUP_TERM, title=STATION_GROUP_TERM),
        SHIP_TERM: VocabTermItem('NOT YET SET UP', SHIP_TERM, title=SHIP_TERM),
        AIRCRAFT_TERM: VocabTermItem('NOT YET SET UP', AIRCRAFT_TERM, title=AIRCRAFT_TERM),
        SATELLITE_TERM: VocabTermItem('NOT YET SET UP', SATELLITE_TERM, title=SATELLITE_TERM),
        COMPUTER_TERM: VocabTermItem('NOT YET SET UP', COMPUTER_TERM, title=COMPUTER_TERM)
    }

    # term names making up each of the data categories
    ATOM_TYPES = [ACTIVITY_TERM, DE_TERM, DPT_TERM, GRANULE_TERM, OBS_TERM]

    DEPLOYABLE_ATOM_TYPES = [ACTIVITY_TERM, DPT_TERM, OBS_TERM]

    PROVIDER_TYPES = [BADC_TERM, NEODC_TERM]

    # NB, URI_TERM is currently left out of this list
    ONLINE_REF_TYPES = [
        OBJECT_PAGE_TERM, DATA_URL_TERM, DATA_PAGE_TERM,
        CURATOR_PAGE_TERM, EXT_METADATA_TERM, METADATA_SOURCE_TERM,
        NUM_SIM_TERM,
        OPENDAP_TERM, THREDDS_TERM, WMS_TERM, WCS_TERM, WFS_TERM,
        DATA_EXTRACTOR_TERM, FILE_BROWSER_TERM, LAS_TERM, LOGO_TERM
    ]

    # names of the data categories - for use with getValidTypes
    ATOM_CATEGORY = 'atom'
    DEPLOYABLE_ATOM_CATEGORY = 'deployableatom'
    ONLINE_REF_CATEGORY = 'onlineref'
    PROVIDER_CATEGORY = 'provider'

    # A dictionary to group the various valid subtypes of atoms - grouped by
    # their main type
    SUBTYPE_TERMS = {
        DPT_TERM: [
            LIDAR_TERM, RADAR_TERM,
            SONDE_TERM, NAVIGATION_TERM,
            GAS_CHROMATOGRAPH_TERM, SPECTROMETER_TERM,
            MASS_SPECTROMETER_TERM,
            MET_SENSOR_TERM, DOAS_TERM,
            ASOZ_TERM, RADIOMETER_TERM,
            FAGE_TERM, IMAGER_TERM,
            FILTER_TERM, PARTICLE_COUNTER_TERM,
            SAMPLER_TERM, OTHER_INSTRUMENT_TYPE_TERM,
            MODEL_TERM, INSTRUMENT_TERM
        ],
        DE_TERM: [
            SIMULATION_TERM, ANALYSIS_TERM,
            MEASUREMENT_TERM
        ],
        ACTIVITY_TERM: [
            DATA_COLLECTION_TERM, DATA_PROJECT_TERM,
            DATA_CAMPAIGN_TERM, DATA_INVESTIGATION_TERM,
            FLIGHT_TERM, CRUISE_TERM,
            FUNDING_PROGRAM_TERM, DEPLOYMENT_TERM
        ],
        OBS_TERM: [
            STATIONARY_PLATFORM_TERM, MOVING_PLATFORM_TERM,
            LAND_STATION_TERM, MOORING_TERM,
            STATION_GROUP_TERM, SHIP_TERM,
            AIRCRAFT_TERM, SATELLITE_TERM,
            COMPUTER_TERM
        ]
    }

    # number of seconds for which retrieved vocab server data is kept
    ONE_HOUR = 3600.0

    # page on the vocab server listing the available vocab lists
    VOCAB_SERVER_URL = 'http://vocab.ndg.nerc.ac.uk/clients/whatLists'

    BROWSE_ROOT_URL = 'http://localhost:5000/view/'
243
244    def __init__(self):
245        logging.info("Setting up VocabTermData object")
246        self.VOCAB_DATA_PAGE = None
247        # introduce slight delay here
248        self.REFRESH_TIME = time.time() - 2.0
249        self.latestTermVersion = {}
250        logging.info("VocabTermData object set up")
251
252
253    def isValidSubType(self, mainType, subType):
254        '''
255        Determine whether a specified subtype is valid for a particular
256        main type
257        @param mainType: term ID of the main type of the data
258        @param subType: term ID of the subtype of the data
259        @return: True if the subtype is valid
260        @raise VocabTermDataError: if the mainType is not recognised or the subType is not valid
261        '''
262        if not self.SUBTYPE_TERMS.has_key(mainType):
263            errorMessage = "Error: unrecognised data type: '%s'" %mainType
264            logging.error(errorMessage)
265            raise ValueError(errorMessage)
266
267        if subType in self.SUBTYPE_TERMS[mainType]:
268            return True
269           
270        errorMessage = "Error: subtype, '%s' is not valid for data type, '%s'" \
271            %(subType, mainType)
272        raise VocabTermDataError(errorMessage)
273
274
275    def getValidTypes(self, category):
276        '''
277        Return a list of the valid types available for a particular data category
278        @param category: type of term info to look up - e.g. atoms or providers
279        - this should be specified using the ..._CATEGORY instance variables
280        defined above
281        @return list of VocabTermInfo objects for the specified category
282        @raise VocabTermDataError: if unrecognised data category
283        '''
284        logging.debug("Lookup up list of valid %s types" %category)
285        catList = []
286        if category == self.ATOM_CATEGORY:
287            catList = self.ATOM_TYPES
288        elif category == self.DEPLOYABLE_ATOM_CATEGORY:
289            catList = self.DEPLOYABLE_ATOM_TYPES
290        elif category == self.PROVIDER_CATEGORY:
291            catList = self.PROVIDER_TYPES
292        elif category == self.ONLINE_REF_CATEGORY:
293            catList = self.ONLINE_REF_TYPES
294        else:
295            errorMessage = "Unrecognised data category, '%s'" %category
296            raise VocabTermDataError(errorMessage)
297           
298        types = []
299        for st in catList:
300            types.append(self.TERM_DATA[st])
301        return types
302
303
304    def tidySubTypeTitle(self, title):
305        '''
306        Tidy up the title of subtypes slightly, if need be
307        - NB, these are inherited from original moles format so are
308        not really 'human readable'
309        '''
310        if title and title.startswith('dg'):
311            title = title[2:]
312            # and fix any camelcase - putting into words
313            title = re.sub('([A-Z])', r" \1", title).strip()
314
315        return title
316
317    def getValidSubTypes(self, atomType):
318        '''
319        Get list of subtypes that are valid wrt a specified atom type
320        @param atomType: term ID for the atom type whose subtypes need to
321        be looked up
322        @return: list of valid subtypes
323        '''
324        logging.debug("Lookup up subtypes for atom type, '%s'" %atomType)
325        subTypes = self.SUBTYPE_TERMS.get(atomType) or []
326        types = []
327        for st in subTypes:
328            item = self.TERM_DATA[st]
329            item.title = self.tidySubTypeTitle(item.title)
330            types.append(item)
331        logging.debug("Found subtypes: %s" %subTypes)
332        return types
333
334       
335    def getTermFromTitle(self, title):
336        '''
337        Given a term title/label, get back the related term id
338        @param title: title/label of term id to retrieve
339        @raise VocabTermDataError: if more than one title or no title is returned 
340        '''
341        termID = []
342        for val in self.TERM_DATA.itervalues():
343            if val.title == title:
344                termID.append(val.termID)
345       
346        if len(termID) != 1:
347            errorMessage = "Error: could not accurately determine the vocab term \
348                ID for the label, '%s' - %s values returned" %(title, len(termID))
349            raise VocabTermDataError(errorMessage)
350
351        return termID[0]
352       
353
354    def _getVocabDataPage(self):
355        '''
356        Getter method to allow regular refreshing of data
357        '''
358        if self.REFRESH_TIME < time.time():
359            self.getVocabServerData()
360       
361        return self.VOCAB_DATA_PAGE
362
363   
364    def getVocabServerData(self):
365        '''
366        Retrieve the web page containing the versioning info for the various vocab terms
367        '''
368        logging.info("Retrieving vocab server data page")
369        f = urllib.urlopen(self.VOCAB_SERVER_URL)
370        self.VOCAB_DATA_PAGE = f.read()
371        f.close()
372        self.REFRESH_TIME = time.time() + self.ONE_HOUR
373        logging.info("Vocab server data retrieved")
374
375
376    def getLatestTermVersions(self):
377        '''
378        Retrieve the latest versions of the vocab terms required
379        - NB, refreshes the data on an hourly basis, if necessary
380        '''
381        # now need to parse the returned data to get the current version number
382        for termName in self.TERM_DATA:
383            if termName not in self.latestTermVersion:
384                self.getLatestTermVersion(termName)
385
386                   
387    def getLatestTermVersion(self, termName):
388        '''
389        Parse the vocab server data and determine the latest version number of the term with the specified name
390        @param termName: name of term whose current version needs to be established
391        '''
392        logging.info("Getting latest term version for term, '%s'" %termName)
393        # check for recent data - NB, the term will probably share a base url with other terms
394        # so may have already have the correct URL set up
395        if termName not in self.latestTermVersion or self.REFRESH_TIME < time.time():
396   
397            uri = self.TERM_DATA[termName].vocabURL
398            currentVersion = self._getURIVersion(uri)
399   
400            self.latestTermVersion[termName] = str(currentVersion)
401        logging.info("Latest term version for term, '%s' retrieved" %termName)
402
403
404    def _getURIVersion(self, uri):
405        '''
406        Parse the server data for a specified uri and return the latest version number of it
407        @param uri: uri to look for
408        @raise VocabTermDataError: if specified uri not found in vocab server
409        @return version of vocab uri, '' if uri contains latest version and None if uri not found
410        '''
411        # allow the data to be refreshed on an hourly basis
412        pageData = self._getVocabDataPage()
413       
414        uri = uri.replace('/term/','/list/')
415        if uri.endswith('/'):
416            uri = uri.rstrip('/')
417        # NB, the uri may already have a version number included in it - so allow this in the search
418        regExp = re.compile(r'(' + uri + '(/(\d{1,}))?)')
419        currentVersion = None
420        foundTerm = False
421        for termVals in regExp.findall(pageData):
422            foundTerm = True
423            version = termVals[2]
424            # version number must be included in original uri, if match found without a version number
425            # being split out - so ignore this - since the original uri is already specified in full
426            if not version:
427                currentVersion = ''
428                break
429
430            # TODO: check how the versioning system will be done - i.e. 3.2.4 type versioning will cause this to break
431            if not currentVersion:
432                currentVersion = int(version)
433            elif int(version) > currentVersion:
434                currentVersion = int(version)
435
436        if currentVersion == None:
437            errorMessage = "Could not find information in vocab server for uri, '%s' - exiting" %uri
438            logging.error(errorMessage)
439            # TODO: uncomment the ValueError once all the vocab terms have been defined
440            #raise VocabTermDataError(errorMessage)
441
442        return currentVersion
443
444       
445    def getTermCurrentVocabURL(self, termName):
446        '''
447        Get the current URL on the vocab server to the specified term
448        @param termName: name of term whose URL to return
449        @raise VocabTermDataError: if term not found
450        '''
451        logging.debug("Looking up vocab data for term: '%s'" %termName)
452       
453        if termName not in self.TERM_DATA:
454           
455            # NB, some item term IDs are different from their item name; this was
456            # to allow the import of historical data and should be fixed at some
457            # point
458            foundTerm = False
459            for key, item in self.TERM_DATA.items():
460                if termName == item.termID:
461                    termName = key
462                    foundTerm = True
463                    break
464                   
465            if not foundTerm:
466                raise VocabTermDataError("Could not find term, '%s' in \
467                    defined list of valid vocab terms - exiting" %termName)
468       
469        if termName not in self.latestTermVersion:
470            self.getLatestTermVersion(termName)
471
472        uri = self.TERM_DATA[termName].vocabURL + \
473            "/" + self.latestTermVersion[termName] + "/" + \
474            self.TERM_DATA[termName].termID
475        logging.debug("Returning vocab URL: '%s'" %uri)
476        return  uri
477   
478   
479    def getCurrentVocabURI(self, uri):
480        '''
481        Look up a specified URI and return the current version of it
482        '''
483        logging.debug("Looking up current version of uri: '%s'" %uri)
484        currentVersion = self._getURIVersion(uri)
485        logging.debug("URI version looked up")
486        if currentVersion is not None:
487            if not uri.endswith('/'):
488                uri += '/'
489            return uri + str(currentVersion)
490        return uri
491   
492    def getVTI(self, term):
493        '''
494        Return the vocab term item for the specified term
495        @param term: term ID - one of the constants defined above
496        @return VocabTermItem corresponding to term
497        @raise VocabTermDataError if term not found
498        '''
499        vti = self.TERM_DATA.get(term)
500        if not vti:
501            raise VocabTermDataError("No info for term, '%s', found" %term)
502   
503        return vti
504   
505   
506    def getTermItemfromFullVocabURI(self, uri):
507        '''
508        Given a full term id - i.e. with vocab uri + version + term ID
509        determine the correct vocab data term item
510        @param uri: full term ID with version + vocab uri
511        @return vocab term item corresponding to the input uri
512        @raise VocabTermDataError if term ID not found
513        '''
514        logging.debug("Determining term ID for uri, '%s'" %uri)
515        termID = uri.split('/')[-1]
516        vti = None
517        if self.TERM_DATA.get(termID) and uri.startswith(self.TERM_DATA[termID].vocabURL):
518            vti = self.TERM_DATA[termID]
519        else:
520            for item in self.TERM_DATA.itervalues():
521                if item.termID == termID and uri.startswith(item.vocabURL):
522                    vti = item
523                    break
524       
525        if not vti:
526            raise VocabTermDataError("Unrecognised term URI: '%s'" %uri)
527
528        logging.debug("Found matching term - '%s'" %vti.termID)
529        return vti
530   
531   
532    def isDeployable(self, vti):
533        '''
534        Determines whether a vocab term item can be used as part of a deployment
535        @param vti: vocab term item to check
536        @return True if allowed as part of deployment, false otherwise
537        '''
538        logging.debug("Checking if item is deployable")
539        if vti:
540            if vti.termID == self.ACTIVITY_TERM or \
541                vti.termID == self.DPT_TERM or \
542                vti.termID == self.OBS_TERM:
543                logging.debug("- item is deployable")
544                return True
545       
546        logging.debug("- item is not deployable")
547        return False
548   
549   
550    def isDeployment(self, vti):
551        '''
552        Determines whether a vocab term item is a deployment item
553        @param vti: vocab term item to check
554        @return True if a deployment, false otherwise
555        '''
556        logging.debug("Checking if item is a deployment")
557        if vti:
558            if vti.termID == self.DEPLOYMENT_TERM:
559                logging.debug("- item is a deployment")
560                return True
561       
562        logging.debug("- item is not a deployment")
563        return False
564   
565   
566    def isGranule(self, vti):
567        '''
568        Determines whether a vocab term item is a granule item
569        @param vti: vocab term item to check
570        @return True if a granule, false otherwise
571        '''
572        logging.debug("Checking if item is a granule")
573        if vti:
574            if vti.termID == self.GRANULE_TERM:
575                logging.debug("- item is a granule")
576                return True
577       
578        logging.debug("- item is not a granule")
579        return False
580               
Note: See TracBrowser for help on using the repository browser.