source: exist/trunk/python/ndgUtils/vocabtermdata.py @ 4532

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/vocabtermdata.py@4532
Revision 4532, 28.0 KB checked in by cbyrom, 11 years ago (diff)

Generalise atomDeploymentsList.xq to become atomTypeList.xq and adjust
surrounding code + tidy up some vocab data wrt deployment terms.

Line 
1#!/usr/bin/env python
2'''
3 Class to store and access the various vocab term data
4 
5 @author: C Byrom, Tessella Jul 2008
6'''
7import sys, logging, commands, string, os, time, re, urllib
8   
def isValidTermURI(uri):
    '''
    Determines whether a specific vocab term uri is valid, by retrieving it
    and checking that the returned content includes an '<rdf:RDF' marker.
    NB, using the utilities.checkURL method won't typically work since the
    RDF data is exposed directly - i.e. without HEAD information which is
    usually looked for
    @param uri: vocab term uri to check
    @return: True if the retrieved content contains RDF data, False otherwise
    - including when the retrieval itself fails (the failure is logged and
    not propagated)
    '''
    logging.debug("Checking vocab term uri, '%s'" %uri)
    try:
        page = urllib.urlopen(uri)
        try:
            pageData = page.read()
        finally:
            # always release the connection - even if the read fails
            page.close()

        if '<rdf:RDF' in pageData:
            logging.debug("- found valid term")
            return True

    except Exception as e:
        # NB, log the exception itself rather than e.message, since the
        # latter is empty for multi-argument exceptions such as IOError
        logging.error("Exception thrown whilst verifying uri: '%s'" %e)

    logging.debug("- term appears to be invalid")
    return False
31                     
32
class VocabTermItem(object):
    '''
    Class representing single vocab term item
    '''
    def __init__(self, vocabURL, termID, title=None):
        '''
        @param vocabURL: url of the vocab list that the term belongs to
        @param termID: ID of the term within that vocab list
        @keyword title: label to associate with the term; defaults to None
        '''
        self.vocabURL = vocabURL
        self.termID = termID
        self.title = title

    def __repr__(self):
        '''
        Provide a readable representation of the item - to make log output
        and debugging sessions more informative
        '''
        return "%s(%r, %r, title=%r)" \
            %(self.__class__.__name__, self.vocabURL, self.termID, self.title)
41
42
class VocabTermDataError(ValueError):
    """
    Error raised when vocab term data cannot be resolved.
    NB, the message is written to the error log as the exception is created,
    so code raising it need not log the message separately.
    """
    def __init__(self, msg):
        logging.error(msg)
        super(VocabTermDataError, self).__init__(msg)
50
51
class VocabTermData(object):
    '''
    Class representing vocab term data - including
    methods to look these up to ensure they are current
    '''

    # Names used to refer to the individual vocab terms; apart from URI_TERM
    # (whose entry is commented out) each is a key in the TERM_DATA dictionary

    # link and service terms - see ONLINE_REF_TYPES for those grouped as
    # online references
    OBJECT_PAGE_TERM = 'ObjectPage'
    DATA_URL_TERM = 'DataURL'
    DATA_PAGE_TERM = 'DataPage'
    CURATOR_PAGE_TERM = 'CuratorPage'
    EXT_METADATA_TERM = 'ExtMetadata'
    METADATA_SOURCE_TERM = 'MetadataSource'
    URI_TERM = 'URI'
    LOGO_TERM = 'LOGO'
    NUM_SIM_TERM = 'NumSim'
    OPENDAP_TERM = 'OPENDAP'
    THREDDS_TERM = 'THREDDS'
    WMS_TERM = 'WMS'
    WCS_TERM = 'WCS'
    WFS_TERM = 'WFS'
    LAS_TERM = 'LAS'
    DATA_EXTRACTOR_TERM = 'DataExtractor' 
    FILE_BROWSER_TERM = 'FileBrowser'   
    CSML_TERM = 'CSML'
   
    # main atom types - see ATOM_TYPES
    ACTIVITY_TERM = 'ACTIVITY'
    DPT_TERM = 'DPT'
    OBS_TERM = 'OBS'
    GRANULE_TERM = 'GRANULE'
    DE_TERM = "DE"
   
    # dpt subtypes
    LIDAR_TERM = "dgLidar"
    RADAR_TERM = "dgRadar"
    SONDE_TERM = "dgSonde"
    NAVIGATION_TERM = "dgNavigation"
    GAS_CHROMATOGRAPH_TERM = "dgGasChromatograph"
    SPECTROMETER_TERM = "dgSpectrometer"
    MASS_SPECTROMETER_TERM = "dgMassSpectrometer"
    MET_SENSOR_TERM = "dgMetSensor"
    DOAS_TERM = "dgDOAS"
    ASOZ_TERM = "dgASOZ"
    RADIOMETER_TERM = "dgRadiometer"
    FAGE_TERM = "dgFAGE"
    IMAGER_TERM = "dgImager"
    FILTER_TERM = "dgFilter"
    PARTICLE_COUNTER_TERM = "dgParticleCounter"
    SAMPLER_TERM = "dgSampler"
    OTHER_INSTRUMENT_TYPE_TERM = "dgOtherInstrumentType"
    MODEL_TERM = "dgModel"
    INSTRUMENT_TERM = "dgInstrument"
   
    # de subtypes
    SIMULATION_TERM = "dgSimulation"
    ANALYSIS_TERM = "dgAnalysis"
    MEASUREMENT_TERM = "dgMeasurement"
   
    # activity subtypes
    DATA_COLLECTION_TERM = "dgActivityDataCollection"
    DATA_PROJECT_TERM = "dgActivityDataProject"
    DATA_CAMPAIGN_TERM = "dgActivityDataCampaign"
    DATA_INVESTIGATION_TERM = "dgActivityDataInvestigation"
    FLIGHT_TERM = "dgFlight"
    CRUISE_TERM = "dgCruise"
    FUNDING_PROGRAM_TERM = "dgFundingProgram"
    DEPLOYMENT_TERM = "Deployment"
    # NB, this is a specialised activity with the subtype deployment pre-set
    ACTIVITY_DEPLOYMENT_TERM = "ActivityDeployment"
   
    # obs subtypes
    STATIONARY_PLATFORM_TERM = "dgStationaryPlatform"
    MOVING_PLATFORM_TERM = "dgMovingPlatform"
    LAND_STATION_TERM = "dgLandStation"
    MOORING_TERM = "dgMooring"
    STATION_GROUP_TERM = "dgStationGroup"
    SHIP_TERM = "dgShip"
    AIRCRAFT_TERM = "dgAircraft"
    SATELLITE_TERM = "dgSatellite"
    COMPUTER_TERM = "dgComputer"

    # provider types
    BADC_TERM = 'badc.nerc.ac.uk'
    NEODC_TERM = 'neodc.nerc.ac.uk'
135   
    # Look up of term name -> VocabTermItem, providing the vocab url, term ID
    # and title of each term.
    # NB, entries whose url includes 'NOT YET SET UP' have no vocab list url
    # defined here and use the term name itself as their term ID
    TERM_DATA = {
                 # link/service terms
                 OBJECT_PAGE_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '1', title = 'Object Home Page'),
                 DATA_URL_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '2', title = 'Data URL'),
                 DATA_PAGE_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '3', title = 'Data Home Page'),
                 CURATOR_PAGE_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '4', title = 'Curator Home Page'),
                 EXT_METADATA_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '5', title = 'Extended Metadata'),
                 METADATA_SOURCE_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '6', title = 'Original Metadata Source'),
                 #URI_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '3URI', title = 'Data Home Page'),
                 
                 # providers
                 BADC_TERM:VocabTermItem('NOT YET SET UP', BADC_TERM, title = 'British Atmospheric Data Centre'),
                 NEODC_TERM:VocabTermItem('NOT YET SET UP', NEODC_TERM, title = 'NERC Earth Observation Data Centre'),
                 
                 LOGO_TERM:VocabTermItem('NOT YET SET UP', LOGO_TERM, title = 'Logo'),
                 NUM_SIM_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '7', title = 'NumSim description'),
                 OPENDAP_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU010', title = 'GET DATA &gt; OPENDAP DATA (DODS)'),
                 THREDDS_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU013', title = 'GET DATA &gt; THREDDS DATA'),
                 WMS_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU026', title = 'GET SERVICE &gt; GET WEB MAP SERVICE (WMS)'),
                 WCS_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU024', title = 'GET SERVICE &gt; GET WEB COVERAGE SERVICE (WCS)'),
                 WFS_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU025', title = 'GET SERVICE &gt; GET WEB FEATURE SERVICE (WFS)'),
                 LAS_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU006', title = 'GET DATA &gt; LAS'),
                 DATA_EXTRACTOR_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '8', title = 'NDG DataExtractor'),
                 FILE_BROWSER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '9', title = 'File Browser'),
                 CSML_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N021', '1'),
                 # main atom types
                 ACTIVITY_TERM:VocabTermItem('Activity - NOT YET SET UP', ACTIVITY_TERM, title = 'Activity'),
                 ACTIVITY_DEPLOYMENT_TERM:VocabTermItem('Activity Deployment - NOT YET SET UP', ACTIVITY_DEPLOYMENT_TERM, title = DEPLOYMENT_TERM),
                 DPT_TERM:VocabTermItem('DPT - NOT YET SET UP', DPT_TERM, title = 'Data Production Tool'),
                 OBS_TERM:VocabTermItem('OBS - NOT YET SET UP', OBS_TERM, title = 'Observation Station'),
                 GRANULE_TERM:VocabTermItem('GRAN - NOT YET SET UP', GRANULE_TERM, title = 'Data Granule'),
                 DE_TERM:VocabTermItem('DE - NOT YET SET UP', DE_TERM, title = 'Data Entity'),
                 
                 # dpt subtypes
                 LIDAR_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG02', title = "Lidar"),
                 RADAR_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG03', title = "Radar"),
                 SONDE_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG04', title = "Sonde"),
                 NAVIGATION_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG05', title = "Navigation"),
                 GAS_CHROMATOGRAPH_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG06', title = "Gas Chromatograph"),
                 SPECTROMETER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG07', title = SPECTROMETER_TERM),
                 MASS_SPECTROMETER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG08', title = MASS_SPECTROMETER_TERM),
                 MET_SENSOR_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG09', title = MET_SENSOR_TERM),
                 DOAS_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG10', title = DOAS_TERM),
                 ASOZ_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG11', title = ASOZ_TERM),
                 RADIOMETER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG12', title = RADIOMETER_TERM),
                 FAGE_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG13', title = FAGE_TERM),
                 IMAGER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG14', title = IMAGER_TERM),
                 FILTER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG15', title = FILTER_TERM),
                 PARTICLE_COUNTER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG16', title = PARTICLE_COUNTER_TERM),
                 SAMPLER_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG17', title = SAMPLER_TERM),
                 OTHER_INSTRUMENT_TYPE_TERM:VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG99', title = OTHER_INSTRUMENT_TYPE_TERM),
                 MODEL_TERM:VocabTermItem('dgModel - NOT YET SET UP', MODEL_TERM, title = MODEL_TERM),
                 INSTRUMENT_TERM:VocabTermItem('dgInstrument - NOT YET SET UP', INSTRUMENT_TERM, title = INSTRUMENT_TERM),
                 
                 # de subtypes, followed by activity subtypes
                 SIMULATION_TERM:VocabTermItem('NOT YET SET UP', SIMULATION_TERM, title = SIMULATION_TERM),
                 ANALYSIS_TERM:VocabTermItem('NOT YET SET UP', ANALYSIS_TERM, title = ANALYSIS_TERM),
                 MEASUREMENT_TERM:VocabTermItem('NOT YET SET UP', MEASUREMENT_TERM, title = MEASUREMENT_TERM),
                 DATA_COLLECTION_TERM:VocabTermItem('NOT YET SET UP', DATA_COLLECTION_TERM, title = DATA_COLLECTION_TERM),
                 DATA_PROJECT_TERM:VocabTermItem('NOT YET SET UP', DATA_PROJECT_TERM, title = DATA_PROJECT_TERM),
                 DATA_CAMPAIGN_TERM:VocabTermItem('NOT YET SET UP', DATA_CAMPAIGN_TERM, title = DATA_CAMPAIGN_TERM),
                 DATA_INVESTIGATION_TERM:VocabTermItem('NOT YET SET UP', DATA_INVESTIGATION_TERM, title = DATA_INVESTIGATION_TERM),
                 FLIGHT_TERM:VocabTermItem('NOT YET SET UP', FLIGHT_TERM, title = FLIGHT_TERM),
                 CRUISE_TERM:VocabTermItem('NOT YET SET UP', CRUISE_TERM, title = CRUISE_TERM),
                 FUNDING_PROGRAM_TERM:VocabTermItem('NOT YET SET UP', FUNDING_PROGRAM_TERM, title = FUNDING_PROGRAM_TERM),
                 DEPLOYMENT_TERM:VocabTermItem('NOT YET SET UP', DEPLOYMENT_TERM, title = DEPLOYMENT_TERM),
                 
                 # obs subtypes
                 STATIONARY_PLATFORM_TERM:VocabTermItem('NOT YET SET UP', STATIONARY_PLATFORM_TERM, title = STATIONARY_PLATFORM_TERM),
                 MOVING_PLATFORM_TERM:VocabTermItem('NOT YET SET UP', MOVING_PLATFORM_TERM, title = MOVING_PLATFORM_TERM),
                 LAND_STATION_TERM:VocabTermItem('NOT YET SET UP', LAND_STATION_TERM, title = LAND_STATION_TERM),
                 MOORING_TERM:VocabTermItem('NOT YET SET UP', MOORING_TERM, title = MOORING_TERM),
                 STATION_GROUP_TERM:VocabTermItem('NOT YET SET UP', STATION_GROUP_TERM, title = STATION_GROUP_TERM),
                 SHIP_TERM:VocabTermItem('NOT YET SET UP', SHIP_TERM, title = SHIP_TERM),
                 AIRCRAFT_TERM:VocabTermItem('NOT YET SET UP', AIRCRAFT_TERM, title = AIRCRAFT_TERM),
                 SATELLITE_TERM:VocabTermItem('NOT YET SET UP', SATELLITE_TERM, title = SATELLITE_TERM),
                 COMPUTER_TERM:VocabTermItem('NOT YET SET UP', COMPUTER_TERM, title = COMPUTER_TERM)
                 }
208
    # Lists of term names making up each data category - these are the lists
    # returned, as term items, by getValidTypes()
    ATOM_TYPES = [DE_TERM, GRANULE_TERM, ACTIVITY_DEPLOYMENT_TERM, ACTIVITY_TERM, DPT_TERM, OBS_TERM]
   
    DEPLOYABLE_ATOM_TYPES = [ACTIVITY_TERM, DPT_TERM, OBS_TERM]

    PROVIDER_TYPES = [BADC_TERM, NEODC_TERM]

    ONLINE_REF_TYPES = [ OBJECT_PAGE_TERM, DATA_URL_TERM, DATA_PAGE_TERM, \
                         CURATOR_PAGE_TERM, EXT_METADATA_TERM, METADATA_SOURCE_TERM, \
                         NUM_SIM_TERM, \
                         OPENDAP_TERM, THREDDS_TERM, WMS_TERM, WCS_TERM, WFS_TERM, \
                         DATA_EXTRACTOR_TERM, FILE_BROWSER_TERM, LAS_TERM, LOGO_TERM]
    # NB, URI_TERM is currently excluded from the online ref types

    # Category names - to be passed to getValidTypes() to select one of the
    # lists above
    ATOM_CATEGORY = "atom"
    DEPLOYABLE_ATOM_CATEGORY = "deployableatom"
    ONLINE_REF_CATEGORY = "onlineref"
    PROVIDER_CATEGORY = "provider"
   
    # A dictionary to group the various valid subtypes of atoms - grouped by their
    # main type
    SUBTYPE_TERMS = {
                     DPT_TERM: [
                        LIDAR_TERM, RADAR_TERM, \
                        SONDE_TERM, NAVIGATION_TERM, \
                        GAS_CHROMATOGRAPH_TERM, SPECTROMETER_TERM, \
                        MASS_SPECTROMETER_TERM,
                        MET_SENSOR_TERM, DOAS_TERM,
                        ASOZ_TERM, RADIOMETER_TERM,
                        FAGE_TERM, IMAGER_TERM,
                        FILTER_TERM, PARTICLE_COUNTER_TERM,
                        SAMPLER_TERM, OTHER_INSTRUMENT_TYPE_TERM,
                        MODEL_TERM, INSTRUMENT_TERM
                        ],
                     DE_TERM: [
                        SIMULATION_TERM, ANALYSIS_TERM, \
                        MEASUREMENT_TERM       
                        ],
                     ACTIVITY_TERM: [
                        DATA_COLLECTION_TERM, DATA_PROJECT_TERM, \
                        DATA_CAMPAIGN_TERM, DATA_INVESTIGATION_TERM, \
                        FLIGHT_TERM, CRUISE_TERM, \
                        FUNDING_PROGRAM_TERM, DEPLOYMENT_TERM
                        ],
                     OBS_TERM: [
                        STATIONARY_PLATFORM_TERM, MOVING_PLATFORM_TERM, \
                        LAND_STATION_TERM, MOORING_TERM, \
                        STATION_GROUP_TERM, SHIP_TERM, \
                        AIRCRAFT_TERM, SATELLITE_TERM, \
                        COMPUTER_TERM
                        ]
                     }
   
    # Period, in seconds, added to the current time to set the next refresh
    # time after the vocab server page has been retrieved
    ONE_HOUR = 3600.0
   
    # Page retrieved by getVocabServerData() and searched for list versions
    VOCAB_SERVER_URL = 'http://vocab.ndg.nerc.ac.uk/clients/whatLists'

    # NOTE(review): this points at localhost, with the snow.badc.rl.ac.uk
    # alternative commented out - confirm the value required before deploying
#    BROWSE_ROOT_URL = "http://snow.badc.rl.ac.uk:5000/view/"
    BROWSE_ROOT_URL = "http://localhost:5000/view/"
267
268    def __init__(self):
269        logging.info("Setting up VocabTermData object")
270        self.VOCAB_DATA_PAGE = None
271        # introduce slight delay here
272        self.REFRESH_TIME = time.time() - 2.0
273        self.latestTermVersion = {}
274        logging.info("VocabTermData object set up")
275
276
277    def isValidSubType(self, mainType, subType):
278        '''
279        Determine whether a specified subtype is valid for a particular
280        main type
281        @param mainType: term ID of the main type of the data
282        @param subType: term ID of the subtype of the data
283        @return: True if the subtype is valid
284        @raise VocabTermDataError: if the mainType is not recognised or the subType is not valid
285        '''
286        if not self.SUBTYPE_TERMS.has_key(mainType):
287            errorMessage = "Error: unrecognised data type: '%s'" %mainType
288            logging.error(errorMessage)
289            raise ValueError(errorMessage)
290
291        if subType in self.SUBTYPE_TERMS[mainType]:
292            return True
293           
294        errorMessage = "Error: subtype, '%s' is not valid for data type, '%s'" \
295            %(subType, mainType)
296        raise VocabTermDataError(errorMessage)
297
298
299    def getValidTypes(self, category):
300        '''
301        Return a list of the valid types available for a particular data category
302        @param category: type of term info to look up - e.g. atoms or providers
303        - this should be specified using the ..._CATEGORY instance variables
304        defined above
305        @return list of VocabTermInfo objects for the specified category
306        @raise VocabTermDataError: if unrecognised data category
307        '''
308        logging.debug("Lookup up list of valid %s types" %category)
309        catList = []
310        if category == self.ATOM_CATEGORY:
311            catList = self.ATOM_TYPES
312        elif category == self.DEPLOYABLE_ATOM_CATEGORY:
313            catList = self.DEPLOYABLE_ATOM_TYPES
314        elif category == self.PROVIDER_CATEGORY:
315            catList = self.PROVIDER_TYPES
316        elif category == self.ONLINE_REF_CATEGORY:
317            catList = self.ONLINE_REF_TYPES
318        else:
319            errorMessage = "Unrecognised data category, '%s'" %category
320            raise VocabTermDataError(errorMessage)
321           
322        types = []
323        for st in catList:
324            types.append(self.TERM_DATA[st])
325        return types
326
327
328    def tidySubTypeTitle(self, title):
329        '''
330        Tidy up the title of subtypes slightly, if need be
331        - NB, these are inherited from original moles format so are
332        not really 'human readable'
333        '''
334        if title and title.startswith('dg'):
335            title = title[2:]
336            # and fix any camelcase - putting into words
337            title = re.sub('([A-Z])', r" \1", title).strip()
338
339        return title
340
341    def getValidSubTypes(self, atomType):
342        '''
343        Get list of subtypes that are valid wrt a specified atom type
344        @param atomType: term ID for the atom type whose subtypes need to
345        be looked up
346        @return: list of valid subtypes
347        '''
348        logging.debug("Lookup up subtypes for atom type, '%s'" %atomType)
349        subTypes = self.SUBTYPE_TERMS.get(atomType) or []
350        types = []
351        for st in subTypes:
352            item = self.TERM_DATA[st]
353            item.title = self.tidySubTypeTitle(item.title)
354            types.append(item)
355        logging.debug("Found subtypes: %s" %subTypes)
356        return types
357
358       
359    def getTermFromTitle(self, title):
360        '''
361        Given a term title/label, get back the related term id
362        @param title: title/label of term id to retrieve
363        @raise VocabTermDataError: if more than one title or no title is returned 
364        '''
365        termID = []
366        for val in self.TERM_DATA.itervalues():
367            if val.title == title:
368                termID.append(val.termID)
369       
370        if len(termID) != 1:
371            errorMessage = "Error: could not accurately determine the vocab term \
372                ID for the label, '%s' - %s values returned" %(title, len(termID))
373            raise VocabTermDataError(errorMessage)
374
375        return termID[0]
376       
377
378    def _getVocabDataPage(self):
379        '''
380        Getter method to allow regular refreshing of data
381        '''
382        if self.REFRESH_TIME < time.time():
383            self.getVocabServerData()
384       
385        return self.VOCAB_DATA_PAGE
386
387   
388    def getVocabServerData(self):
389        '''
390        Retrieve the web page containing the versioning info for the various vocab terms
391        '''
392        logging.info("Retrieving vocab server data page")
393        f = urllib.urlopen(self.VOCAB_SERVER_URL)
394        self.VOCAB_DATA_PAGE = f.read()
395        f.close()
396        self.REFRESH_TIME = time.time() + self.ONE_HOUR
397        logging.info("Vocab server data retrieved")
398
399
400    def getLatestTermVersions(self):
401        '''
402        Retrieve the latest versions of the vocab terms required
403        - NB, refreshes the data on an hourly basis, if necessary
404        '''
405        # now need to parse the returned data to get the current version number
406        for termName in self.TERM_DATA:
407            if termName not in self.latestTermVersion:
408                self.getLatestTermVersion(termName)
409
410                   
411    def getLatestTermVersion(self, termName):
412        '''
413        Parse the vocab server data and determine the latest version number of the term with the specified name
414        @param termName: name of term whose current version needs to be established
415        '''
416        logging.info("Getting latest term version for term, '%s'" %termName)
417        # check for recent data - NB, the term will probably share a base url with other terms
418        # so may have already have the correct URL set up
419        if termName not in self.latestTermVersion or self.REFRESH_TIME < time.time():
420   
421            uri = self.TERM_DATA[termName].vocabURL
422            currentVersion = self._getURIVersion(uri)
423
424            self.latestTermVersion[termName] = str(currentVersion)
425            #if currentVersion is not None:
426            #    if currentVersion:
427            #        self.latestTermVersion[termName] = str(currentVersion)
428            #    else:
429            #        self.latestTermVersion[termName] = str(uri)
430        logging.info("Latest term version for term, '%s' retrieved" %termName)
431
432
433    def _getURIVersion(self, uri):
434        '''
435        Parse the server data for a specified uri and return the latest version number of it
436        @param uri: uri to look for
437        @raise VocabTermDataError: if specified uri not found in vocab server
438        @return version of vocab uri, '' if uri contains latest version, None if uri not found
439        '''
440        logging.debug("Getting current version number of uri, '%s'" %uri)
441        # allow the data to be refreshed on an hourly basis
442        pageData = self._getVocabDataPage()
443       
444        uri = uri.replace('/term/','/list/')
445        if uri.endswith('/'):
446            uri = uri.rstrip('/')
447        # NB, the uri may already have a version number included in it - so allow this in the search
448        regExp = re.compile(r'(' + uri + '(/(\d{1,}))?)')
449        currentVersion = None
450        foundTerm = False
451        for termVals in regExp.findall(pageData):
452            foundTerm = True
453            version = termVals[2]
454            # version number must be included in original uri, if match found without a version number
455            # being split out - so ignore this - since the original uri is already specified in full
456            if not version:
457                logging.debug("Specified URI already contains current version")
458                return ''
459
460            # TODO: check how the versioning system will be done - i.e. 3.2.4 type versioning will cause this to break
461            if not currentVersion:
462                currentVersion = int(version)
463            elif int(version) > currentVersion:
464                currentVersion = int(version)
465
466        if currentVersion == None:
467            errorMessage = "Could not find information in vocab server for uri, '%s' - exiting" %uri
468            logging.error(errorMessage)
469            # TODO: uncomment the ValueError once all the vocab terms have been defined
470            #raise VocabTermDataError(errorMessage)
471        else:
472            logging.debug("Version number found: '%s'" %currentVersion)
473
474        return currentVersion
475
476       
477    def getTermCurrentVocabURL(self, termName):
478        '''
479        Get the current URL on the vocab server to the specified term
480        @param termName: name of term whose URL to return
481        @raise VocabTermDataError: if term not found
482        '''
483        logging.debug("Looking up vocab data for term: '%s'" %termName)
484       
485        baseURL = None
486        if termName not in self.TERM_DATA:
487           
488            # NB, some item term IDs are different from their item name; this was
489            # to allow the import of historical data and should be fixed at some
490            # point
491            foundTerm = False
492            for key, item in self.TERM_DATA.items():
493                if termName == item.termID:
494                    termName = key
495                    foundTerm = True
496                    baseURL = item.vocabURL
497                    break
498                   
499            if not foundTerm:
500                raise VocabTermDataError("Could not find term, '%s' in \
501                    defined list of valid vocab terms - exiting" %termName)
502        else:
503            baseURL = self.TERM_DATA[termName].vocabURL
504           
505        if termName not in self.latestTermVersion:
506            self.getLatestTermVersion(termName)
507
508        uri = None
509        if termName in self.latestTermVersion:
510            uri = baseURL + \
511                "/" + self.latestTermVersion[termName] + "/" + \
512                self.TERM_DATA[termName].termID
513            logging.debug("Returning vocab URL: '%s'" %uri)
514        else:
515            logging.debug("URL not found - returning base URL, '%s'" %baseURL)
516            uri = baseURL
517           
518        return  uri
519   
520   
521    def getCurrentVocabURI(self, uri):
522        '''
523        Look up a specified URI and return the current version of it
524        '''
525        logging.debug("Looking up current version of uri: '%s'" %uri)
526        currentVersion = self._getURIVersion(uri)
527        logging.debug("URI version looked up")
528        # NB, this can validly return None or '' - ignore both cases
529        if currentVersion:
530            if not uri.endswith('/'):
531                uri += '/'
532            return uri + str(currentVersion)
533        return uri
534   
535    def getVTI(self, term):
536        '''
537        Return the vocab term item for the specified term
538        @param term: term ID - one of the constants defined above
539        @return VocabTermItem corresponding to term
540        @raise VocabTermDataError if term not found
541        '''
542        vti = self.TERM_DATA.get(term)
543        if not vti:
544            raise VocabTermDataError("No info for term, '%s', found" %term)
545   
546        return vti
547   
548   
549    def getTermItemfromFullVocabURI(self, uri):
550        '''
551        Given a full term id - i.e. with vocab uri + version + term ID
552        determine the correct vocab data term item
553        @param uri: full term ID with version + vocab uri
554        @return vocab term item corresponding to the input uri
555        @raise VocabTermDataError if term ID not found
556        '''
557        logging.debug("Determining term ID for uri, '%s'" %uri)
558        termID = uri.split('/')[-1]
559
560        return self.getTermItemfromURIAndTerm(uri, termID)
561   
562   
563    def getTermItemfromURIAndTerm(self, uri, termID):
564        '''
565        Given a term id and its uri
566        determine the correct vocab data term item
567        @param uri: base uri for vocab term - i.e. without versioning info
568        @param termID: term ID to look up
569        @return vocab term item corresponding to the input uri/term
570        @raise VocabTermDataError if term ID not found
571        '''
572        logging.debug("Determining term item for uri, '%s', term name, '%s'" %(uri, termID))
573        vti = None
574        if self.TERM_DATA.get(termID) and uri.startswith(self.TERM_DATA[termID].vocabURL):
575            vti = self.TERM_DATA[termID]
576        else:
577            for item in self.TERM_DATA.itervalues():
578                if item.termID == termID and uri.startswith(item.vocabURL):
579                    vti = item
580                    break
581       
582        if not vti:
583            raise VocabTermDataError("Unrecognised term URI: '%s'" %uri)
584
585        logging.debug("Found matching term - '%s'" %vti.termID)
586        return vti
587   
588   
589    def isDeployable(self, vti):
590        '''
591        Determines whether a vocab term item can be used as part of a deployment
592        @param vti: vocab term item to check
593        @return True if allowed as part of deployment, false otherwise
594        '''
595        logging.debug("Checking if item is deployable")
596        if vti:
597            if vti.termID == self.ACTIVITY_TERM or \
598                vti.termID == self.DPT_TERM or \
599                vti.termID == self.OBS_TERM:
600                logging.debug("- item is deployable")
601                return True
602       
603        logging.debug("- item is not deployable")
604        return False
605   
606   
607    def isDeployment(self, vti):
608        '''
609        Determines whether a vocab term item is a deployment item
610        @param vti: vocab term item to check
611        @return True if a deployment, false otherwise
612        '''
613        logging.debug("Checking if item is a deployment")
614        if vti:
615            if vti.termID == self.DEPLOYMENT_TERM:
616                logging.debug("- item is a deployment")
617                return True
618       
619        logging.debug("- item is not a deployment")
620        return False
621   
622   
623    def isGranule(self, vti):
624        '''
625        Determines whether a vocab term item is a granule item
626        @param vti: vocab term item to check
627        @return True if a granule, false otherwise
628        '''
629        logging.debug("Checking if item is a granule")
630        if vti:
631            if vti.termID == self.GRANULE_TERM:
632                logging.debug("- item is a granule")
633                return True
634       
635        logging.debug("- item is not a granule")
636        return False
Note: See TracBrowser for help on using the repository browser.