source: exist/trunk/python/ndgUtils/vocabtermdata.py @ 4219

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/vocabtermdata.py@4219
Revision 4219, 17.4 KB checked in by cbyrom, 12 years ago (diff)

Simplify vocab data by only returning termIDs for the subtypes + fix
a typo + extend Atom, allowing dicts of values to be set in the constructor +
add method to determine the eXist collection relating to the atom state.

Line 
1#!/usr/bin/env python
2'''
3 Class to store and access the various vocab term data
4 
5 @author: C Byrom, Tessella Jul 2008
6'''
7import sys, logging, commands, string, os, time, re
8import urllib
9
class VocabTermItem(object):
    '''
    Simple value holder describing one vocab term

    @ivar vocabURL: base URL of the vocab list the term belongs to
    @ivar termID: identifier of the term within that list
    @ivar title: optional human readable label for the term
    '''
    def __init__(self, vocabURL, termID, title=None):
        # store the three descriptive values exactly as supplied
        self.vocabURL, self.termID, self.title = vocabURL, termID, title
18
19
class VocabTermData(object):
    '''
    Class representing vocab term data - including
    methods to look these up to ensure they are current

    Version numbers are found by searching the page served at
    VOCAB_SERVER_URL; that page is cached on the instance and is fetched
    again once REFRESH_TIME has passed.
    '''

    URI_TERM = 'URI'
    LOGO_TERM = 'LOGO'
    NUM_SIM_TERM = 'NumSim'
    OPENDAP_TERM = 'OPENDAP'
    THREDDS_TERM = 'THREDDS'
    WMS_TERM = 'WMS'
    WCS_TERM = 'WCS'
    WFS_TERM = 'WFS'
    LAS_TERM = 'LAS'
    DATA_EXTRACTOR_TERM = 'DataExtractor'
    FILE_BROWSER_TERM = 'FileBrowser'
    CSML_TERM = 'CSML'
    ACTIVITY_TERM = 'ACTIVITY'
    DPT_TERM = 'DPT'
    OBS_TERM = 'OBS'
    GRANULE_TERM = 'GRANULE'
    DE_TERM = "DE"

    # dpt subtypes
    LIDAR_TERM = "dgLidar"
    RADAR_TERM = "dgRadar"
    SONDE_TERM = "dgSonde"
    NAVIGATION_TERM = "dgNavigation"
    GAS_CHROMATOGRAPH_TERM = "dgGasChromatograph"
    SPECTROMETER_TERM = "dgSpectrometer"
    MASS_SPECTROMETER_TERM = "dgMassSpectrometer"
    MET_SENSOR_TERM = "dgMetSensor"
    DOAS_TERM = "dgDOAS"
    ASOZ_TERM = "dgASOZ"
    RADIOMETER_TERM = "dgRadiometer"
    FAGE_TERM = "dgFAGE"
    IMAGER_TERM = "dgImager"
    FILTER_TERM = "dgFilter"
    PARTICLE_COUNTER_TERM = "dgParticleCounter"
    SAMPLER_TERM = "dgSampler"
    OTHER_INSTRUMENT_TYPE_TERM = "dgOtherInstrumentType"
    MODEL_TERM = "dgModel"
    INSTRUMENT_TERM = "dgInstrument"

    # de subtypes
    SIMULATION_TERM = "dgSimulation"
    ANALYSIS_TERM = "dgAnalysis"
    MEASUREMENT_TERM = "dgMeasurement"

    # activity subtypes
    DATA_COLLECTION_TERM = "dgActivityDataCollection"
    DATA_PROJECT_TERM = "dgActivityDataProject"
    DATA_CAMPAIGN_TERM = "dgActivityDataCampaign"
    DATA_INVESTIGATION_TERM = "dgActivityDataInvestigation"
    FLIGHT_TERM = "dgFlight"
    CRUISE_TERM = "dgCruise"
    FUNDING_PROGRAM_TERM = "dgFundingProgram"

    # obs subtypes
    STATIONARY_PLATFORM_TERM = "dgStationaryPlatform"
    MOVING_PLATFORM_TERM = "dgMovingPlatform"
    LAND_STATION_TERM = "dgLandStation"
    MOORING_TERM = "dgMooring"
    STATION_GROUP_TERM = "dgStationGroup"
    SHIP_TERM = "dgShip"
    AIRCRAFT_TERM = "dgAircraft"
    SATELLITE_TERM = "dgSatellite"
    COMPUTER_TERM = "dgComputer"

    # Lookup of term name -> VocabTermItem (vocab URL, term ID and label).
    # Entries whose URL reads '... NOT YET SET UP' are placeholders: they
    # are not vocab server URLs, so no version can be found for them.
    TERM_DATA = {
        URI_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '3', title='Data Home Page'),
        LOGO_TERM: VocabTermItem('LOGO', 'LOGO', title='Logo'),
        NUM_SIM_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '7', title='NumSim description'),
        OPENDAP_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU010', title='GET DATA > OPENDAP DATA (DODS)'),
        THREDDS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU013', title='GET DATA > THREDDS DATA'),
        WMS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU026', title='GET SERVICE > GET WEB MAP SERVICE (WMS)'),
        WCS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU024', title='GET SERVICE > GET WEB COVERAGE SERVICE (WCS)'),
        WFS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU025', title='GET SERVICE > GET WEB FEATURE SERVICE (WFS)'),
        LAS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU006', title='GET DATA > LAS'),
        DATA_EXTRACTOR_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '8', title='NDG DataExtractor'),
        FILE_BROWSER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '9', title='File Browser'),
        CSML_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N021', '1'),
        ACTIVITY_TERM: VocabTermItem('Activity - NOT YET SET UP', ACTIVITY_TERM, title='Activity'),
        DPT_TERM: VocabTermItem('DPT - NOT YET SET UP', DPT_TERM, title='Data Production Tool'),
        OBS_TERM: VocabTermItem('OBS - NOT YET SET UP', OBS_TERM, title='Observation Station'),
        GRANULE_TERM: VocabTermItem('GRAN - NOT YET SET UP', GRANULE_TERM, title='Data Granule'),
        DE_TERM: VocabTermItem('DE - NOT YET SET UP', DE_TERM, title='Data Entity'),

        LIDAR_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG02', title=LIDAR_TERM),
        RADAR_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG03', title=RADAR_TERM),
        SONDE_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG04', title=SONDE_TERM),
        NAVIGATION_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG05', title=NAVIGATION_TERM),
        GAS_CHROMATOGRAPH_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG06', title=GAS_CHROMATOGRAPH_TERM),
        SPECTROMETER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG07', title=SPECTROMETER_TERM),
        MASS_SPECTROMETER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG08', title=MASS_SPECTROMETER_TERM),
        MET_SENSOR_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG09', title=MET_SENSOR_TERM),
        DOAS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG10', title=DOAS_TERM),
        ASOZ_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG11', title=ASOZ_TERM),
        RADIOMETER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG12', title=RADIOMETER_TERM),
        FAGE_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG13', title=FAGE_TERM),
        IMAGER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG14', title=IMAGER_TERM),
        FILTER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG15', title=FILTER_TERM),
        PARTICLE_COUNTER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG16', title=PARTICLE_COUNTER_TERM),
        SAMPLER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG17', title=SAMPLER_TERM),
        OTHER_INSTRUMENT_TYPE_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG99', title=OTHER_INSTRUMENT_TYPE_TERM),
        MODEL_TERM: VocabTermItem('dgModel - NOT YET SET UP', MODEL_TERM, title=MODEL_TERM),
        INSTRUMENT_TERM: VocabTermItem('dgInstrument - NOT YET SET UP', INSTRUMENT_TERM, title=INSTRUMENT_TERM),

        SIMULATION_TERM: VocabTermItem('NOT YET SET UP', SIMULATION_TERM, title=SIMULATION_TERM),
        ANALYSIS_TERM: VocabTermItem('NOT YET SET UP', ANALYSIS_TERM, title=ANALYSIS_TERM),
        MEASUREMENT_TERM: VocabTermItem('NOT YET SET UP', MEASUREMENT_TERM, title=MEASUREMENT_TERM),
        DATA_COLLECTION_TERM: VocabTermItem('NOT YET SET UP', DATA_COLLECTION_TERM, title=DATA_COLLECTION_TERM),
        DATA_PROJECT_TERM: VocabTermItem('NOT YET SET UP', DATA_PROJECT_TERM, title=DATA_PROJECT_TERM),
        DATA_CAMPAIGN_TERM: VocabTermItem('NOT YET SET UP', DATA_CAMPAIGN_TERM, title=DATA_CAMPAIGN_TERM),
        DATA_INVESTIGATION_TERM: VocabTermItem('NOT YET SET UP', DATA_INVESTIGATION_TERM, title=DATA_INVESTIGATION_TERM),
        FLIGHT_TERM: VocabTermItem('NOT YET SET UP', FLIGHT_TERM, title=FLIGHT_TERM),
        CRUISE_TERM: VocabTermItem('NOT YET SET UP', CRUISE_TERM, title=CRUISE_TERM),
        FUNDING_PROGRAM_TERM: VocabTermItem('NOT YET SET UP', FUNDING_PROGRAM_TERM, title=FUNDING_PROGRAM_TERM),

        STATIONARY_PLATFORM_TERM: VocabTermItem('NOT YET SET UP', STATIONARY_PLATFORM_TERM, title=STATIONARY_PLATFORM_TERM),
        MOVING_PLATFORM_TERM: VocabTermItem('NOT YET SET UP', MOVING_PLATFORM_TERM, title=MOVING_PLATFORM_TERM),
        LAND_STATION_TERM: VocabTermItem('NOT YET SET UP', LAND_STATION_TERM, title=LAND_STATION_TERM),
        MOORING_TERM: VocabTermItem('NOT YET SET UP', MOORING_TERM, title=MOORING_TERM),
        STATION_GROUP_TERM: VocabTermItem('NOT YET SET UP', STATION_GROUP_TERM, title=STATION_GROUP_TERM),
        SHIP_TERM: VocabTermItem('NOT YET SET UP', SHIP_TERM, title=SHIP_TERM),
        AIRCRAFT_TERM: VocabTermItem('NOT YET SET UP', AIRCRAFT_TERM, title=AIRCRAFT_TERM),
        SATELLITE_TERM: VocabTermItem('NOT YET SET UP', SATELLITE_TERM, title=SATELLITE_TERM),
        COMPUTER_TERM: VocabTermItem('NOT YET SET UP', COMPUTER_TERM, title=COMPUTER_TERM),
    }

    # A dictionary to group the various valid subtypes of atoms - grouped by their
    # main type
    SUBTYPE_TERMS = {
        DPT_TERM: [
            LIDAR_TERM, RADAR_TERM,
            SONDE_TERM, NAVIGATION_TERM,
            GAS_CHROMATOGRAPH_TERM, SPECTROMETER_TERM,
            MASS_SPECTROMETER_TERM,
            MET_SENSOR_TERM, DOAS_TERM,
            ASOZ_TERM, RADIOMETER_TERM,
            FAGE_TERM, IMAGER_TERM,
            FILTER_TERM, PARTICLE_COUNTER_TERM,
            SAMPLER_TERM, OTHER_INSTRUMENT_TYPE_TERM,
            MODEL_TERM, INSTRUMENT_TERM,
        ],
        DE_TERM: [
            SIMULATION_TERM, ANALYSIS_TERM,
            MEASUREMENT_TERM,
        ],
        ACTIVITY_TERM: [
            DATA_COLLECTION_TERM, DATA_PROJECT_TERM,
            DATA_CAMPAIGN_TERM, DATA_INVESTIGATION_TERM,
            FLIGHT_TERM, CRUISE_TERM,
            FUNDING_PROGRAM_TERM,
        ],
        OBS_TERM: [
            STATIONARY_PLATFORM_TERM, MOVING_PLATFORM_TERM,
            LAND_STATION_TERM, MOORING_TERM,
            STATION_GROUP_TERM, SHIP_TERM,
            AIRCRAFT_TERM, SATELLITE_TERM,
            COMPUTER_TERM,
        ],
    }

    # lifetime, in seconds, of the cached vocab server page
    ONE_HOUR = 3600.0

    VOCAB_SERVER_URL = 'http://vocab.ndg.nerc.ac.uk/clients/whatLists'

    BROWSE_ROOT_URL = "http://localhost:5000/view/"

    def __init__(self):
        '''
        Set up an empty cache - the vocab server is not contacted here
        '''
        logging.info("Setting up VocabTermData object")
        # cached contents of the vocab server page; filled in on first lookup
        self.VOCAB_DATA_PAGE = None
        # time after which the cached page counts as stale - starting at
        # 'now' means the first lookup will fetch the page
        self.REFRESH_TIME = time.time()
        # term name -> latest version, held as a string
        self.latestTermVersion = {}
        logging.info("VocabTermData object set up")


    def isValidSubType(self, mainType, subType):
        '''
        Determine whether a specified subtype is valid for a particular
        main type
        @param mainType: term ID of the main type of the data
        @param subType: term ID of the subtype of the data
        @return: True if the subtype is valid
        @raise ValueError: if the mainType is not recognised or the subType is not valid
        '''
        # 'in' rather than dict.has_key, which no longer exists in Python 3
        if mainType not in self.SUBTYPE_TERMS:
            errorMessage = "Error: unrecognised data type: '%s'" % mainType
            logging.error(errorMessage)
            raise ValueError(errorMessage)

        if subType in self.SUBTYPE_TERMS[mainType]:
            return True

        errorMessage = "Error: subtype, '%s' is not valid for data type, '%s'" \
            % (subType, mainType)
        logging.error(errorMessage)
        raise ValueError(errorMessage)


    def getTermFromTitle(self, title):
        '''
        Given a term title/label, get back the related term id
        @param title: title/label of term id to retrieve
        @return: term ID of the single term carrying the given title
        @raise ValueError: if more than one title or no title is returned
        '''
        termIDs = [item.termID for item in self.TERM_DATA.values()
                   if item.title == title]

        if len(termIDs) != 1:
            # adjacent literals are joined by the parser; a backslash
            # continuation inside the string would embed the indentation
            # of the following line in the message
            errorMessage = ("Error: could not accurately determine the vocab term "
                            "ID for the label, '%s' - %s values returned"
                            % (title, len(termIDs)))
            logging.error(errorMessage)
            raise ValueError(errorMessage)

        return termIDs[0]


    def _getVocabDataPage(self):
        '''
        Getter method to allow regular refreshing of data
        @return: contents of the vocab server page, refetched first if the
        cached copy has expired
        '''
        if self.REFRESH_TIME < time.time():
            self.getVocabServerData()

        return self.VOCAB_DATA_PAGE


    def getVocabServerData(self):
        '''
        Retrieve the web page containing the versioning info for the various vocab terms
        - the page is stored in VOCAB_DATA_PAGE and REFRESH_TIME is moved on by an hour
        '''
        logging.info("Retrieving vocab server data page")
        f = urllib.urlopen(self.VOCAB_SERVER_URL)
        try:
            self.VOCAB_DATA_PAGE = f.read()
        finally:
            # always release the connection, even if the read fails
            f.close()
        self.REFRESH_TIME = time.time() + self.ONE_HOUR
        logging.info("Vocab server data retrieved")


    def getLatestTermVersions(self):
        '''
        Retrieve the latest versions of the vocab terms required
        - NB, only terms without a stored version are looked up; the page
        they are read from is refreshed on an hourly basis, if necessary
        '''
        # now need to parse the returned data to get the current version number
        for termName in self.TERM_DATA:
            if termName not in self.latestTermVersion:
                self.getLatestTermVersion(termName)


    def getLatestTermVersion(self, termName):
        '''
        Parse the vocab server data and determine the latest version number of the term with the specified name
        - the result is stored in latestTermVersion rather than returned
        @param termName: name of term whose current version needs to be established
        '''
        logging.info("Getting latest term version for term, '%s'" % termName)
        # check for recent data - NB, the term will probably share a base url with other terms
        # so may have already have the correct URL set up
        if termName not in self.latestTermVersion or self.REFRESH_TIME < time.time():

            uri = self.TERM_DATA[termName].vocabURL
            currentVersion = self._getURIVersion(uri)

            # NB, a uri that could not be found gives None here, which is
            # stored as the string 'None'
            self.latestTermVersion[termName] = str(currentVersion)
        logging.info("Latest term version for term, '%s' retrieved" % termName)


    def _getURIVersion(self, uri):
        '''
        Parse the server data for a specified uri and return the latest version number of it
        @param uri: uri to look for
        @return: version of vocab uri (as an int), '' if uri contains latest version
        and None if uri not found - NB, the not found case is currently only
        logged; no error is raised
        '''
        # allow the data to be refreshed on an hourly basis
        pageData = self._getVocabDataPage()

        uri = uri.replace('/term/', '/list/')
        if uri.endswith('/'):
            uri = uri.rstrip('/')
        # NB, the uri may already have a version number included in it - so allow this in the search
        # - the uri is escaped so that characters such as '.' are matched
        # literally rather than being treated as regular expression syntax
        regExp = re.compile(r'(' + re.escape(uri) + r'(/(\d{1,}))?)')
        currentVersion = None
        for termVals in regExp.findall(pageData):
            version = termVals[2]
            # version number must be included in original uri, if match found without a version number
            # being split out - so ignore this - since the original uri is already specified in full
            if not version:
                currentVersion = ''
                break

            # TODO: check how the versioning system will be done - i.e. 3.2.4 type versioning will cause this to break
            versionNumber = int(version)
            if not currentVersion or versionNumber > currentVersion:
                currentVersion = versionNumber

        if currentVersion is None:
            errorMessage = "Could not find information in vocab server for uri, '%s' - exiting" % uri
            logging.error(errorMessage)
            # TODO: uncomment the ValueError once all the vocab terms have been defined
            #raise ValueError(errorMessage)

        return currentVersion


    def getTermCurrentVocabURL(self, termName):
        '''
        Get the current URL on the vocab server to the specified term
        @param termName: name of term whose URL to return
        @return: url of the form <vocab url>/<version>/<term id>
        @raise ValueError: if termName is not a key of TERM_DATA
        '''
        logging.debug("Looking up vocab data for term: '%s'" % termName)

        if termName not in self.TERM_DATA:
            errorMessage = "Could not find term, '%s' in defined list of valid vocab terms - exiting" % termName
            logging.error(errorMessage)
            raise ValueError(errorMessage)

        if termName not in self.latestTermVersion:
            self.getLatestTermVersion(termName)

        # NB, the stored version is used as is - so a term whose version
        # could not be found produces a url containing '/None/'
        uri = self.TERM_DATA[termName].vocabURL + \
            "/" + self.latestTermVersion[termName] + "/" + \
            self.TERM_DATA[termName].termID
        logging.debug("Returning vocab URL: '%s'" % uri)
        return uri


    def getCurrentVocabURI(self, uri):
        '''
        Look up a specified URI and return the current version of it
        @param uri: uri to look up on the vocab server
        @return: uri with the latest version number appended, or the uri
        unchanged if no version number was found for it
        '''
        logging.debug("Looking up current version of uri: '%s'" % uri)
        currentVersion = self._getURIVersion(uri)
        logging.debug("URI version looked up")
        if currentVersion:
            if not uri.endswith('/'):
                uri += '/'
            return uri + str(currentVersion)
        return uri
Note: See TracBrowser for help on using the repository browser.