source: exist/trunk/python/ndgUtils/vocabtermdata.py @ 4218

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/vocabtermdata.py@4218
Revision 4218, 16.9 KB checked in by cbyrom, 12 years ago (diff)

Finish adding details of all the subtype vocab terms.

Line 
#!/usr/bin/env python
'''
 Class to store and access the various vocab term data

 @author: C Byrom, Tessella Jul 2008
'''
import commands
import logging
import os
import re
import string
import sys
import time
import urllib
9
class VocabTermItem(object):
    '''
    Class representing single vocab term item

    @ivar vocabURL: URL of the vocabulary the term belongs to
    @ivar termID: identifier of the term within that vocabulary
    @ivar title: optional human readable label for the term
    '''
    def __init__(self, vocabURL, termID, title=None):
        self.vocabURL = vocabURL
        self.termID = termID
        self.title = title

    def __repr__(self):
        # Debugging aid only - mirrors the constructor call
        return "%s(%r, %r, title=%r)" % (self.__class__.__name__,
                                         self.vocabURL, self.termID,
                                         self.title)
18
19
class VocabTermData(object):
    '''
    Class representing vocab term data - including
    methods to look these up to ensure they are current
    '''

    # Keys used to look terms up in TERM_DATA
    URI_TERM = 'URI'
    LOGO_TERM = 'LOGO'
    NUM_SIM_TERM = 'NumSim'
    OPENDAP_TERM = 'OPENDAP'
    THREDDS_TERM = 'THREDDS'
    WMS_TERM = 'WMS'
    WCS_TERM = 'WCS'
    WFS_TERM = 'WFS'
    LAS_TERM = 'LAS'
    DATA_EXTRACTOR_TERM = 'DataExtractor'
    FILE_BROWSER_TERM = 'FileBrowser'
    CSML_TERM = 'CSML'
    ACTIVITY_TERM = 'ACTIVITY'
    DPT_TERM = 'DPT'
    OBS_TERM = 'OBS'
    GRANULE_TERM = 'GRANULE'
    DE_TERM = "DE"

    # dpt subtypes
    LIDAR_TERM = "dgLidar"
    RADAR_TERM = "dgRadar"
    SONDE_TERM = "dgSonde"
    NAVIGATION_TERM = "dgNavigation"
    GAS_CHROMATOGRAPH_TERM = "dgGasChromatograph"
    SPECTROMETER_TERM = "dgSpectrometer"
    MASS_SPECTROMETER_TERM = "dgMassSpectrometer"
    MET_SENSOR_TERM = "dgMetSensor"
    DOAS_TERM = "dgDOAS"
    ASOZ_TERM = "dgASOZ"
    RADIOMETER_TERM = "dgRadiometer"
    FAGE_TERM = "dgFAGE"
    IMAGER_TERM = "dgImager"
    FILTER_TERM = "dgFilter"
    PARTICLE_COUNTER_TERM = "dgParticleCounter"
    SAMPLER_TERM = "dgSampler"
    OTHER_INSTRUMENT_TYPE_TERM = "dgOtherInstrumentType"
    MODEL_TERM = "dgModel"
    INSTRUMENT_TERM = "dgInstrument"

    # de subtypes
    SIMULATION_TERM = "dgSimulation"
    ANALYSIS_TERM = "dgAnalysis"
    MEASUREMENT_TERM = "dgMeasurement"

    # activity subtypes
    DATA_COLLECTION_TERM = "dgDataCollection"
    DATA_PROJECT_TERM = "dgDataProject"
    DATA_CAMPAIGN_TERM = "dgDataCampaignProject"
    DATA_INVESTIGATION_TERM = "dgDataInvestigation"
    FLIGHT_TERM = "dgFlight"
    CRUISE_TERM = "dgCruise"
    FUNDING_PROGRAM_TERM = "dgFundingProgram"

    # obs subtypes
    STATIONARY_PLATFORM_TERM = "dgStationaryPlatform"
    MOVING_PLATFORM_TERM = "dgMovingPlatform"
    LAND_STATION_TERM = "dgLandStation"
    MOORING_TERM = "dgMooring"
    STATION_GROUP_TERM = "dgStationGroup"
    SHIP_TERM = "dgShip"
    AIRCRAFT_TERM = "dgAircraft"
    SATELLITE_TERM = "dgSatellite"
    COMPUTER_TERM = "dgComputer"

    # Maps each term key to its VocabTermItem (vocab URL, term ID, title).
    # Entries whose URL reads '... NOT YET SET UP' are placeholders.
    TERM_DATA = {
        URI_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '3', title='Data Home Page'),
        LOGO_TERM: VocabTermItem('LOGO', 'LOGO', title='Logo'),
        NUM_SIM_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '7', title='NumSim description'),
        OPENDAP_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU010', title='GET DATA > OPENDAP DATA (DODS)'),
        THREDDS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU013', title='GET DATA > THREDDS DATA'),
        WMS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU026', title='GET SERVICE > GET WEB MAP SERVICE (WMS)'),
        WCS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU024', title='GET SERVICE > GET WEB COVERAGE SERVICE (WCS)'),
        WFS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU025', title='GET SERVICE > GET WEB FEATURE SERVICE (WFS)'),
        LAS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU006', title='GET DATA > LAS'),
        DATA_EXTRACTOR_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '8', title='NDG DataExtractor'),
        FILE_BROWSER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '9', title='File Browser'),
        CSML_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N021', '1'),
        ACTIVITY_TERM: VocabTermItem('Activity - NOT YET SET UP', ACTIVITY_TERM, title='Activity'),
        DPT_TERM: VocabTermItem('DPT - NOT YET SET UP', DPT_TERM, title='Data Production Tool'),
        OBS_TERM: VocabTermItem('OBS - NOT YET SET UP', OBS_TERM, title='Observation Station'),
        GRANULE_TERM: VocabTermItem('GRAN - NOT YET SET UP', GRANULE_TERM, title='Data Granule'),
        DE_TERM: VocabTermItem('DE - NOT YET SET UP', DE_TERM, title='Data Entity'),

        LIDAR_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG02', title=LIDAR_TERM),
        RADAR_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG03', title=RADAR_TERM),
        SONDE_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG04', title=SONDE_TERM),
        NAVIGATION_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG05', title=NAVIGATION_TERM),
        GAS_CHROMATOGRAPH_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG06', title=GAS_CHROMATOGRAPH_TERM),
        SPECTROMETER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG07', title=SPECTROMETER_TERM),
        MASS_SPECTROMETER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG08', title=MASS_SPECTROMETER_TERM),
        # The original referenced an undefined name here (_TERM), which
        # raised a NameError as soon as the class body was evaluated
        MET_SENSOR_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG09', title=MET_SENSOR_TERM),
        DOAS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG10', title=DOAS_TERM),
        ASOZ_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG11', title=ASOZ_TERM),
        RADIOMETER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG12', title=RADIOMETER_TERM),
        FAGE_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG13', title=FAGE_TERM),
        IMAGER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG14', title=IMAGER_TERM),
        FILTER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG15', title=FILTER_TERM),
        PARTICLE_COUNTER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG16', title=PARTICLE_COUNTER_TERM),
        SAMPLER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG17', title=SAMPLER_TERM),
        OTHER_INSTRUMENT_TYPE_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG99', title=OTHER_INSTRUMENT_TYPE_TERM),
        MODEL_TERM: VocabTermItem('dgModel - NOT YET SET UP', MODEL_TERM, title=MODEL_TERM),
        INSTRUMENT_TERM: VocabTermItem('dgInstrument - NOT YET SET UP', INSTRUMENT_TERM, title=INSTRUMENT_TERM),

        SIMULATION_TERM: VocabTermItem('NOT YET SET UP', SIMULATION_TERM, title=SIMULATION_TERM),
        ANALYSIS_TERM: VocabTermItem('NOT YET SET UP', ANALYSIS_TERM, title=ANALYSIS_TERM),
        MEASUREMENT_TERM: VocabTermItem('NOT YET SET UP', MEASUREMENT_TERM, title=MEASUREMENT_TERM),
        DATA_COLLECTION_TERM: VocabTermItem('NOT YET SET UP', DATA_COLLECTION_TERM, title=DATA_COLLECTION_TERM),
        DATA_PROJECT_TERM: VocabTermItem('NOT YET SET UP', DATA_PROJECT_TERM, title=DATA_PROJECT_TERM),
        DATA_CAMPAIGN_TERM: VocabTermItem('NOT YET SET UP', DATA_CAMPAIGN_TERM, title=DATA_CAMPAIGN_TERM),
        DATA_INVESTIGATION_TERM: VocabTermItem('NOT YET SET UP', DATA_INVESTIGATION_TERM, title=DATA_INVESTIGATION_TERM),
        FLIGHT_TERM: VocabTermItem('NOT YET SET UP', FLIGHT_TERM, title=FLIGHT_TERM),
        CRUISE_TERM: VocabTermItem('NOT YET SET UP', CRUISE_TERM, title=CRUISE_TERM),
        FUNDING_PROGRAM_TERM: VocabTermItem('NOT YET SET UP', FUNDING_PROGRAM_TERM, title=FUNDING_PROGRAM_TERM),

        STATIONARY_PLATFORM_TERM: VocabTermItem('NOT YET SET UP', STATIONARY_PLATFORM_TERM, title=STATIONARY_PLATFORM_TERM),
        MOVING_PLATFORM_TERM: VocabTermItem('NOT YET SET UP', MOVING_PLATFORM_TERM, title=MOVING_PLATFORM_TERM),
        LAND_STATION_TERM: VocabTermItem('NOT YET SET UP', LAND_STATION_TERM, title=LAND_STATION_TERM),
        MOORING_TERM: VocabTermItem('NOT YET SET UP', MOORING_TERM, title=MOORING_TERM),
        STATION_GROUP_TERM: VocabTermItem('NOT YET SET UP', STATION_GROUP_TERM, title=STATION_GROUP_TERM),
        SHIP_TERM: VocabTermItem('NOT YET SET UP', SHIP_TERM, title=SHIP_TERM),
        AIRCRAFT_TERM: VocabTermItem('NOT YET SET UP', AIRCRAFT_TERM, title=AIRCRAFT_TERM),
        SATELLITE_TERM: VocabTermItem('NOT YET SET UP', SATELLITE_TERM, title=SATELLITE_TERM),
        COMPUTER_TERM: VocabTermItem('NOT YET SET UP', COMPUTER_TERM, title=COMPUTER_TERM),
    }

    # A dictionary to group the various valid subtypes of atoms - grouped by their
    # main type
    SUBTYPE_TERMS = {
        DPT_TERM: [
            TERM_DATA[LIDAR_TERM], TERM_DATA[RADAR_TERM],
            TERM_DATA[SONDE_TERM], TERM_DATA[NAVIGATION_TERM],
            TERM_DATA[GAS_CHROMATOGRAPH_TERM], TERM_DATA[SPECTROMETER_TERM],
            TERM_DATA[MASS_SPECTROMETER_TERM],
            TERM_DATA[MET_SENSOR_TERM], TERM_DATA[DOAS_TERM],
            TERM_DATA[ASOZ_TERM], TERM_DATA[RADIOMETER_TERM],
            TERM_DATA[FAGE_TERM], TERM_DATA[IMAGER_TERM],
            TERM_DATA[FILTER_TERM], TERM_DATA[PARTICLE_COUNTER_TERM],
            TERM_DATA[SAMPLER_TERM], TERM_DATA[OTHER_INSTRUMENT_TYPE_TERM],
            TERM_DATA[MODEL_TERM], TERM_DATA[INSTRUMENT_TERM],
        ],
        DE_TERM: [
            TERM_DATA[SIMULATION_TERM], TERM_DATA[ANALYSIS_TERM],
            TERM_DATA[MEASUREMENT_TERM],
        ],
        ACTIVITY_TERM: [
            TERM_DATA[DATA_COLLECTION_TERM], TERM_DATA[DATA_PROJECT_TERM],
            TERM_DATA[DATA_CAMPAIGN_TERM], TERM_DATA[DATA_INVESTIGATION_TERM],
            TERM_DATA[FLIGHT_TERM], TERM_DATA[CRUISE_TERM],
            TERM_DATA[FUNDING_PROGRAM_TERM],
        ],
        OBS_TERM: [
            TERM_DATA[STATIONARY_PLATFORM_TERM], TERM_DATA[MOVING_PLATFORM_TERM],
            TERM_DATA[LAND_STATION_TERM], TERM_DATA[MOORING_TERM],
            TERM_DATA[STATION_GROUP_TERM], TERM_DATA[SHIP_TERM],
            TERM_DATA[AIRCRAFT_TERM], TERM_DATA[SATELLITE_TERM],
            TERM_DATA[COMPUTER_TERM],
        ],
    }

    # Number of seconds the cached vocab server page is kept before being
    # fetched again (see getVocabServerData)
    ONE_HOUR = 3600.0

    VOCAB_SERVER_URL = 'http://vocab.ndg.nerc.ac.uk/clients/whatLists'

    BROWSE_ROOT_URL = "http://localhost:5000/view/"

    def __init__(self):
        logging.info("Setting up VocabTermData object")
        # Cached contents of VOCAB_SERVER_URL; None until first fetched
        self.VOCAB_DATA_PAGE = None
        # Time (seconds since epoch) after which the cached page is stale
        self.REFRESH_TIME = time.time()
        # Maps term name -> latest version number, held as a string
        self.latestTermVersion = {}
        logging.info("VocabTermData object set up")

    def getTermFromTitle(self, title):
        '''
        Given a term title/label, get back the related term id
        @param title: title/label of term id to retrieve
        @raise ValueError: if more than one term, or no term, has the title
        @return: termID of the single term with the specified title
        '''
        # values() rather than itervalues() so this runs on Python 2 and 3
        termIDs = [item.termID for item in self.TERM_DATA.values()
                   if item.title == title]

        if len(termIDs) != 1:
            # Adjacent literals are used instead of a backslash continuation
            # inside the string, which embedded a run of spaces in the message
            errorMessage = ("Error: could not accurately determine the vocab term "
                            "ID for the label, '%s' - %s values returned"
                            % (title, len(termIDs)))
            logging.error(errorMessage)
            raise ValueError(errorMessage)

        return termIDs[0]

    def _getVocabDataPage(self):
        '''
        Getter method to allow regular refreshing of data
        @return: contents of the vocab server page
        '''
        # REFRESH_TIME starts out as 'now', so also fetch when nothing has been
        # cached yet - otherwise a call on the same clock tick returns None
        if self.VOCAB_DATA_PAGE is None or self.REFRESH_TIME < time.time():
            self.getVocabServerData()

        return self.VOCAB_DATA_PAGE

    def getVocabServerData(self):
        '''
        Retrieve the web page containing the versioning info for the various vocab terms
        and schedule the next refresh for ONE_HOUR seconds from now
        '''
        logging.info("Retrieving vocab server data page")
        f = urllib.urlopen(self.VOCAB_SERVER_URL)
        try:
            self.VOCAB_DATA_PAGE = f.read()
        finally:
            # always release the connection, even if the read fails
            f.close()
        self.REFRESH_TIME = time.time() + self.ONE_HOUR
        logging.info("Vocab server data retrieved")

    def getLatestTermVersions(self):
        '''
        Retrieve the latest versions of the vocab terms required
        - NB, refreshes the data on an hourly basis, if necessary
        '''
        # now need to parse the returned data to get the current version number
        for termName in self.TERM_DATA:
            if termName not in self.latestTermVersion:
                self.getLatestTermVersion(termName)

    def getLatestTermVersion(self, termName):
        '''
        Parse the vocab server data and determine the latest version number of the term with the specified name
        - the result is stored, as a string, in self.latestTermVersion
        @param termName: name of term whose current version needs to be established
        '''
        logging.info("Getting latest term version for term, '%s'", termName)
        # check for recent data - NB, the term will probably share a base url with other terms
        # so may have already have the correct URL set up
        if termName not in self.latestTermVersion or self.REFRESH_TIME < time.time():
            uri = self.TERM_DATA[termName].vocabURL
            currentVersion = self._getURIVersion(uri)

            # NOTE(review): when the uri is not known to the vocab server,
            # currentVersion is None and 'None' is stored here - kept as is,
            # since callers may rely on it; confirm before changing
            self.latestTermVersion[termName] = str(currentVersion)
        logging.info("Latest term version for term, '%s' retrieved", termName)

    def _getURIVersion(self, uri):
        '''
        Parse the server data for a specified uri and return the latest version number of it
        - NB, a missing uri is only logged at present; the ValueError is disabled
        until all the vocab terms have been defined
        @param uri: uri to look for
        @return version of vocab uri, '' if uri contains latest version and None if uri not found
        '''
        # allow the data to be refreshed on an hourly basis
        pageData = self._getVocabDataPage()

        uri = uri.replace('/term/', '/list/').rstrip('/')
        # NB, the uri may already have a version number included in it - so allow this in the search
        # The uri is escaped so that its '.' characters only match literally
        regExp = re.compile('(' + re.escape(uri) + r'(/(\d+))?)')
        currentVersion = None
        for termVals in regExp.findall(pageData):
            version = termVals[2]
            # version number must be included in original uri, if match found without a version number
            # being split out - so ignore this - since the original uri is already specified in full
            if not version:
                currentVersion = ''
                break

            # TODO: check how the versioning system will be done - i.e. 3.2.4 type versioning will cause this to break
            versionNumber = int(version)
            if currentVersion is None or versionNumber > currentVersion:
                currentVersion = versionNumber

        if currentVersion is None:
            errorMessage = "Could not find information in vocab server for uri, '%s' - exiting" % uri
            logging.error(errorMessage)
            # TODO: uncomment the ValueError once all the vocab terms have been defined
            #raise ValueError(errorMessage)

        return currentVersion

    def getTermCurrentVocabURL(self, termName):
        '''
        Get the current URL on the vocab server to the specified term
        @param termName: name of term whose URL to return
        @raise ValueError: if termName is not a key of TERM_DATA
        @return: '<vocabURL>/<latest version>/<termID>' for the term
        '''
        logging.debug("Looking up vocab data for term: '%s'", termName)

        if termName not in self.TERM_DATA:
            errorMessage = "Could not find term, '%s' in defined list of valid vocab terms - exiting" % termName
            logging.error(errorMessage)
            raise ValueError(errorMessage)

        if termName not in self.latestTermVersion:
            self.getLatestTermVersion(termName)

        term = self.TERM_DATA[termName]
        # NOTE(review): the version segment is '' or 'None' when no version
        # number could be determined, giving '//' or '/None/' in the result
        uri = "/".join([term.vocabURL, self.latestTermVersion[termName],
                        term.termID])
        logging.debug("Returning vocab URL: '%s'", uri)
        return uri

    def getCurrentVocabURI(self, uri):
        '''
        Look up a specified URI and return the current version of it
        @param uri: uri to look up
        @return: uri with the latest version number appended, or the uri
        unchanged if no version number was determined for it
        '''
        logging.debug("Looking up current version of uri: '%s'", uri)
        currentVersion = self._getURIVersion(uri)
        logging.debug("URI version looked up")
        if currentVersion:
            if not uri.endswith('/'):
                uri += '/'
            return uri + str(currentVersion)
        return uri
Note: See TracBrowser for help on using the repository browser.