source: exist/trunk/python/ndgUtils/vocabtermdata.py @ 4217

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/vocabtermdata.py@4217
Revision 4217, 15.7 KB checked in by cbyrom, 11 years ago (diff)

Adjust atom to include atom type term ID as well as the vocab url - and
update the vocab data to include the terms for the various different
atom subtypes.

Line 
1#!/usr/bin/env python
2'''
3 Class to store and access the various vocab term data
4 
5 @author: C Byrom, Tessella Jul 2008
6'''
7import sys, logging, commands, string, os, time, re
8import urllib
9
class VocabTermItem(object):
    '''
    Plain record describing one vocab term.

    @ivar vocabURL: base URL of the vocab list the term belongs to
    @ivar termID: identifier of the term within that vocab list
    @ivar title: optional human readable label for the term (None if not given)
    '''
    def __init__(self, vocabURL, termID, title=None):
        # Store the three fields verbatim - no validation is applied here
        self.vocabURL, self.termID, self.title = vocabURL, termID, title
18
19
class VocabTermData(object):
    '''
    Class representing vocab term data - including
    methods to look these up to ensure they are current

    The class level constants name the known terms; TERM_DATA maps each of
    these names to a VocabTermItem and SUBTYPE_TERMS groups the atom subtype
    items by their main atom type.  Instances cache the vocab server listing
    page (refetched once it is older than ONE_HOUR) and the term versions
    derived from it.
    '''

    # General / service related terms
    URI_TERM = 'URI'
    LOGO_TERM = 'LOGO'
    NUM_SIM_TERM = 'NumSim'
    OPENDAP_TERM = 'OPENDAP'
    THREDDS_TERM = 'THREDDS'
    WMS_TERM = 'WMS'
    WCS_TERM = 'WCS'
    WFS_TERM = 'WFS'
    LAS_TERM = 'LAS'
    DATA_EXTRACTOR_TERM = 'DataExtractor'
    FILE_BROWSER_TERM = 'FileBrowser'
    CSML_TERM = 'CSML'

    # Main atom types
    ACTIVITY_TERM = 'ACTIVITY'
    DPT_TERM = 'DPT'
    OBS_TERM = 'OBS'
    GRANULE_TERM = 'GRANULE'
    DE_TERM = "DE"

    # Subtypes grouped under DPT_TERM
    LIDAR_TERM = "dgLidar"
    RADAR_TERM = "dgRadar"
    SONDE_TERM = "dgSonde"
    NAVIGATION_TERM = "dgNavigation"
    GAS_CHROMATOGRAPH_TERM = "dgGasChromatograph"
    SPECTROMETER_TERM = "dgSpectrometer"
    MASS_SPECTROMETER_TERM = "dgMassSpectrometer"

    # Subtypes grouped under DE_TERM
    SIMULATION_TERM = "dgSimulation"
    ANALYSIS_TERM = "dgAnalysis"
    MEASUREMENT_TERM = "dgMeasurement"

    # Subtypes grouped under ACTIVITY_TERM
    DATA_COLLECTION_TERM = "dgDataCollection"
    DATA_PROJECT_TERM = "dgDataProject"
    DATA_CAMPAIGN_TERM = "dgDataCampaignProject"
    DATA_INVESTIGATION_TERM = "dgDataInvestigation"
    FLIGHT_TERM = "dgFlight"
    CRUISE_TERM = "dgCruise"
    FUNDING_PROGRAM_TERM = "dgFundingProgram"

    # Subtypes grouped under OBS_TERM
    STATIONARY_PLATFORM_TERM = "dgStationaryPlatform"
    MOVING_PLATFORM_TERM = "dgMovingPlatform"
    LAND_STATION_TERM = "dgLandStation"
    MOORING_TERM = "dgMooring"
    STATION_GROUP_TERM = "dgStationGroup"
    SHIP_TERM = "dgShip"
    AIRCRAFT_TERM = "dgAircraft"
    SATELLITE_TERM = "dgSatellite"
    COMPUTER_TERM = "dgComputer"

    # Lookup of term name -> VocabTermItem(vocabURL, termID, title).
    # Entries whose vocabURL reads 'NOT YET SET UP' are placeholders and are
    # not real URLs.
    TERM_DATA = {
        URI_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '3', title = 'Data Home Page'),
        LOGO_TERM: VocabTermItem('LOGO', 'LOGO', title = 'Logo'),
        NUM_SIM_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '7', title = 'NumSim description'),
        OPENDAP_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU010', title = 'GET DATA > OPENDAP DATA (DODS)'),
        THREDDS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU013', title = 'GET DATA > THREDDS DATA'),
        WMS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU026', title = 'GET SERVICE > GET WEB MAP SERVICE (WMS)'),
        WCS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU024', title = 'GET SERVICE > GET WEB COVERAGE SERVICE (WCS)'),
        WFS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU025', title = 'GET SERVICE > GET WEB FEATURE SERVICE (WFS)'),
        LAS_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/P201', 'GCMDU006', title = 'GET DATA > LAS'),
        DATA_EXTRACTOR_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '8', title = 'NDG DataExtractor'),
        FILE_BROWSER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N041', '9', title = 'File Browser'),
        CSML_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/N021', '1'),
        ACTIVITY_TERM: VocabTermItem('Activity - NOT YET SET UP', 'Activity', title = 'Activity'),
        DPT_TERM: VocabTermItem('DPT - NOT YET SET UP', 'DPT', title = 'Data Production Tool'),
        OBS_TERM: VocabTermItem('OBS - NOT YET SET UP', 'OBS', title = 'Observation Station'),
        GRANULE_TERM: VocabTermItem('GRAN - NOT YET SET UP', 'GRAN', title = 'Data Granule'),
        DE_TERM: VocabTermItem('DE - NOT YET SET UP', 'DE', title = 'Data Entity'),

        LIDAR_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG02', title = LIDAR_TERM),
        RADAR_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG03', title = RADAR_TERM),
        SONDE_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG04', title = SONDE_TERM),
        NAVIGATION_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG05', title = NAVIGATION_TERM),
        GAS_CHROMATOGRAPH_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG06', title = GAS_CHROMATOGRAPH_TERM),
        SPECTROMETER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG07', title = SPECTROMETER_TERM),
        MASS_SPECTROMETER_TERM: VocabTermItem('http://vocab.ndg.nerc.ac.uk/term/C330', 'NG08', title = MASS_SPECTROMETER_TERM),

        SIMULATION_TERM: VocabTermItem('NOT YET SET UP', SIMULATION_TERM, title = SIMULATION_TERM),
        ANALYSIS_TERM: VocabTermItem('NOT YET SET UP', ANALYSIS_TERM, title = ANALYSIS_TERM),
        MEASUREMENT_TERM: VocabTermItem('NOT YET SET UP', MEASUREMENT_TERM, title = MEASUREMENT_TERM),

        DATA_COLLECTION_TERM: VocabTermItem('NOT YET SET UP', DATA_COLLECTION_TERM, title = DATA_COLLECTION_TERM),
        DATA_PROJECT_TERM: VocabTermItem('NOT YET SET UP', DATA_PROJECT_TERM, title = DATA_PROJECT_TERM),
        DATA_CAMPAIGN_TERM: VocabTermItem('NOT YET SET UP', DATA_CAMPAIGN_TERM, title = DATA_CAMPAIGN_TERM),
        DATA_INVESTIGATION_TERM: VocabTermItem('NOT YET SET UP', DATA_INVESTIGATION_TERM, title = DATA_INVESTIGATION_TERM),
        FLIGHT_TERM: VocabTermItem('NOT YET SET UP', FLIGHT_TERM, title = FLIGHT_TERM),
        CRUISE_TERM: VocabTermItem('NOT YET SET UP', CRUISE_TERM, title = CRUISE_TERM),
        FUNDING_PROGRAM_TERM: VocabTermItem('NOT YET SET UP', FUNDING_PROGRAM_TERM, title = FUNDING_PROGRAM_TERM),

        STATIONARY_PLATFORM_TERM: VocabTermItem('NOT YET SET UP', STATIONARY_PLATFORM_TERM, title = STATIONARY_PLATFORM_TERM),
        MOVING_PLATFORM_TERM: VocabTermItem('NOT YET SET UP', MOVING_PLATFORM_TERM, title = MOVING_PLATFORM_TERM),
        LAND_STATION_TERM: VocabTermItem('NOT YET SET UP', LAND_STATION_TERM, title = LAND_STATION_TERM),
        MOORING_TERM: VocabTermItem('NOT YET SET UP', MOORING_TERM, title = MOORING_TERM),
        STATION_GROUP_TERM: VocabTermItem('NOT YET SET UP', STATION_GROUP_TERM, title = STATION_GROUP_TERM),
        SHIP_TERM: VocabTermItem('NOT YET SET UP', SHIP_TERM, title = SHIP_TERM),
        AIRCRAFT_TERM: VocabTermItem('NOT YET SET UP', AIRCRAFT_TERM, title = AIRCRAFT_TERM),
        SATELLITE_TERM: VocabTermItem('NOT YET SET UP', SATELLITE_TERM, title = SATELLITE_TERM),
        COMPUTER_TERM: VocabTermItem('NOT YET SET UP', COMPUTER_TERM, title = COMPUTER_TERM),
        }

    # Further subtype terms that are listed here for reference only - they
    # are not yet defined as constants or included in TERM_DATA:
    #'dgMetSensor':'http://vocab.ndg.nerc.ac.uk/term/C330/0/NG09',
    #'dgDOAS':'http://vocab.ndg.nerc.ac.uk/term/C330/0/NG10',
    #'dgASOZ':'http://vocab.ndg.nerc.ac.uk/term/C330/0/NG11',
    #'dgRadiometer':'http://vocab.ndg.nerc.ac.uk/term/C330/0/NG12',
    #'dgFAGE':'http://vocab.ndg.nerc.ac.uk/term/C330/0/NG13',
    #'dgImager':'http://vocab.ndg.nerc.ac.uk/term/C330/0/NG14',
    #'dgFilter':'http://vocab.ndg.nerc.ac.uk/term/C330/0/NG15',
    #'dgParticleCounter':'http://vocab.ndg.nerc.ac.uk/term/C330/0/NG16',
    #'dgSampler':'http://vocab.ndg.nerc.ac.uk/term/C330/0/NG17',
    #'dgOtherInstrumentType':'http://vocab.ndg.nerc.ac.uk/term/C330/0/NG99',
    #'dgModel':'dgModel - NOT YET SET UP',
    #'dgInstrument':'dgInstrument - NOT YET SET UP'

    # A dictionary to group the various valid subtypes of atoms - grouped by
    # their main type
    SUBTYPE_TERMS = {
        DPT_TERM: [
            TERM_DATA[LIDAR_TERM], TERM_DATA[RADAR_TERM],
            TERM_DATA[SONDE_TERM], TERM_DATA[NAVIGATION_TERM],
            TERM_DATA[GAS_CHROMATOGRAPH_TERM], TERM_DATA[SPECTROMETER_TERM],
            TERM_DATA[MASS_SPECTROMETER_TERM],
            ],
        DE_TERM: [
            TERM_DATA[SIMULATION_TERM], TERM_DATA[ANALYSIS_TERM],
            TERM_DATA[MEASUREMENT_TERM],
            ],
        ACTIVITY_TERM: [
            TERM_DATA[DATA_COLLECTION_TERM], TERM_DATA[DATA_PROJECT_TERM],
            TERM_DATA[DATA_CAMPAIGN_TERM], TERM_DATA[DATA_INVESTIGATION_TERM],
            TERM_DATA[FLIGHT_TERM], TERM_DATA[CRUISE_TERM],
            TERM_DATA[FUNDING_PROGRAM_TERM],
            ],
        OBS_TERM: [
            TERM_DATA[STATIONARY_PLATFORM_TERM], TERM_DATA[MOVING_PLATFORM_TERM],
            TERM_DATA[LAND_STATION_TERM], TERM_DATA[MOORING_TERM],
            TERM_DATA[STATION_GROUP_TERM], TERM_DATA[SHIP_TERM],
            TERM_DATA[AIRCRAFT_TERM], TERM_DATA[SATELLITE_TERM],
            TERM_DATA[COMPUTER_TERM],
            ],
        }

    # Lifetime, in seconds, of the cached vocab server page
    ONE_HOUR = 3600.0

    # Page that is fetched and searched for list URLs and their versions
    VOCAB_SERVER_URL = 'http://vocab.ndg.nerc.ac.uk/clients/whatLists'

    BROWSE_ROOT_URL = "http://localhost:5000/view/"

    def __init__(self):
        '''
        Set up an empty cache - no network access is done at this point
        '''
        logging.info("Setting up VocabTermData object")
        # Contents of the vocab server page; None until first fetched
        self.VOCAB_DATA_PAGE = None
        # Time (seconds since epoch) after which the cached page is stale
        self.REFRESH_TIME = time.time()
        # Lookup of term name -> latest version found, stored as a string
        self.latestTermVersion = {}
        logging.info("VocabTermData object set up")


    def getTermFromTitle(self, title):
        '''
        Given a term title/label, get back the related term id
        @param title: title/label of term id to retrieve
        @raise ValueError: if more than one title or no title is returned
        @return: termID of the single TERM_DATA entry with the given title
        '''
        termIDs = [item.termID for item in self.TERM_DATA.values()
                   if item.title == title]

        if len(termIDs) != 1:
            # NB, built from adjacent literals so that no source indentation
            # ends up inside the message
            errorMessage = ("Error: could not accurately determine the vocab term "
                            "ID for the label, '%s' - %s values returned"
                            % (title, len(termIDs)))
            logging.error(errorMessage)
            raise ValueError(errorMessage)

        return termIDs[0]


    def _getVocabDataPage(self):
        '''
        Getter method to allow regular refreshing of data
        @return: contents of the vocab server page, fetched first if it has
        never been loaded or if the cached copy has expired
        '''
        # REFRESH_TIME starts out as 'now', so on a coarse clock the expiry
        # test alone can be False on the very first call - also check that
        # a page has actually been loaded
        if self.VOCAB_DATA_PAGE is None or self.REFRESH_TIME < time.time():
            self.getVocabServerData()

        return self.VOCAB_DATA_PAGE


    def getVocabServerData(self):
        '''
        Retrieve the web page containing the versioning info for the various vocab terms
        and schedule the next refresh for ONE_HOUR from now
        '''
        logging.info("Retrieving vocab server data page")
        f = urllib.urlopen(self.VOCAB_SERVER_URL)
        try:
            self.VOCAB_DATA_PAGE = f.read()
        finally:
            # always release the connection, even if the read fails
            f.close()
        self.REFRESH_TIME = time.time() + self.ONE_HOUR
        logging.info("Vocab server data retrieved")


    def getLatestTermVersions(self):
        '''
        Retrieve the latest versions of the vocab terms required
        - NB, only terms with no cached version are looked up here
        '''
        # NOTE(review): terms that already have a cached version are skipped,
        # so this does not re-check them once the page expires - confirm
        # whether that is intended
        for termName in self.TERM_DATA:
            if termName not in self.latestTermVersion:
                self.getLatestTermVersion(termName)


    def getLatestTermVersion(self, termName):
        '''
        Parse the vocab server data and determine the latest version number of the term with the specified name
        The result is stored, as a string, in self.latestTermVersion
        @param termName: name of term whose current version needs to be established
        @raise KeyError: if termName is not a key of TERM_DATA
        '''
        logging.info("Getting latest term version for term, '%s'", termName)
        # check for recent data - NB, the term will probably share a base url with other terms
        # so may have already have the correct URL set up
        if termName not in self.latestTermVersion or self.REFRESH_TIME < time.time():
            uri = self.TERM_DATA[termName].vocabURL
            # NB, an unknown uri gives None here, which is stored as 'None'
            self.latestTermVersion[termName] = str(self._getURIVersion(uri))
        logging.info("Latest term version for term, '%s' retrieved", termName)


    def _getURIVersion(self, uri):
        '''
        Parse the server data for a specified uri and return the latest version number of it
        @param uri: uri to look for
        @return: highest version found (as an int), '' if uri is matched
        without a trailing version number and None if uri not found - NB, the
        not found case is only logged for now; see TODO below
        '''
        # allow the data to be refreshed on an hourly basis
        pageData = self._getVocabDataPage()

        uri = uri.replace('/term/', '/list/').rstrip('/')
        # NB, the uri may already have a version number included in it - so allow this in the search
        # - the uri is escaped so that characters such as '.' only match themselves
        regExp = re.compile(r'(' + re.escape(uri) + r'(/(\d+))?)')
        currentVersion = None
        for termVals in regExp.findall(pageData):
            version = termVals[2]
            # version number must be included in original uri, if match found without a version number
            # being split out - so ignore this - since the original uri is already specified in full
            if not version:
                currentVersion = ''
                break

            # TODO: check how the versioning system will be done - i.e. 3.2.4 type versioning will cause this to break
            version = int(version)
            if currentVersion is None or version > currentVersion:
                currentVersion = version

        if currentVersion is None:
            errorMessage = "Could not find information in vocab server for uri, '%s' - exiting" %uri
            logging.error(errorMessage)
            # TODO: uncomment the ValueError once all the vocab terms have been defined
            #raise ValueError(errorMessage)

        return currentVersion


    def getTermCurrentVocabURL(self, termName):
        '''
        Get the current URL on the vocab server to the specified term
        @param termName: name of term whose URL to return
        @raise ValueError: if termName is not a key of TERM_DATA
        @return: url of the form, vocabURL/version/termID
        '''
        logging.debug("Looking up vocab data for term: '%s'", termName)

        if termName not in self.TERM_DATA:
            errorMessage = "Could not find term, '%s' in defined list of valid vocab terms - exiting" %termName
            logging.error(errorMessage)
            raise ValueError(errorMessage)

        if termName not in self.latestTermVersion:
            self.getLatestTermVersion(termName)

        term = self.TERM_DATA[termName]
        uri = term.vocabURL + "/" + self.latestTermVersion[termName] + \
            "/" + term.termID
        logging.debug("Returning vocab URL: '%s'", uri)
        return uri


    def getCurrentVocabURI(self, uri):
        '''
        Look up a specified URI and return the current version of it
        @param uri: uri to look up
        @return: uri with the latest version number appended, or the uri
        unchanged if no version number could be determined for it
        '''
        logging.debug("Looking up current version of uri: '%s'", uri)
        currentVersion = self._getURIVersion(uri)
        logging.debug("URI version looked up")
        # NB, test explicitly for the 'no version' results rather than for
        # truthiness, since 0 is a valid version number
        if currentVersion is None or currentVersion == '':
            return uri

        if not uri.endswith('/'):
            uri += '/'
        return uri + str(currentVersion)
Note: See TracBrowser for help on using the repository browser.