source: TI03-DataExtractor/branches/repackaging/server/lib/ndg/dx/server/MIDASDataHandler.py @ 1774

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI03-DataExtractor/branches/repackaging/server/lib/ndg/dx/server/MIDASDataHandler.py@1774
Revision 1774, 15.2 KB checked in by spascoe, 13 years ago (diff)

Changed all references to serverConfig to point to ndg.utils.config.

Line 
1#   Copyright (C) 2004 CCLRC & NERC( Natural Environment Research Council ).
2#   This software may be distributed under the terms of the
3#   Q Public License, version 1.0 or later. http://ndg.nerc.ac.uk/public_docs/QPublic_license.txt
4
5"""
6MIDASDataHandler.py
7===================
8
9MIDASDataHandler module for the dx package.
10
11This module holds the MIDASDataHandler class that is used
12to hold and access information about datasets held in BADC's
13MIDAS Database clone (set of flat files).
14
15"""
16
17# Import required modules
18import os
19import cdms
20import re
21
22# Import global variables
23from common import *
24from DXDMLHandler import DXDMLHandler
25from DXErrors import *
26import DateTimeManager
import sys

# The MIDAS extraction scripts are not part of this package; they are picked
# up from this fixed location by extending the module search path.
sys.path.append("/home/badc/software/datasets/ukmo-midas/scripts/extract")
try:
    import midasMetadataHandler
    from midasSubsetter import *
    from getStations import *
except Exception:
    # Best effort: this module must stay importable when the MIDAS scripts
    # are unavailable.  Unlike a bare "except: pass" this does not swallow
    # KeyboardInterrupt/SystemExit, and it reports the cause so that a later
    # NameError on midasMetadataHandler/MIDASSubsetter/StationIDGetter can be
    # traced back to the failed import.
    sys.stderr.write("MIDASDataHandler: MIDAS extraction modules could not be imported: %s\n"
                     % (sys.exc_info()[1],))
35
36
# Import configuration
from ndg.utils.config import config
# Name of the configuration section that holds the settings read below.
configSection = 'ndg.dx.server'
# Attribute names that MIDASDataHandler.getCFGlobalAttributes() looks up on the
# open data file.
# NOTE(review): getCFGlobalAttributes() iterates over this value; if
# config.get() returns a plain string the loop would walk over single
# characters rather than attribute names - confirm the type returned here.
CF_METADATA_GLOBAL_ATTRIBUTE_KEYS = config.get(configSection, 'CF_METADATA_GLOBAL_ATTRIBUTE_KEYS')
41
class MIDASDataHandler:
    """
    A group of methods to connect to a dataset group or
    dataset to extract information about the contents.

    All metadata look-ups are delegated to the external midasMetadataHandler
    module, station look-ups to StationIDGetter and the actual extraction to
    MIDASSubsetter.

    Axis selections are passed around as dictionaries whose keys end in
    ".<axis index>"; the index identifies the axis:
    1=time, 2=lat, 3=lon, 4=county, 5=src_id.
    """

    # Bounding box (north, west, south, east) that subsetMIDASToASCIIFile
    # treats as "no spatial restriction": when it is selected no station
    # list is passed to the subsetter.
    _FULL_UK_BBOX = [61.0, -12.0, 49.0, 3.0]

    # Matches "YYYY-MM-DD<any char>hh:mm:s"; only the first five groups are used.
    _MIDAS_TIME_PATTERN = re.compile(r"(\d{4})-(\d+)-(\d+).(\d+):(\d+):(\d+)")

    def __init__(self, datasetURI=None):
        """
        Set up instance variables.

        'datasetURI' is stored as self.file; if it is given, _openDataFile()
        is called with it.
        """
        self.DXDML = DXDMLHandler()
        self.file = datasetURI
        if self.file:
            self._openDataFile(datasetURI=self.file)

    def _openDataFile(self, datasetGroup=None, dataset=None, datasetURI=None):
        """
        Placeholder: does nothing and returns None.
        """
        pass

    def _getVariable(self, varname):
        """
        Placeholder: does nothing and returns None.
        """
        pass

    def _getBestName(self, v, vcount=0):
        """
        Returns the best name for a variable - here simply 'v' unchanged.
        """
        return v

    def getVariables(self, datasetGroup=None, dataset=None, datasetURI=None):
        """
        Returns a list of [<long_name>, <id>] variable entries.

        All arguments are ignored: a single fixed "all variables" entry is
        always returned.
        """
        return [["All variables [mandatory]", "all-vars"]]

    def getDomain(self, datasetGroup=None, dataset=None, variable=None, datasetURI=None):
        """
        Returns the full domain listing as a list of axis descriptions
        [time, latitude, longitude, county, src_id], each of the form:

        [knownAxisString, id, longName, units, listType, unusedItem,
        listValue-1, listValue-2, ..., listValue-n]

        For example:

        ["time", "time", "Time", "hours since 1999-09-09 00:00:00", "start end interval",
        "", 0, 3, 6]

        listType can also take the value "full list" where all values in the list are provided,
        or "start end" where only the first and last value are given.

        The table is looked up from 'dataset'; 'datasetURI' is used only when
        'dataset' is None. The listing is also printed to standard output.
        """
        # This method historically resolves the table from 'dataset', unlike
        # getTemporalDomain() which uses 'datasetURI'.  Fall back to the URI
        # rather than passing None to uriMap().
        uri = dataset
        if uri is None:
            uri = datasetURI

        tableID = midasMetadataHandler.uriMap(uri)
        (timeDomain, bbox, areas) = midasMetadataHandler.getTableSummary(tableID)

        # bbox is read as (lat, lon, lat, lon)
        latLimits = (bbox[0], bbox[2])
        lonLimits = (bbox[1], bbox[3])

        lon = ["longitude", "longitude", "Longitude", "degrees_east", "start end", "",
               lonLimits[0], lonLimits[1]]
        lat = ["latitude", "latitude", "Latitude", "degrees_north", "start end", "",
               latLimits[0], latLimits[1]]

        (startTime, endTime, timeInterval, timeUnits) = timeDomain
        timeAxis = ["time", "time", "Time", timeUnits, "start end interval", "",
                    startTime, endTime, timeInterval]

        counties = ["county", "county", "Counties", "N/A", "full list",
                    "Note that selection of items in this category will override the lat/lon selection"] + areas
        src_ids = ["src_id", "src_id", "WMO Station Numbers", "N/A", "comma-separated integer1-5",
                   "Note that selection of items in this category will override lat/lon and county/country selections",
                   0, 0, 0]

        rtlist = [timeAxis, lat, lon, counties, src_ids]
        print(rtlist)
        return rtlist

    def getHorizontalDomain(self, datasetGroup=None, dataset=None, variable=None, datasetURI=None):
        """
        Placeholder: does nothing and returns None.
        """
        pass

    def getVerticalSpatialDomain(self, datasetGroup=None, dataset=None, variable=None, datasetURI=None):
        """
        Placeholder: does nothing and returns None.
        """
        pass

    def getTemporalDomain(self, datasetGroup=None, dataset=None, variable=None, datasetURI=None):
        """
        Returns the temporal domain as a tuple of (start time, end time,
        (interval value, interval units)).

        The table is looked up from 'datasetURI'. A trailing "s" is removed
        from the units string.
        """
        tableID = midasMetadataHandler.uriMap(datasetURI)
        (start_time, end_time, interval_value, time_units) = midasMetadataHandler.getTemporalInfo(tableID)
        # endswith() rather than [-1] so that an empty units string is not an error
        if time_units.endswith("s"):
            time_units = time_units[:-1]
        return (start_time, end_time, (interval_value, time_units))

    def _normaliseDateTimeString(self, dateTime):
        """
        Returns 'dateTime' with "T" replaced by a space and the part after
        the last ":" re-written as a "%f" formatted float.
        """
        parts = dateTime.replace("T", " ").split(":")
        return ":".join(parts[:-1]) + ":" + ("%f" % float(parts[-1]))

    def getSelectedTimeSteps(self, datasetURI, variable, axisSelectionDict):
        """
        Returns a list of time step strings based on the selection.

        The time axis is the entry of 'axisSelectionDict' whose key ends in
        ".1"; its first two items are the start and end date/time. Time steps
        are generated between them at the interval reported for the table
        behind 'datasetURI'. The selection is printed to standard output.

        Raises DXOptionHandlingError if there is no time selection or if no
        generated time step falls inside the selected range.
        """
        tableID = midasMetadataHandler.uriMap(datasetURI)
        (start_time, end_time, interval_value, time_units) = midasMetadataHandler.getTemporalInfo(tableID)

        timeAxisIndex = 1
        startDateTime = None
        endDateTime = None

        for key in axisSelectionDict.keys():
            if int(key.split(".")[-1]) == timeAxisIndex:
                print(axisSelectionDict)
                (startDateTime, endDateTime) = axisSelectionDict[key][:2]

        if startDateTime is None:
            # This used to call asComponentTime() on the tuple returned by
            # getTemporalDomain(), which can only fail; report it properly.
            raise DXOptionHandlingError("No time selection was provided for '%s', please go back and re-select." % variable)

        startDateTime = self._normaliseDateTimeString(startDateTime)
        endDateTime = self._normaliseDateTimeString(endDateTime)

        timeSteps = DateTimeManager.createList(getDateTimeComponents(startDateTime),
                                               getDateTimeComponents(endDateTime),
                                               (interval_value, time_units),
                                               listtype="tuple")
        selectedTimes = []

        # NOTE(review): the range test compares formatted strings, and the
        # seconds field is not zero-padded - confirm that is acceptable for
        # selections with non-zero seconds.
        for components in timeSteps:
            stamp = str("%.4d-%.2d-%.2d %.2d:%.2d:%f" % components)
            if stamp > endDateTime:
                break
            if stamp < startDateTime:
                continue
            selectedTimes.append(stamp)

        if not selectedTimes:
            raise DXOptionHandlingError("All selected time steps for '%s' are out of range, please go back and re-select." % variable)

        return selectedTimes

    def _getMIDASAxisDetails(self, axisSelectionDict):
        """
        Returns a dictionary of selections of items with keys:
        "time", "lat", "lon", "counties", "src_ids".

        "time" is a (start, end) tuple built from the first two items of the
        time selection; "counties" and "src_ids" default to empty lists.
        Keys are processed in sorted order, so if two keys map to the same
        axis the later one wins. Keys with any other axis index are ignored.

        Raises DXOptionHandlingError if the time, lat or lon selection is
        missing.
        """
        axisNames = {1: "time", 2: "lat", 3: "lon", 4: "counties", 5: "src_ids"}
        details = {"time": None, "lat": None, "lon": None, "counties": [], "src_ids": []}

        for key in sorted(axisSelectionDict.keys()):
            name = axisNames.get(int(key.split(".")[-1]))
            if name is None:
                continue

            selection = axisSelectionDict[key]
            if name == "time":
                # Same tolerance as getSelectedTimeSteps(): only start and end are used
                (startTime, endTime) = selection[:2]
                selection = (startTime, endTime)
            details[name] = selection

        missing = [name for name in ("time", "lat", "lon") if details[name] is None]
        if missing:
            raise DXOptionHandlingError("No selection was provided for: %s, please go back and re-select." % ", ".join(missing))

        return details

    def _getMonthSpan(self, sy, sm, ey, em):
        """
        Returns (number of months, middle year) for the period from
        year/month (sy, sm) to year/month (ey, em).

        The month count is never less than 1, so same-month and reversed
        ranges count as one month.
        """
        nmonths = (ey - sy) * 12 + (em - sm)
        if nmonths < 1:
            nmonths = 1
        # Midpoint of the period (this used to evaluate to the end year)
        midYear = sy + (ey - sy) // 2
        return (nmonths, midYear)

    def getSelectedVariableArrayDetails(self, datasetURI, variable, axisSelectionDict):
        """
        Returns a tuple representing the (array shape, grid shape, size)
        of the selected subset. Array shape and grid shape are always None;
        size is the volume estimate returned by
        midasMetadataHandler.getMidasExtractionCosts().

        'variable' is not used. Progress information is printed to standard
        output.
        """
        axDict = self._getMIDASAxisDetails(axisSelectionDict)

        (startTime, endTime) = axDict["time"]
        lat = axDict["lat"]
        lon = axDict["lon"]
        counties = axDict["counties"]
        src_ids = axDict["src_ids"]

        bbox = (lat[0], lon[0], lat[1], lon[1])

        # None means "no restriction of this kind"
        nsrcids = None
        if src_ids != []:
            nsrcids = len(src_ids)

        areaCount = None
        if counties != []:
            areaCount = len(counties)

        print("%s %s" % (startTime, endTime))

        # Year and month are read from fixed positions of "YYYY-MM-..."
        (sy, sm) = (int(startTime[:4]), int(startTime[5:7]))
        (ey, em) = (int(endTime[:4]), int(endTime[5:7]))
        print((ey, sm))

        (nmonths, midYear) = self._getMonthSpan(sy, sm, ey, em)

        tid = midasMetadataHandler.uriMap(datasetURI)
        (estDur, estVol) = midasMetadataHandler.getMidasExtractionCosts(tid, nmonths, midYear,
                                                                       areaCount=areaCount, bbox=bbox,
                                                                       nsrcids=nsrcids)

        # The duration estimate is not part of the return value
        return (None, None, estVol)

    def getSelectedVariableSubsetSize(self, datasetURI, varID, axisSelectionDict):
        """
        Returns the size element of getSelectedVariableArrayDetails() for
        the selection.
        """
        return self.getSelectedVariableArrayDetails(datasetURI, varID, axisSelectionDict)[2]

    def _cleanSrcIds(self, src_ids):
        """
        Returns 'src_ids' as integer strings with duplicates removed,
        keeping first-seen order (e.g. 14958.0 becomes "14958").
        """
        cleaned = []
        for sid in src_ids:
            ns = str(int(float(sid)))
            if ns not in cleaned:
                cleaned.append(ns)
        return cleaned

    def _toMIDASTimeString(self, dateTime):
        """
        Converts "YYYY-MM-DDThh:mm:s..." to "YYYYMMDDhhmm".

        Raises DXOptionHandlingError if 'dateTime' does not have that form.
        """
        match = self._MIDAS_TIME_PATTERN.match(dateTime)
        if match is None:
            raise DXOptionHandlingError("Cannot interpret date/time '%s', please go back and re-select." % dateTime)
        return "%.4d%.2d%.2d%.2d%.2d" % tuple([int(i) for i in match.groups()][:5])

    def subsetMIDASToASCIIFile(self, datasetURI, axisSelectionDict, outputPath, region=None):
        """
        Runs MIDASSubsetter for the table behind 'datasetURI', restricted by
        the selections in 'axisSelectionDict', writing to 'outputPath'.
        'region' is passed through to MIDASSubsetter. Returns None.

        The station list given to the subsetter is chosen as follows:

         * if src_ids are selected they are used (cleaned of duplicates);
         * otherwise, if counties are selected, stations are looked up by
           county with StationIDGetter;
         * otherwise, stations are looked up by bounding box, unless the
           box is the full UK box in which case no station list is given.

        Progress information is printed to standard output.
        """
        tableID = midasMetadataHandler.uriMap(datasetURI)

        axDict = self._getMIDASAxisDetails(axisSelectionDict)

        (startTime, endTime) = axDict["time"]
        lat = axDict["lat"]
        lon = axDict["lon"]
        counties = axDict["counties"]
        src_ids = axDict["src_ids"]

        bbox = (lat[0], lon[0], lat[1], lon[1])

        print("%s %s" % (startTime, endTime))
        print("Selected counties: %s" % (counties,))

        if src_ids == []:  # Invoke station id getter
            boundingBox = None
            if counties == []:
                boundingBox = list(bbox)
                # Put the larger latitude first
                if boundingBox[2] > boundingBox[0]:
                    (boundingBox[0], boundingBox[2]) = (boundingBox[2], boundingBox[0])
            print("BB: %s" % (boundingBox,))

            if counties != []:
                cunique = []
                for c in counties:
                    if c not in cunique:
                        cunique.append(c)
                src_ids = StationIDGetter(cunique, bbox=None, dataTypes=[], startTime=startTime,
                                          endTime=endTime, outputFile=None, noprint=1).stList
            elif boundingBox != self._FULL_UK_BBOX:
                src_ids = StationIDGetter(counties, boundingBox, dataTypes=[], startTime=startTime,
                                          endTime=endTime, outputFile=None, noprint=1).stList
            else:
                src_ids = None
        else:
            print("Not calling station ID getter as list provided.")
            src_ids = self._cleanSrcIds(src_ids)

        # Reformat start and end time for MIDAS subsetter
        st = self._toMIDASTimeString(startTime)
        et = self._toMIDASTimeString(endTime)

        print("\nStart and end times for selection: %s %s" % (st, et))
        print("Writing to output path: %s" % (outputPath,))
        MIDASSubsetter([tableID], outputPath, startTime=st, endTime=et,
                       src_ids=src_ids, region=region)

    def getCFGlobalAttributes(self, datafile):
        """
        Gets any CF metadata global attributes that are available
        from the source dataset/file.

        Returns a dictionary holding, for each name in
        CF_METADATA_GLOBAL_ATTRIBUTE_KEYS that self.file has as an attribute,
        the value of that attribute.
        """
        # Make sure data file is open
        if self.file is None:
            self._openDataFile(datasetURI=datafile)

        gatts = {}
        for gatt in CF_METADATA_GLOBAL_ATTRIBUTE_KEYS:
            if hasattr(self.file, gatt):
                gatts[gatt] = getattr(self.file, gatt)

        return gatts
396
if __name__=="__main__":
    # Ad-hoc smoke test, only run when the module is executed as a script.
    handler=MIDASDataHandler()
    midasURI='class://MIDAS/rain-daily'
    midasGroup='Met Office - MIDAS Land Surface Observation Stations Data'

    countySelection={
        'axis_10.1.1.1': ['1989-01-01T00:00:0.000000', '1989-01-05T00:00:0.000000'],
        'axis_10.1.1.2': [49.0, 61.0],
        'axis_10.1.1.3': [-12.0, 3.0],
        'axis_10.1.1.4': ['HEREFORD & WORCESTER', 'HERTFORDSHIRE', 'HUMBERSIDE'],
        'axis_10.1.1.5': [14958.0, 14958.0],
    }
    handler.subsetMIDASToASCIIFile(midasURI, countySelection,
                                   '/requests/astephen/dx_output/ukmo-midas_rain-daily_TEST_all-vars.txt')

    # The script stops here; everything below is kept for reference but is
    # not reached.
    sys.exit()

    print(handler.getVariables(datasetGroup=midasGroup, datasetURI=midasURI))
    print(handler.getDomain(midasGroup, midasURI, "all-vars"))

    timeStepSelection={"axis_10.2.1.1":['1959-01-01T09:00:0.000000', '1959-01-01T23:00:0.000000'],
                       "axis_10.2.1.2":[61.0, 49.0],
                       "axis_10.2.1.3":[-12.0, 3.0]}
    print(handler.getSelectedTimeSteps(midasURI, 'all-vars', timeStepSelection))

    hourlySelection={"axis_7.4.1.1":['1968-01-01T11:00:0.000000', '1968-01-01T11:59:0.000000'],
                     "axis_7.4.1.2":[61.0, 49.0],
                     "axis_7.4.1.3":[-12.0, 3.0]}
    print(handler.getSelectedVariableArrayDetails(midasURI, 'all-vars', hourlySelection))
    """axis_7.4.1.1 = ['1968-01-01T11:00:0.000000', '1968-01-01T11:59:0.000000']
    axis_7.4.1.2 = [61.0, 49.0]
    axis_7.4.1.3 = [-12.0, 3.0]
    datasetGroup_7 = Met Office - MIDAS Land Surface Observation Stations Data
    dataset_7.1 = UK Daily Rainfall
    dataset_7.3 = UK Daily Weather
    dataset_7.4 = UK Hourly Rainfall
    login = Login
    outputFormat_7.1.1 = ASCII Text
    outputFormat_7.3.1 = ASCII Text
    outputFormat_7.4.1 = ASCII Text
    """
    globalSelection={'axis_1.1.1.1':('1999-01-01T00:00:0.000000', '1999-01-02T00:00:0.000000'),
                     'axis_1.1.1.3': [0.0, 355.0],
                     'axis_1.1.1.2': [-90.0, 90.0]}
    print(handler.getSelectedVariableArrayDetails(midasURI, "all-vars", globalSelection))

    print(handler.subsetMIDASToASCIIFile(midasURI, globalSelection, "/tmp/stuff.txt"))
Note: See TracBrowser for help on using the repository browser.