source: TI03-DataExtractor/trunk/pydxs/MIDASDataHandler.py @ 1715

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI03-DataExtractor/trunk/pydxs/MIDASDataHandler.py@1715
Revision 1715, 15.0 KB checked in by astephen, 13 years ago (diff)

Merged with titania version.

Line 
1#   Copyright (C) 2004 CCLRC & NERC( Natural Environment Research Council ).
2#   This software may be distributed under the terms of the
3#   Q Public License, version 1.0 or later. http://ndg.nerc.ac.uk/public_docs/QPublic_license.txt
4
5"""
6MIDASDataHandler.py
7===================
8
9MIDASDataHandler module for the dx package.
10
11This module holds the MIDASDataHandler class that is used
12to hold and access information about datasets held in BADC's
13MIDAS Database clone (set of flat files).
14
15"""
16
17# Import required modules
18import os
19import cdms
20import re
21
22# Import global variables
23from serverConfig import *
24from common import *
25from DXDMLHandler import DXDMLHandler
26from DXErrors import *
27import DateTimeManager
28import sys
29sys.path.append("/home/badc/software/datasets/ukmo-midas/scripts/extract")
30try:
31    import midasMetadataHandler
32    from midasSubsetter import *
33    from getStations import *
34except:
35    pass
36
37
class MIDASDataHandler:
    """
    Handler exposing the variables, domains and subsetting operations of
    datasets held in the MIDAS flat-file store.

    Metadata look-ups are delegated to midasMetadataHandler, station look-ups
    to StationIDGetter and the extraction itself to MIDASSubsetter.

    Axis selections are passed around as dictionaries keyed by strings whose
    final dot-separated component is the axis index, e.g. "axis_10.1.1.2":
    1=time, 2=latitude, 3=longitude, 4=county, 5=src_id.
    """

    # Axis indices (last dot-separated component of a selection key).
    _TIME_AXIS = 1
    _LAT_AXIS = 2
    _LON_AXIS = 3
    _COUNTY_AXIS = 4
    _SRC_ID_AXIS = 5

    # Bounding box [north, west, south, east] for which no station filtering
    # is applied by subsetMIDASToASCIIFile.
    _FULL_DOMAIN_BBOX = [61.0, -12.0, 49.0, 3.0]

    # Year-month-day, any single separator character, then hour:minute:second.
    # Only the integer part of the seconds is captured.
    _TIME_PATTERN = re.compile(r"(\d{4})-(\d+)-(\d+).(\d+):(\d+):(\d+)")

    def __init__(self, datasetURI=None):
        """
        Set up instance variables.

        datasetURI - optional URI of the dataset; stored as self.file and
                     passed to _openDataFile when it is not empty.
        """
        self.DXDML = DXDMLHandler()
        self.file = datasetURI
        if self.file:
            self._openDataFile(datasetURI=self.file)


    def _openDataFile(self, datasetGroup=None, dataset=None, datasetURI=None):
        """
        Placeholder: there is no file to open for MIDAS, so this does nothing
        and self.file is left unchanged.
        """
        pass


    def _getVariable(self, varname):
        """
        Placeholder: not implemented for MIDAS, always returns None.
        """
        pass


    def _getBestName(self, v, vcount=0):
        """
        Returns the best name for a variable; for MIDAS this is simply the
        value passed in ('vcount' is ignored).
        """
        return v


    def getVariables(self, datasetGroup=None, dataset=None, datasetURI=None):
        """
        Returns the list of variables as [<long_name>, <id>] pairs.

        MIDAS extractions always include every variable, so a single fixed
        "all-vars" entry is returned whatever the arguments are.
        """
        return [["All variables [mandatory]", "all-vars"]]


    def getDomain(self, datasetGroup=None, dataset=None, variable=None, datasetURI=None):
        """
        Returns the full domain listing as a list of axis descriptions in the
        order [time, latitude, longitude, counties, src_ids]. Each axis
        description has the form:

        [knownAxisString, id, longName, units, listType, unusedItem,
        listValue-1, listValue-2, ..., listValue-n]

        For example:

        ["time", "time", "Time", "hours since 1999-09-09 00:00:00", "start end interval",
        "", 0, 3, 6]

        listType can also take the value "full list" where all values in the
        list are provided, or "start end" where only the first and last value
        are given.

        The table is identified from 'dataset'; if that is not given then
        'datasetURI' is used instead. 'datasetGroup' and 'variable' are ignored.
        """
        tableID = midasMetadataHandler.uriMap(dataset or datasetURI)
        (timeDomain, bbox, areas) = midasMetadataHandler.getTableSummary(tableID)

        # bbox is indexed as (lat, lon, lat, lon).
        latLimits = (bbox[0], bbox[2])
        lonLimits = (bbox[1], bbox[3])
        (startTime, endTime, timeInterval, timeUnits) = timeDomain

        time = ["time", "time", "Time", timeUnits, "start end interval", "",
                startTime, endTime, timeInterval]
        lat = ["latitude", "latitude", "Latitude", "degrees_north", "start end", "",
               latLimits[0], latLimits[1]]
        lon = ["longitude", "longitude", "Longitude", "degrees_east", "start end", "",
               lonLimits[0], lonLimits[1]]
        counties = ["county", "county", "Counties", "N/A", "full list",
                    "Note that selection of items in this category will override the lat/lon selection"] + list(areas)
        src_ids = ["src_id", "src_id", "WMO Station Numbers", "N/A", "comma-separated integer1-5",
                   "Note that selection of items in this category will override lat/lon and county/country selections",
                   0, 0, 0]

        rtlist = [time, lat, lon, counties, src_ids]
        print(rtlist)  # trace output
        return rtlist


    def getHorizontalDomain(self, datasetGroup=None, dataset=None, variable=None, datasetURI=None):
        """
        Placeholder: not implemented for MIDAS, always returns None.
        """
        pass


    def getVerticalSpatialDomain(self, datasetGroup=None, dataset=None, variable=None, datasetURI=None):
        """
        Placeholder: not implemented for MIDAS, always returns None.
        """
        pass


    def getTemporalDomain(self, datasetGroup=None, dataset=None, variable=None, datasetURI=None):
        """
        Returns the temporal domain of 'datasetURI' as a tuple of (start time,
        end time, (interval value, interval units)). A trailing "s" is removed
        from the units (e.g. "hours" becomes "hour"). The other arguments are
        ignored.
        """
        tableID = midasMetadataHandler.uriMap(datasetURI)
        (start_time, end_time, interval_value, time_units) = midasMetadataHandler.getTemporalInfo(tableID)
        if time_units.endswith("s"):
            time_units = time_units[:-1]
        return (start_time, end_time, (interval_value, time_units))


    def _axisIndex(self, key):
        """
        Returns the integer axis index encoded as the last dot-separated
        component of a selection key, e.g. "axis_10.1.1.2" gives 2.
        """
        return int(key.split(".")[-1])


    def _normaliseDateTime(self, dateTime):
        """
        Returns 'dateTime' with any "T" replaced by a space and the seconds
        (the part after the last colon) re-written with "%f" formatting.
        """
        items = dateTime.replace("T", " ").split(":")
        return ":".join(items[:-1]) + ":" + ("%f" % float(items[-1]))


    def getSelectedTimeSteps(self, datasetURI, variable, axisSelectionDict):
        """
        Returns a list of time step strings ("YYYY-MM-DD hh:mm:s.ssssss") lying
        within the time selection held in 'axisSelectionDict'.

        If the dictionary holds no time selection the start and end times
        reported for the dataset are used instead.

        Raises DXOptionHandlingError if no time step falls in the selection.
        """
        tableID = midasMetadataHandler.uriMap(datasetURI)
        (start_time, end_time, interval_value, time_units) = midasMetadataHandler.getTemporalInfo(tableID)

        startDateTime = None
        endDateTime = None
        for key in axisSelectionDict.keys():
            if self._axisIndex(key) == self._TIME_AXIS:
                print(axisSelectionDict)  # trace output
                (startDateTime, endDateTime) = axisSelectionDict[key][:2]

        if startDateTime is None:
            # The original code called asComponentTime() on the tuple returned
            # by getTemporalDomain here, which can only fail.
            # NOTE(review): assumes getTemporalInfo reports start/end in the same
            # "YYYY-MM-DD hh:mm:ss" style as a user selection - confirm.
            (startDateTime, endDateTime) = (str(start_time), str(end_time))

        startDateTime = self._normaliseDateTime(startDateTime)
        endDateTime = self._normaliseDateTime(endDateTime)

        timeSteps = DateTimeManager.createList(getDateTimeComponents(startDateTime),
                                               getDateTimeComponents(endDateTime),
                                               (interval_value, time_units),
                                               listtype="tuple")
        selectedTimes = []
        for timeStep in timeSteps:
            ts = "%.4d-%.2d-%.2d %.2d:%.2d:%f" % timeStep
            # NOTE(review): these are string comparisons and the seconds field
            # is not zero-padded, so they are only reliable while both sides
            # have the same number of digits in the seconds.
            if ts > endDateTime:
                break
            if ts < startDateTime:
                continue
            selectedTimes.append(ts)

        if not selectedTimes:
            raise DXOptionHandlingError("All selected time steps for '%s' are out of range, please go back and re-select." % variable)

        return selectedTimes


    def _getMIDASAxisDetails(self, axisSelectionDict):
        """
        Returns a dictionary of the selections with keys:
        "time", "lat", "lon", "counties", "src_ids".

        "time" is a (start, end) tuple; "lat" and "lon" are the selected
        values as given; "counties" and "src_ids" default to an empty list
        when not selected. Keys with other axis indices are ignored.

        Raises DXOptionHandlingError if the time, latitude or longitude
        selection is missing.
        """
        keys = list(axisSelectionDict.keys())
        keys.sort()

        # Keys are visited in sorted order so that, should two keys map to the
        # same axis index, the last one wins.
        selected = {}
        for key in keys:
            selected[self._axisIndex(key)] = axisSelectionDict[key]

        required = ((self._TIME_AXIS, "time"), (self._LAT_AXIS, "lat"), (self._LON_AXIS, "lon"))
        missing = [name for (index, name) in required if index not in selected]
        if missing:
            raise DXOptionHandlingError("No selection was provided for required axis/axes: %s." % ", ".join(missing))

        (startTime, endTime) = selected[self._TIME_AXIS][:2]

        d = {}
        d["time"] = (startTime, endTime)
        d["lat"] = selected[self._LAT_AXIS]
        d["lon"] = selected[self._LON_AXIS]
        d["counties"] = selected.get(self._COUNTY_AXIS, [])
        d["src_ids"] = selected.get(self._SRC_ID_AXIS, [])
        return d


    def _getYearAndMonth(self, dateTime):
        """
        Returns (year, month) as integers from a "YYYY-MM-..." string.
        """
        return (int(dateTime[:4]), int(dateTime[5:7]))


    def _countMonths(self, sy, sm, ey, em):
        """
        Returns the number of months from start year/month to end year/month
        (the end month itself is not counted), with a minimum of 1.
        """
        mc = (ey - sy) * 12 + (em - sm)
        if mc < 1:
            mc = 1
        return mc


    def _midYear(self, sy, ey):
        """
        Returns the year half way between 'sy' and 'ey' (rounded down).
        """
        return sy + (ey - sy) // 2


    def getSelectedVariableArrayDetails(self, datasetURI, variable, axisSelectionDict):
        """
        Returns a tuple of (array shape, grid shape, size) for the selection.

        For MIDAS the array shape and grid shape are always None; the size is
        the volume estimated by midasMetadataHandler.getMidasExtractionCosts
        from the number of months, the middle year, the bounding box and the
        number of counties and station ids selected. 'variable' is ignored.
        """
        axDict = self._getMIDASAxisDetails(axisSelectionDict)

        (startTime, endTime) = axDict["time"]
        lat = axDict["lat"]
        lon = axDict["lon"]
        bbox = (lat[0], lon[0], lat[1], lon[1])

        # An empty selection is passed on as None rather than a zero count.
        nsrcids = len(axDict["src_ids"]) or None
        areaCount = len(axDict["counties"]) or None

        print("%s %s" % (startTime, endTime))  # trace output

        (sy, sm) = self._getYearAndMonth(startTime)
        (ey, em) = self._getYearAndMonth(endTime)
        print((ey, sm))  # trace output

        nmonths = self._countMonths(sy, sm, ey, em)
        # The original computed (ey-sy)+sy, which is just the end year.
        midYear = self._midYear(sy, ey)

        tid = midasMetadataHandler.uriMap(datasetURI)
        (estDur, estVol) = midasMetadataHandler.getMidasExtractionCosts(tid, nmonths, midYear,
                                   areaCount=areaCount, bbox=bbox, nsrcids=nsrcids)

        # Only the estimated volume is reported; the duration is not used.
        return (None, None, estVol)


    def getSelectedVariableSubsetSize(self, datasetURI, varID, axisSelectionDict):
        """
        Returns the size element of getSelectedVariableArrayDetails for the
        selection.
        """
        return self.getSelectedVariableArrayDetails(datasetURI, varID, axisSelectionDict)[2]


    def _orderBoundingBox(self, bbox):
        """
        Returns 'bbox' as a list with items 0 and 2 swapped if necessary so
        that item 0 is not smaller than item 2.
        """
        b = list(bbox)
        if b[2] > b[0]:
            (b[0], b[2]) = (b[2], b[0])
        return b


    def _cleanSrcIds(self, src_ids):
        """
        Returns the station ids as integer strings (e.g. 14958.0 becomes
        "14958") with duplicates removed and first-seen order preserved.
        """
        cleaned = []
        for sid in src_ids:
            ns = str(int(float(sid)))
            if ns not in cleaned:
                cleaned.append(ns)
        return cleaned


    def _formatMidasTime(self, dateTime):
        """
        Returns 'dateTime' ("YYYY-MM-DDThh:mm:ss...") as "YYYYMMDDhhmm".

        Raises DXOptionHandlingError if the string is not in that form.
        """
        match = self._TIME_PATTERN.match(dateTime)
        if match is None:
            raise DXOptionHandlingError("Cannot interpret time selection '%s'." % dateTime)
        # Seconds are matched but not used.
        return "%.4d%.2d%.2d%.2d%.2d" % tuple([int(i) for i in match.groups()][:5])


    def subsetMIDASToASCIIFile(self, datasetURI, axisSelectionDict, outputPath, region=None):
        """
        Runs MIDASSubsetter for the table behind 'datasetURI', restricted to
        the time range and stations described by 'axisSelectionDict', writing
        to 'outputPath'. 'region' is passed straight through to MIDASSubsetter.

        The stations are chosen as follows:
          - selected src_ids are used as given (cleaned of duplicates);
          - otherwise selected counties are looked up with StationIDGetter;
          - otherwise the lat/lon bounding box is looked up with
            StationIDGetter, unless it equals the full domain box in which
            case no station filter (None) is passed on.

        Raises DXOptionHandlingError if a required axis is missing or a time
        cannot be interpreted.
        """
        tableID = midasMetadataHandler.uriMap(datasetURI)
        axDict = self._getMIDASAxisDetails(axisSelectionDict)

        (startTime, endTime) = axDict["time"]
        lat = axDict["lat"]
        lon = axDict["lon"]
        counties = axDict["counties"]
        src_ids = axDict["src_ids"]
        bbox = (lat[0], lon[0], lat[1], lon[1])

        # Reformat start and end time for the MIDAS subsetter up front so that
        # a bad time fails before any station look-up is made.
        st = self._formatMidasTime(startTime)
        et = self._formatMidasTime(endTime)

        print("%s %s" % (startTime, endTime))  # trace output
        print("Selected counties: %s" % (counties,))

        if src_ids:
            print("Not calling station ID getter as list provided.")
            src_ids = self._cleanSrcIds(src_ids)
        else:
            # Invoke station id getter; a county selection overrides the box.
            if counties:
                boundingBox = None
            else:
                boundingBox = self._orderBoundingBox(bbox)
            print("BB: %s" % (boundingBox,))

            if counties:
                cunique = []
                for c in counties:
                    if c not in cunique:
                        cunique.append(c)
                src_ids = StationIDGetter(cunique, bbox=None, dataTypes=[], startTime=startTime,
                             endTime=endTime, outputFile=None, noprint=1).stList
            elif boundingBox != self._FULL_DOMAIN_BBOX:
                src_ids = StationIDGetter(counties, boundingBox, dataTypes=[], startTime=startTime,
                             endTime=endTime, outputFile=None, noprint=1).stList
            else:
                src_ids = None

        print("\nStart and end times for selection: %s %s" % (st, et))
        print("Writing to output path: %s" % (outputPath,))
        MIDASSubsetter([tableID], outputPath, startTime=st, endTime=et,
                       src_ids=src_ids, region=region)


    def getCFGlobalAttributes(self, datafile):
        """
        Returns a dictionary of those CF_METADATA_GLOBAL_ATTRIBUTE_KEYS that
        are present as attributes of self.file.

        'datafile' is only passed to _openDataFile when self.file is None.
        """
        # Make sure data file is open
        if self.file is None:
            self._openDataFile(datasetURI=datafile)
        gatts = {}

        for gatt in CF_METADATA_GLOBAL_ATTRIBUTE_KEYS:
            if not hasattr(self.file, gatt):
                continue
            value = getattr(self.file, gatt)
            # self.file may be the plain URI string stored by __init__, whose
            # methods can share a name with an attribute key; a method is
            # never a metadata value so it is skipped.
            if callable(value):
                continue
            gatts[gatt] = value

        return gatts
391
392
if __name__=="__main__":
    # Ad-hoc manual checks. They need the MIDAS scripts to be importable and
    # write to hard-coded output paths.
    handler=MIDASDataHandler()
    rainDailyURI='class://MIDAS/rain-daily'
    midasGroup='Met Office - MIDAS Land Surface Observation Stations Data'

    countySelection={
        'axis_10.1.1.1': ['1989-01-01T00:00:0.000000', '1989-01-05T00:00:0.000000'],
        'axis_10.1.1.2': [49.0, 61.0],
        'axis_10.1.1.3': [-12.0, 3.0],
        'axis_10.1.1.4': ['HEREFORD & WORCESTER', 'HERTFORDSHIRE', 'HUMBERSIDE'],
        'axis_10.1.1.5': [14958.0, 14958.0],
    }
    handler.subsetMIDASToASCIIFile(rainDailyURI, countySelection,
                    '/requests/astephen/dx_output/ukmo-midas_rain-daily_TEST_all-vars.txt')

    # The script stops here: everything below is currently never executed.
    sys.exit()

    print(handler.getVariables(datasetGroup=midasGroup, datasetURI=rainDailyURI))
    print(handler.getDomain(midasGroup, rainDailyURI, "all-vars"))

    timeStepSelection={
        "axis_10.2.1.1": ['1959-01-01T09:00:0.000000', '1959-01-01T23:00:0.000000'],
        "axis_10.2.1.2": [61.0, 49.0],
        "axis_10.2.1.3": [-12.0, 3.0],
    }
    print(handler.getSelectedTimeSteps(rainDailyURI, 'all-vars', timeStepSelection))

    hourlySelection={
        "axis_7.4.1.1": ['1968-01-01T11:00:0.000000', '1968-01-01T11:59:0.000000'],
        "axis_7.4.1.2": [61.0, 49.0],
        "axis_7.4.1.3": [-12.0, 3.0],
    }
    print(handler.getSelectedVariableArrayDetails(rainDailyURI, 'all-vars', hourlySelection))
    """axis_7.4.1.1 = ['1968-01-01T11:00:0.000000', '1968-01-01T11:59:0.000000']
    axis_7.4.1.2 = [61.0, 49.0]
    axis_7.4.1.3 = [-12.0, 3.0]
    datasetGroup_7 = Met Office - MIDAS Land Surface Observation Stations Data
    dataset_7.1 = UK Daily Rainfall
    dataset_7.3 = UK Daily Weather
    dataset_7.4 = UK Hourly Rainfall
    login = Login
    outputFormat_7.1.1 = ASCII Text
    outputFormat_7.3.1 = ASCII Text
    outputFormat_7.4.1 = ASCII Text
    """
    globalSelection={
        'axis_1.1.1.1': ('1999-01-01T00:00:0.000000', '1999-01-02T00:00:0.000000'),
        'axis_1.1.1.2': [-90.0, 90.0],
        'axis_1.1.1.3': [0.0, 355.0],
    }
    print(handler.getSelectedVariableArrayDetails(rainDailyURI, "all-vars", globalSelection))

    print(handler.subsetMIDASToASCIIFile(rainDailyURI, globalSelection, "/tmp/stuff.txt"))
Note: See TracBrowser for help on using the repository browser.