# source: TI03-DataExtractor/branches/repackaging/server/lib/ndg/dx/server/MIDASDataHandler.py @ 1776
#
# Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI03-DataExtractor/branches/repackaging/server/lib/ndg/dx/server/MIDASDataHandler.py@1776
# Revision 1776, 15.0 KB checked in by spascoe, 13 years ago (diff)
#
# Various changes, mainly to config importing.
#
# Autogenerated DXWSInterface script now initialises successfully
# (i.e. no import errors).
#
#   Copyright (C) 2004 CCLRC & NERC( Natural Environment Research Council ).
#   This software may be distributed under the terms of the
#   Q Public License, version 1.0 or later. http://ndg.nerc.ac.uk/public_docs/QPublic_license.txt

"""
MIDASDataHandler.py
===================

MIDASDataHandler module for the dx package.

This module holds the MIDASDataHandler class that is used
to hold and access information about datasets held in BADC's
MIDAS Database clone (set of flat files).

"""
16
# Import required modules
import os
import cdms
import re

# Import global variables
from common import *
from DXDMLHandler import DXDMLHandler
from DXErrors import *
import DateTimeManager
import sys

# The MIDAS extraction scripts live outside the package tree, so their
# directory has to be put on the module search path before importing them.
sys.path.append("/home/badc/software/datasets/ukmo-midas/scripts/extract")
try:
    import midasMetadataHandler
    from midasSubsetter import *
    from getStations import *
except ImportError:
    # Best effort: the module must still import on hosts where the MIDAS
    # scripts are not installed. Methods that use midasMetadataHandler,
    # MIDASSubsetter or StationIDGetter will fail with NameError there.
    pass
35
36
class MIDASDataHandler:
    """
    A group of methods to connect to a dataset group or
    dataset to extract information about the contents.

    The MIDAS-specific work is delegated to the external helpers imported at
    module level (midasMetadataHandler, StationIDGetter, MIDASSubsetter).
    Axis selections are passed in as dictionaries whose keys end in
    ".<axisIndex>", where the index means: 1 time, 2 latitude, 3 longitude,
    4 county, 5 src_id.
    """

    # Bounding box for which subsetMIDASToASCIIFile skips station look-up
    # altogether (latitudes 61..49, longitudes -12..3).
    _FULL_DOMAIN_BBOX = [61.0, -12.0, 49.0, 3.0]

    # Splits "YYYY-MM-DD?hh:mm:ss" into its integer fields; the single "."
    # accepts either "T" or a space between the date and the time.
    _TIME_PATTERN = re.compile(r"(\d{4})-(\d+)-(\d+).(\d+):(\d+):(\d+)")

    def __init__(self, datasetURI=None):
        """
        Set up instance variables.

        datasetURI is stored as self.file and, when given, handed to
        _openDataFile (currently a no-op).
        """
        self.DXDML = DXDMLHandler()
        self.file = datasetURI
        if self.file:
            self._openDataFile(datasetURI=self.file)

    def _openDataFile(self, datasetGroup=None, dataset=None, datasetURI=None):
        """
        Opens a file and allocates to file handle called: self.file.

        Not implemented for MIDAS: does nothing and returns None.
        """
        pass

    def _getVariable(self, varname):
        """
        Gets variable metadata object from a data file.

        Not implemented for MIDAS: does nothing and returns None.
        """
        pass

    def _getBestName(self, v, vcount=0):
        """
        Returns the best name for a variable; for MIDAS this is simply
        the value passed in ('vcount' is ignored).
        """
        return v

    def getVariables(self, datasetGroup=None, dataset=None, datasetURI=None):
        """
        Returns a list of [<long_name>, <id>] pairs for the given dataset.

        MIDAS extractions always take every variable, so the list holds
        the single entry "all-vars" whatever arguments are given.
        """
        return [["All variables [mandatory]", "all-vars"]]

    def getDomain(self, datasetGroup=None, dataset=None, variable=None, datasetURI=None):
        """
        Returns the full domain listing for a variable as a list of axes
        [time, latitude, longitude, counties, src_ids], each of the form:

        [knownAxisString, id, longName, units, listType, unusedItem,
        listValue-1, listValue-2, ..., listValue-n]

        For example:

        ["time", "time", "Time", "hours since 1999-09-09 00:00:00", "start end interval",
        "", 0, 3, 6]

        With listType "start end interval" the trailing values are the start,
        the end and the interval. listType can also take the value "full list"
        where all values in the list are provided, or "start end" where only
        the first and last value are given.

        The table is looked up from 'dataset'; 'datasetURI' is used instead
        when 'dataset' is not supplied.
        """
        # Callers in this module pass the URI in the 'dataset' slot; accept
        # the explicit keyword as a fallback rather than ignoring it.
        if dataset is None:
            dataset = datasetURI

        tableID = midasMetadataHandler.uriMap(dataset)
        (timeDomain, bbox, areas) = midasMetadataHandler.getTableSummary(tableID)

        latLimits = (bbox[0], bbox[2])
        lonLimits = (bbox[1], bbox[3])

        lon = ["longitude", "longitude", "Longitude", "degrees_east", "start end", "",
               lonLimits[0], lonLimits[1]]
        lat = ["latitude", "latitude", "Latitude", "degrees_north", "start end", "",
               latLimits[0], latLimits[1]]

        (startTime, endTime, timeInterval, timeUnits) = timeDomain
        time = ["time", "time", "Time", timeUnits, "start end interval", "",
                startTime, endTime, timeInterval]

        counties = ["county", "county", "Counties", "N/A", "full list",
                    "Note that selection of items in this category will override the lat/lon selection"] + areas
        src_ids = ["src_id", "src_id", "WMO Station Numbers", "N/A", "comma-separated integer1-5",
                   "Note that selection of items in this category will override lat/lon and county/country selections",
                   0, 0, 0]

        return [time, lat, lon, counties, src_ids]

    def getHorizontalDomain(self, datasetGroup=None, dataset=None, variable=None, datasetURI=None):
        """
        Returns the horizontal domain as (northernExtent, westernExtent, southernExtent, easternExtent).

        Not implemented for MIDAS: does nothing and returns None.
        """
        pass

    def getVerticalSpatialDomain(self, datasetGroup=None, dataset=None, variable=None, datasetURI=None):
        """
        Returns the vertical domain as a tuple containing
        a list of levels (or "Single level" string) and the units.

        Not implemented for MIDAS: does nothing and returns None.
        """
        pass

    def getTemporalDomain(self, datasetGroup=None, dataset=None, variable=None, datasetURI=None):
        """
        Returns the temporal domain as a tuple of (start time, end time,
        (interval value, interval units)).

        A trailing "s" is removed from the units reported by
        midasMetadataHandler.getTemporalInfo.
        """
        tableID = midasMetadataHandler.uriMap(datasetURI)
        (start_time, end_time, interval_value, time_units) = midasMetadataHandler.getTemporalInfo(tableID)
        # endswith() rather than [-1] so that an empty units string is tolerated.
        if time_units.endswith("s"):
            time_units = time_units[:-1]
        return (start_time, end_time, (interval_value, time_units))

    def _normaliseSelectionTime(self, value):
        """
        Converts a selection time such as "1959-01-01T09:00:0.000000" into the
        form used for comparison in getSelectedTimeSteps: the "T" becomes a
        space and the seconds field is re-rendered with "%f".
        """
        items = value.replace("T", " ").split(":")
        return ":".join(items[:-1]) + ":" + ("%f" % float(items[-1]))

    def getSelectedTimeSteps(self, datasetURI, variable, axisSelectionDict):
        """
        Returns a list of time step strings based on the selection.

        The time selection is the entry of 'axisSelectionDict' whose key ends
        in ".1"; its first two items are the start and end date/time.

        Raises DXOptionHandlingError if there is no time selection or if no
        time step falls inside the selected range.
        """
        tableID = midasMetadataHandler.uriMap(datasetURI)
        (start_time, end_time, interval_value, time_units) = midasMetadataHandler.getTemporalInfo(tableID)

        timeAxisIndex = 1
        startDateTime = None
        endDateTime = None

        for key in axisSelectionDict.keys():
            if int(key.split(".")[-1]) == timeAxisIndex:
                (startDateTime, endDateTime) = axisSelectionDict[key][:2]

        if startDateTime is None:
            # The previous fallback called asComponentTime() on the tuple from
            # getTemporalDomain(), which could only raise AttributeError.
            raise DXOptionHandlingError("No time selection was provided for '%s', please go back and re-select." % variable)

        startDateTime = self._normaliseSelectionTime(startDateTime)
        endDateTime = self._normaliseSelectionTime(endDateTime)

        # NOTE(review): getTemporalDomain() strips a trailing "s" from the
        # units but the raw value is passed here - confirm which form
        # DateTimeManager.createList expects.
        timeSteps = DateTimeManager.createList(getDateTimeComponents(startDateTime),
                                               getDateTimeComponents(endDateTime),
                                               (interval_value, time_units),
                                               listtype="tuple")
        selectedTimes = []

        # NOTE(review): the range test below is a plain string comparison and
        # the seconds field is not zero-padded, so it is only reliable while
        # both sides render seconds with the same number of digits.
        for timeStep in timeSteps:
            ts = "%.4d-%.2d-%.2d %.2d:%.2d:%f" % timeStep
            if ts > endDateTime:
                break
            if ts < startDateTime:
                continue
            selectedTimes.append(ts)

        if selectedTimes == []:
            raise DXOptionHandlingError("All selected time steps for '%s' are out of range, please go back and re-select." % variable)

        return selectedTimes

    def _getMIDASAxisDetails(self, axisSelectionDict):
        """
        Returns a dictionary of selections of items with keys:
        "time", "lat", "lon", "counties", "src_ids".

        "time" is a (start, end) tuple; "counties" and "src_ids" default to
        empty lists. Raises DXOptionHandlingError when the time, latitude or
        longitude selection is absent, since every caller needs all three.
        """
        keys = list(axisSelectionDict.keys())
        keys.sort()

        startTime = endTime = lat = lon = None
        counties = []
        src_ids = []

        for key in keys:
            axisIndex = int(key.split(".")[-1])
            selection = axisSelectionDict[key]

            if axisIndex == 1:    # time
                # Only the first two items are used, as in getSelectedTimeSteps.
                (startTime, endTime) = selection[:2]
            elif axisIndex == 2:  # lat
                lat = selection
            elif axisIndex == 3:  # lon
                lon = selection
            elif axisIndex == 4:  # county
                counties = selection
            elif axisIndex == 5:  # src_id
                src_ids = selection

        missing = []
        if startTime is None:
            missing.append("time")
        if lat is None:
            missing.append("latitude")
        if lon is None:
            missing.append("longitude")
        if missing:
            raise DXOptionHandlingError("No selection was provided for: %s, please go back and re-select." % ", ".join(missing))

        d = {}
        d["time"] = (startTime, endTime)
        d["lat"] = lat
        d["lon"] = lon
        d["counties"] = counties
        d["src_ids"] = src_ids
        return d

    def _getMonthSpan(self, startTime, endTime):
        """
        Returns (number of months, middle year) for two "YYYY-MM-..." strings.

        The month count is the difference between the two calendar months,
        with a minimum of 1.
        """
        (sy, sm) = (int(startTime[:4]), int(startTime[5:7]))
        (ey, em) = (int(endTime[:4]), int(endTime[5:7]))

        nmonths = (ey - sy) * 12 + (em - sm)
        if nmonths < 1:
            nmonths = 1

        # Floor division keeps the year an integer on every Python version.
        midYear = sy + (ey - sy) // 2
        return (nmonths, midYear)

    def getSelectedVariableArrayDetails(self, datasetURI, variable, axisSelectionDict):
        """
        Returns a tuple representing the (array shape, grid shape, size)
        of the selected subset of a variable.

        For MIDAS the two shapes are always None; size is the volume estimate
        returned by midasMetadataHandler.getMidasExtractionCosts.
        """
        axDict = self._getMIDASAxisDetails(axisSelectionDict)

        (startTime, endTime) = axDict["time"]
        lat = axDict["lat"]
        lon = axDict["lon"]
        counties = axDict["counties"]
        src_ids = axDict["src_ids"]

        bbox = (lat[0], lon[0], lat[1], lon[1])

        # The cost estimator takes None, not 0, for "no such filter".
        if src_ids:
            nsrcids = len(src_ids)
        else:
            nsrcids = None

        if counties:
            areaCount = len(counties)
        else:
            areaCount = None

        (nmonths, midYear) = self._getMonthSpan(startTime, endTime)

        tid = midasMetadataHandler.uriMap(datasetURI)
        # The duration estimate is returned by the helper but not used here.
        (estDur, estVol) = midasMetadataHandler.getMidasExtractionCosts(tid, nmonths, midYear,
                                                                       areaCount=areaCount, bbox=bbox,
                                                                       nsrcids=nsrcids)
        return (None, None, estVol)

    def getSelectedVariableSubsetSize(self, datasetURI, varID, axisSelectionDict):
        """
        Returns the size of the selected subset of a variable, i.e. the third
        item of getSelectedVariableArrayDetails().
        """
        return self.getSelectedVariableArrayDetails(datasetURI, varID, axisSelectionDict)[2]

    def _cleanSourceIDs(self, src_ids):
        """
        Returns the station ids as integer strings with duplicates removed,
        keeping first-seen order (e.g. 14958.0 becomes "14958").
        """
        cleaned = []
        for sid in src_ids:
            ns = str(int(float(sid)))
            if ns not in cleaned:
                cleaned.append(ns)
        return cleaned

    def _toSubsetterTime(self, value):
        """
        Reformats "YYYY-MM-DDThh:mm:ss..." as the "YYYYMMDDhhmm" string passed
        to MIDASSubsetter. Raises DXOptionHandlingError if 'value' does not
        have that layout.
        """
        match = self._TIME_PATTERN.match(value)
        if match is None:
            raise DXOptionHandlingError("Cannot interpret date/time '%s', please go back and re-select." % value)
        return "%.4d%.2d%.2d%.2d%.2d" % tuple([int(i) for i in match.groups()][:5])

    def subsetMIDASToASCIIFile(self, datasetURI, axisSelectionDict, outputPath, region=None):
        """
        Extracts the table mapped from 'datasetURI' to the ASCII file
        'outputPath', restricted by the selections in 'axisSelectionDict'.

        Station ids are chosen in this order of precedence: an explicit src_id
        selection; otherwise the selected counties; otherwise the lat/lon box,
        unless it equals the full-domain box, in which case no station filter
        is applied. 'region' is passed through to MIDASSubsetter unchanged.
        """
        tableID = midasMetadataHandler.uriMap(datasetURI)

        axDict = self._getMIDASAxisDetails(axisSelectionDict)

        (startTime, endTime) = axDict["time"]
        lat = axDict["lat"]
        lon = axDict["lon"]
        counties = axDict["counties"]
        src_ids = axDict["src_ids"]

        bbox = (lat[0], lon[0], lat[1], lon[1])

        print("%s %s" % (startTime, endTime))
        print("Selected counties: %s" % (counties,))

        if src_ids:
            print("Not calling station ID getter as list provided.")
            src_ids = self._cleanSourceIDs(src_ids)
        elif counties:
            print("BB: %s" % (None,))
            cunique = []
            for c in counties:
                if c not in cunique:
                    cunique.append(c)
            src_ids = StationIDGetter(cunique, bbox=None, dataTypes=[], startTime=startTime,
                                      endTime=endTime, outputFile=None, noprint=1).stList
        else:
            boundingBox = list(bbox)
            # Put the larger latitude first.
            if boundingBox[2] > boundingBox[0]:
                (boundingBox[0], boundingBox[2]) = (boundingBox[2], boundingBox[0])
            print("BB: %s" % (boundingBox,))

            if boundingBox != self._FULL_DOMAIN_BBOX:
                src_ids = StationIDGetter(counties, boundingBox, dataTypes=[], startTime=startTime,
                                          endTime=endTime, outputFile=None, noprint=1).stList
            else:
                src_ids = None

        # Reformat start and end time for MIDAS subsetter
        st = self._toSubsetterTime(startTime)
        et = self._toSubsetterTime(endTime)

        print("\nStart and end times for selection: %s %s" % (st, et))
        print("Writing to output path: %s" % (outputPath,))
        MIDASSubsetter([tableID], outputPath, startTime=st, endTime=et,
                       src_ids=src_ids, region=region)

    def getCFGlobalAttributes(self, datafile):
        """
        Gets any CF metadata global attributes that are available
        from the source dataset/file.

        Returns a dictionary holding each name in
        CF_METADATA_GLOBAL_ATTRIBUTE_KEYS that self.file has as an attribute.
        """
        # Make sure data file is open
        if self.file is None:
            self._openDataFile(datasetURI=datafile)

        gatts = {}
        for gatt in CF_METADATA_GLOBAL_ATTRIBUTE_KEYS:
            if hasattr(self.file, gatt):
                gatts[gatt] = getattr(self.file, gatt)

        return gatts
390
391
if __name__ == "__main__":
    # Manual smoke test; needs the MIDAS helper scripts and data to be installed.
    a = MIDASDataHandler()

    a.subsetMIDASToASCIIFile(
        'class://MIDAS/rain-daily',
        {
            'axis_10.1.1.5': [14958.0, 14958.0],
            'axis_10.1.1.2': [49.0, 61.0],
            'axis_10.1.1.3': [-12.0, 3.0],
            'axis_10.1.1.1': ['1989-01-01T00:00:0.000000', '1989-01-05T00:00:0.000000'],
            'axis_10.1.1.4': ['HEREFORD & WORCESTER', 'HERTFORDSHIRE', 'HUMBERSIDE'],
        },
        '/requests/astephen/dx_output/ukmo-midas_rain-daily_TEST_all-vars.txt')

    # Only the extraction above is run; the calls below are kept for
    # reference and are reached only if this sys.exit() is removed.
    sys.exit()

    print(a.getVariables(datasetGroup='Met Office - MIDAS Land Surface Observation Stations Data',
                         datasetURI='class://MIDAS/rain-daily'))
    print(a.getDomain('Met Office - MIDAS Land Surface Observation Stations Data',
                      'class://MIDAS/rain-daily', "all-vars"))
    print(a.getSelectedTimeSteps('class://MIDAS/rain-daily', 'all-vars',
                                 {"axis_10.2.1.1": ['1959-01-01T09:00:0.000000', '1959-01-01T23:00:0.000000'],
                                  "axis_10.2.1.2": [61.0, 49.0],
                                  "axis_10.2.1.3": [-12.0, 3.0]}))

    print(a.getSelectedVariableArrayDetails('class://MIDAS/rain-daily', 'all-vars',
                                            {"axis_7.4.1.1": ['1968-01-01T11:00:0.000000', '1968-01-01T11:59:0.000000'],
                                             "axis_7.4.1.2": [61.0, 49.0],
                                             "axis_7.4.1.3": [-12.0, 3.0]}))
    # Request parameters recorded alongside the selection above:
    #   axis_7.4.1.1 = ['1968-01-01T11:00:0.000000', '1968-01-01T11:59:0.000000']
    #   axis_7.4.1.2 = [61.0, 49.0]
    #   axis_7.4.1.3 = [-12.0, 3.0]
    #   datasetGroup_7 = Met Office - MIDAS Land Surface Observation Stations Data
    #   dataset_7.1 = UK Daily Rainfall
    #   dataset_7.3 = UK Daily Weather
    #   dataset_7.4 = UK Hourly Rainfall
    #   login = Login
    #   outputFormat_7.1.1 = ASCII Text
    #   outputFormat_7.3.1 = ASCII Text
    #   outputFormat_7.4.1 = ASCII Text
    print(a.getSelectedVariableArrayDetails('class://MIDAS/rain-daily', "all-vars",
                                            {'axis_1.1.1.1': ('1999-01-01T00:00:0.000000', '1999-01-02T00:00:0.000000'),
                                             'axis_1.1.1.3': [0.0, 355.0],
                                             'axis_1.1.1.2': [-90.0, 90.0]}))

    print(a.subsetMIDASToASCIIFile('class://MIDAS/rain-daily',
                                   {'axis_1.1.1.1': ('1999-01-01T00:00:0.000000', '1999-01-02T00:00:0.000000'),
                                    'axis_1.1.1.3': [0.0, 355.0],
                                    'axis_1.1.1.2': [-90.0, 90.0]},
                                   "/tmp/stuff.txt"))
# Note: See TracBrowser for help on using the repository browser.