source: TI03-DataExtractor/branches/csml_hooks/CSMLDataHandler.py @ 891

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI03-DataExtractor/branches/csml_hooks/CSMLDataHandler.py@891
Revision 891, 21.8 KB checked in by domlowe, 14 years ago (diff)

added comments to CSMLDataHandler.py

Line 
1#   Copyright (C) 2004 CCLRC & NERC( Natural Environment Research Council ).
2#   This software may be distributed under the terms of the
3#   Q Public License, version 1.0 or later. http://ndg.nerc.ac.uk/public_docs/QPublic_license.txt
4
5"""
6CSMLDataHandler.py
7==================
8
9CSMLDataHandler module for the dx package.
10
11This module holds the CSMLDataHandler class that is used
12to hold and access information about datasets held in CSML-compatible
13formats visible to the dx package.
14
15"""
16
17# Import required modules
18import os
19import cdms
20import re
21
22# Import global variables
23#from serverConfig import *
24#from common import *
25#from DXDMLHandler import DXDMLHandler
26#from DXErrors import *
27
28def getValuesInRange(start, end, array):
29    """
30    Takes a start and end value and returns the values in the array that are bet
31ween them.
32    If not in range and are the same value then returns [start].
33    """
34    # check all are floats
35    array=map(lambda x: float(x), list(array))
36    if array[0]>array[-1]: array.reverse()
37    if start>end:  (start, end)=(end, start)
38    (start, end)=(float(start), float(end))
39    rtarray=[]
40    for i in array:
41        if i>=start and i<=end:
42            rtarray.append(i)
43    if rtarray==[] and start==end:
44        rtarray=[start]
45    return rtarray
46
47
48
49class CSMLDataHandler:
50    """
51    A group of methods to connect to a dataset group or
52    dataset to extract information about the contents.
53    """
54 
55    def __init__(self, datasetURI=None):
56        """
57        Set up instance variables.
58        """
59        self.DXDML=None #DXDMLHandler()
60        self.file=datasetURI
61        if self.file: self._openDataFile(datasetURI=self.file) 
62   
63   
64    def _openDataFile(self, datasetGroup=None, dataset=None, datasetURI=None):
65        #CSML: I think this can be ignored. File opening is handled by the csmlio interface.
66        """
67        Opens a file and allocates to file handle called: self.file.
68        """
69        print """CSML: _openDataFile(datasetURI)
70Sets:    self.file=CSMLDataset(datasetURI)
71CSML_END
72"""
73        if datasetURI:
74            cdmlfile=datasetURI
75        else:
76            for item in self.DXDML.getDatasetsAndDatasetURIs(datasetGroup):
77                if item[0]==dataset:
78                    cdmlfile=item[1]
79       
80        try:
81            self.file=cdms.open(cdmlfile)
82        except IOError, error:
83            raise DXDataIOError, error
84
85
86    def _getVariable(self, varname):
87        #CSML, Not needed.
88         
89        """
90        Gets variable metadata object from a data file.
91        """
92        print """CSML: _getVariable(varname)
93Returns a CDML metadata variable.
94It contains metadata such as attributes and axes but not the
95data array. It is read from self.file.
96
97CSML_END
98"""     
99        try:
100            rtvalue=self.file[varname]
101        except:
102            raise DXOptionHandlingError, "Cannot find variable %s in file %s" % (varname, self.file.id)
103        return rtvalue
104       
105
106    def _getBestName(self, v, vcount=0):
107        #CSML: should probably call getFeatureParameter (which returns contents of the <parameter> element or failing that getFeatureDescription (which returns contents of the gml:description element)
108       
109        """
110        Returns the best name for a cdms variable.
111        """
112        print """CSML: _getBestName(v)
113Returns the best standard_name, long_name etc from those available.
114v is returned from self._getVariable()
115
116
117CSML_END
118"""     
119        if not hasattr(v, "standard_name"): 
120            if not hasattr(v, "long_name"):
121                if not hasattr(v, "title"):
122                    if not hasattr(v, "name"):
123                        if hasattr(v, "id"):
124                            name=v.id
125                        else:
126                            vcount=vcount+1
127                            name="unnamed_var_%s" % vcount
128                    else:
129                       name=v.name
130                else:
131                    name=v.title
132            else:
133                name=v.long_name
134        else:
135            name=v.standard_name
136        return name
137
138
139    def getVariables(self, datasetGroup=None, dataset=None, datasetURI=None):
140        """
141        CSML: call getFeatureList to return list of features
142        """
143       
144        """
145        Returns a list of variables for the given dataset
146        group/dataset combination or datasetURI. The variable name used is selected
147        hierarchically depending on the available attributes. Each returned item in
148        the list includes a [<long_name>, <id>].
149        """ 
150        print """CSML: getVariables(datasetURI)
151Returns a list of rank-2 lists containing:
152
153[[bestVarName, varID], [bestVarName2, varID2],....]
154
155CSML_END
156"""             
157        self._openDataFile(datasetGroup, dataset, datasetURI)
158        vars=self.file.listvariables()
159        rtvars=[]
160        vcount=0
161        for var in vars:
162           v=self.file[var]
163           name=self._getBestName(v, vcount)
164               
165           # Fix name to remove leading asterisks and lower case surface start.
166           name=name.replace("_", " ")
167           if name[:2]=="**": name=(name[2:]).strip()
168           if name[:7]=="surface": name=(name[7:]).strip()
169           # Remove variables they are actually bounds on axes or coefficients in formulae
170           if v.id not in ("bounds_longitude", "bounds_latitude", "bounds_level", "bounds_time", "p0"):
171               rtvars.append([name, v.id]) 
172
173        return rtvars
174
175
176    def getDomain(self, datasetGroup=None, dataset=None, variable=None, datasetURI=None):
177        """ CSML:      domain=csml.getDomain(strFeatureName)
178        at the moment returns something like:
179       
180        domain: {'latitude': [ 90. , 87.5, 85. , 82.5, 80. , 77.5, 75. , 72.5, 70. , 67.5, 65. , 62.5, 60. ,
181         57.5, 55. , 52.5, 50. , 47.5, 45. , 42.5, 40. , 37.5, 35. , 32.5,
182         30. , 27.5, 25. , 22.5, 20. , 17.5, 15. , 12.5, 10. ,  7.5,  5. ,
183          2.5,  0. , -2.5, -5. , -7.5,-10. ,-12.5,-15. ,-17.5,-20. ,-22.5,
184        -25. ,-27.5,-30. ,-32.5,-35. ,-37.5,-40. ,-42.5,-45. ,-47.5,-50. ,
185        -52.5,-55. ,-57.5,-60. ,-62.5,-65. ,-67.5,-70. ,-72.5,-75. ,-77.5,
186        -80. ,-82.5,-85. ,-87.5,-90. ,], 'Trop': [ 0.,], 't': '2783-05-22T00:00:00 2793-03-30T00:00:00 2803-02-06T00:00:00 2891-10-22T00:00:00', 'longitude': [   0.  ,   3.75,   7.5 ,  11.25,  15.  ,  18.75,  22.5 ,  26.25,  30.  ,
187        33.75,  37.5 ,  41.25,  45.  ,  48.75,  52.5 ,  56.25,  60.  ,  63.75,
188        67.5 ,  71.25,  75.  ,  78.75,  82.5 ,  86.25,  90.  ,  93.75,  97.5 ,
189       101.25, 105.  , 108.75, 112.5 , 116.25, 120.  , 123.75, 127.5 , 131.25,
190       135.  , 138.75, 142.5 , 146.25, 150.  , 153.75, 157.5 , 161.25, 165.  ,
191       168.75, 172.5 , 176.25, 180.  , 183.75, 187.5 , 191.25, 195.  , 198.75,
192       202.5 , 206.25, 210.  , 213.75, 217.5 , 221.25, 225.  , 228.75, 232.5 ,
193       236.25, 240.  , 243.75, 247.5 , 251.25, 255.  , 258.75, 262.5 , 266.25,
194       270.  , 273.75, 277.5 , 281.25, 285.  , 288.75, 292.5 , 296.25, 300.  ,
195       303.75, 307.5 , 311.25, 315.  , 318.75, 322.5 , 326.25, 330.  , 333.75,
196       337.5 , 341.25, 345.  , 348.75, 352.5 , 356.25,]}
197
198        """
199       
200       
201        """
202        Returns the full domain listing for a variable returning:
203       
204        [axisIndexString, knownAxisString, id, longName, units, listType, unusedItem,
205        listValue-1, listValue-2, ..., listValue-n]
206       
207        For example:
208       
209        ["axis1.0", "time", "time", "Time", "hours since 1999-09-09 00:00:00", "start end interval",
210        "", 0, 3, 6]
211       
212        This listType represents 6-hourly time steps of 0,1,2,3 past the base time.
213       
214        listType can also take the value "full list" where all values in the list are provided,
215        or "start end" where only the first and last value are given.
216        """ 
217        print """CSML: getDomain(variable, datasetURI)
218        Returns the full domain listing for a variable returning:
219       
220       
221        For example:
222       
223        ["time", "time", "Time", "hours since 1999-09-09 00:00:00", "start end interval",
224        "", 0, 3, 6]
225       
226        This listType represents 6-hourly time steps of 0,1,2,3 past the base time.
227       
228        listType can also take the value "full list" where all values in the list are provided,
229        or "start end" where only the first and last value are given.
230
231CSML_END
232"""             
233        self._openDataFile(datasetGroup, dataset, datasetURI)
234        var=self._getVariable(variable)
235        varList=self.file.listvariables()
236        varList.sort()
237        varIndex=varList.index(var.id)
238        rtlist=[]
239       
240        axcount=0
241        for axis in var.getAxisList():
242            units=None
243            if axis.isTime():
244                knownAxis="time"
245                (start, end, (intervalValue, intervalUnits))=self.getTemporalDomain(datasetGroup, dataset, variable, datasetURI)
246                arrayValues=[start, end, intervalValue]
247                listType="start end interval"
248                units=intervalUnits
249            elif axis.isLevel():
250                knownAxis="level"
251                arrayValues=axis[:]
252                listType="full list"axisIndexString
253            elif axis.isLatitude():
254                knownAxis="latitude"
255                arrayValues=[axis[0], axis[-1]]
256                arrayValues.sort()
257                arrayValues.reverse()
258                listType="start end" 
259            elif axis.isLongitude():
260                knownAxis="longitude"
261                arrayValues=[axis[0], axis[-1]]
262                arrayValues.sort()     
263                listType="start end"                                   
264            else:
265                # For any axis not known as above
266                knownAxis=""
267                if len(axis[:])>200:
268                    arrayValues=[axis[0], axis[-1]]
269                    listType="start end"                   
270                else:
271                    arrayValues=axis[:]
272                    listType="full list"
273           
274            id=axis.id
275            longName=self._getBestName(axis).title()
276            if not units:  units=getattr(axis, "units", "")
277           
278            unused=""
279            rtlist.append([knownAxis, id, longName, units, listType, unused]+arrayValues)
280            axcount=axcount+1
281           
282        return rtlist
283       
284
285    def getHorizontalDomain(self, datasetGroup=None, dataset=None, variable=None, datasetURI=None):
286        """
287        Returns the horizontal domain as (northernExtent, westernExtent, southernExtent, easternExtent).
288        """
289        print "Deprecated: NOT NEEDED"
290        self._openDataFile(datasetGroup, dataset, datasetURI)
291        var=self._getVariable(variable)
292        lat=list(var.getLatitude()[:])
293        if lat[-1]<lat[0]: lat.reverse()
294        (southernExtent, northernExtent)=(lat[0], lat[-1])
295        lon=var.getLongitude()[:]
296        (westernExtent, easternExtent)=(lon[0], lon[-1])
297        return (northernExtent, westernExtent, southernExtent, easternExtent)
298
299
300    def getVerticalSpatialDomain(self, datasetGroup=None, dataset=None, variable=None, datasetURI=None):
301        """
302        Returns the vertical domain as a tuple containing
303        a list of levels (or "Single level" string) and the units.
304        """
305        print "Deprecated: NOT NEEDED" 
306        self._openDataFile(datasetGroup, dataset, datasetURI)
307        var=self._getVariable(variable)
308        try:
309            levels=var.getLevel()
310            vertical_units=levels.units
311            vertical_domain=tuple(map(lambda x: float(x), levels[:]))
312     
313        except:
314            vertical_domain=("Single level",)
315            vertical_units=None
316        return (vertical_domain, vertical_units)
317
318
319    def getTemporalDomain(self, datasetGroup=None, dataset=None, variable=None, datasetURI=None):
320        """
321        Returns the temporal domain as a tuple of (start time, end time,
322        (interval value, interval units)).
323        """
324        print "Deprecated: NOT NEEDED" 
325        self._openDataFile(datasetGroup, dataset, datasetURI)
326        var=self._getVariable(variable)
327        time_axis=var.getTime()
328        time_keys=("year", "month", "day", "hour", "minute", "second")
329        start_time_components=time_axis.asComponentTime()[0]
330        end_time_components=time_axis.asComponentTime()[-1]
331        start_time=[]
332        end_time=[]
333        for key in time_keys:
334            start_time.append(getattr(start_time_components, key))
335            end_time.append(getattr(end_time_components, key)) 
336        time_units=time_axis.units.split()[0]
337        if time_units[-1]=="s":  time_units=time_units[:-1]
338        if len(time_axis)>1:
339            interval_value=abs(time_axis[1]-time_axis[0])
340        else:
341            interval_value=1  # To stop loops breaking later!
342        return (start_time, end_time, (interval_value, time_units)) 
343
344
345    def getSelectedTimeSteps(self, datasetURI, variable, axisSelectionDict):
346        """
347        Returns a list of time step strings based on the selection.
348   
349    CSML: something like:
350        timelist = domainref['t']
351#make some selection from the available times/spatialdomain:
352timeSelection = timelist.split()[:2]
353spatialSubsetDictionary= {}
354spatialSubsetDictionary['latitude']=(0.,10.0)
355spatialSubsetDictionary['longitude']=(90, 100.0)
356
357featureValues=csml.getDataForFeature('solar_3', timeSelection, **spatialSubsetDictionary)
358   
359        """     
360        print """CSML: getSelectedTimeSteps(datasetURI, variable, axisSelectionDict)
361Returns a list of time step strings based on the selection.
362
363Inputs look like:
364('file:/usr/local/dx/testdata/testdata1.xml', "pqn", {"axis_1.1.1.0":("1999-01-01T00:00:00", "1999-01-01T06:00:00"),
365                                                      "axis_1.1.1.1":(30,-30), "axis_1.1.1.2":(30,-30)}
366                                                     
367Outputs look like:
368['1999-01-01 00:00:0.000000', '1999-01-01 06:00:0.000000']
369
370CSML_END
371"""             
372        self._openDataFile(datasetURI=datasetURI)
373        var=self._getVariable(variable)
374       
375        timeAxis=None
376        axcount=0
377        for axis in var.getAxisList():
378            if axis.isTime():
379                timeAxisIndex=axcount
380                timeAxis=axis
381            axcount=axcount+1
382
383        if timeAxis==None:
384            return []
385         
386        startDateTime=None   
387        for key in axisSelectionDict.keys():
388            #print key, axisSelectionDict[key]
389            axisIndex=int(key.split(".")[-1])
390            if axisIndex==timeAxisIndex:
391                (startDateTime, endDateTime)=axisSelectionDict[key][:2]
392       
393        if startDateTime==None:
394            return [str(tst) for tst in timeAxis.asComponentTime()]
395       
396        startDateTime=startDateTime.replace("T", " ")
397        items=startDateTime.split(":")
398        startDateTime=":".join(items[:-1])+":"+("%f" % float(items[-1]))
399        endDateTime=endDateTime.replace("T", " ")
400        items=endDateTime.split(":")
401        endDateTime=":".join(items[:-1])+":"+("%f" % float(items[-1]))
402       
403        timeSteps=timeAxis.asComponentTime()
404        selectedTimes=[]
405
406        for timeStep in timeSteps:
407            ts=timeStep
408            timeStep="%.4d-%.2d-%.2d %.2d:%.2d:%f" % (ts.year, ts.month, ts.day, \
409                                            ts.hour, ts.minute, ts.second)
410            #print str(timeStep), startDateTime
411            ts=str(timeStep)
412            if ts>endDateTime:
413                break
414            elif ts<startDateTime:
415                continue
416            else:
417                selectedTimes.append(ts)
418       
419        if selectedTimes==[]:
420            raise DXOptionHandlingError, "All selected time steps for '%s' are out of range, please go back and re-select." % variable
421
422        return selectedTimes
423
424
425    def getSelectedVariableArrayDetails(self, datasetURI, variable, axisSelectionDict):
426        """
427        Returns a tuple representing the (array shape, grid shape, size)
428        of the selected subset of a variable. Grid shape can be None if both latitude
429        and longitude axes are not present.
430        """     
431        print """CSML: getSelectedVariableArrayDetails(datasetURI, variable, axisSelectionDict)
432Returns a tuple representing the (array shape, grid shape, size)
433of the selected subset of a variable. Grid shape can be None if both latitude
434and longitude axes are not present.
435
436Inputs look like:
437('file:/usr/local/dx/testdata/testdata1.xml', "pqn", {"axis_1.1.1.0":("1999-01-01T00:00:00", "1999-01-01T06:00:00"),
438                                                      "axis_1.1.1.1":(30,-30), "axis_1.1.1.2":(30,-30)}
439                                                     
440Outputs look like:
441((2, 37, 72), (37, 72), 42624)
442
443CSML_END
444"""             
445        self._openDataFile(datasetURI=datasetURI)
446        var=self._getVariable(variable)
447        varType=var.typecode()
448               
449        timeAxisIndex=None
450        axcount=0
451        for axis in var.getAxisList():
452            if axis.isTime():
453                timeAxisIndex=axcount
454            axcount=axcount+1
455         
456        startDateTime=None   
457       
458        axesCounted=[]
459        axLens=[]
460        latLength=None
461        lonLength=None
462        size=1
463        for key in axisSelectionDict.keys():
464            #print key, axisSelectionDict[key]
465            axisIndex=int(key.split(".")[-1])
466            (low, high)=axisSelectionDict[key][:2]
467            axis=var.getAxis(axisIndex)
468           
469            if axisIndex==timeAxisIndex:
470                axlen=len(self.getSelectedTimeSteps(datasetURI, variable, {key:(low, high)}))           
471            elif low==high: 
472                axlen=1
473            else:
474                axlen=len(getValuesInRange(low, high, axis[:]))
475                if axlen==0:
476                    if (axis.isLongitude() or axis.isLatitude()) and low==high:
477                        print "Lat and lon can be axis length zero because we'll nudge to nearest if only one value given."
478                        axlen=1
479                    else:
480                        raise DXOptionHandlingError, "All selected '%s' axis values for '%s' are out of range, please go back and re-select." % (axis.id, variable)
481       
482                if axis.isLatitude():
483                    latLength=axlen
484                elif axis.isLongitude():
485                    lonLength=axlen
486                       
487            size=size*axlen
488            axesCounted.append(axisIndex)
489            axLens.append(axlen)
490       
491        axcount=0
492        arrayShape=[]
493        for axis in var.getAxisList():
494            if axcount not in axesCounted:
495                size=size*len(axis)
496                arrayShape.append(len(axis))
497            else:
498                arrayShape.append(axLens[axesCounted[axcount]]) 
499            if axis.isLatitude() and latLength==None:
500                latLength=len(axis)
501            elif axis.isLongitude() and lonLength==None:
502                lonLength=len(axis)   
503            axcount=axcount+1
504       
505        # Now work out gridShape if appropriate
506        if latLength and lonLength:
507            gridShape=(latLength, lonLength)
508        else:
509            gridShape=None
510       
511        if varType=="f":
512            size=size*4.
513        elif varType=="d":
514            size=size*8
515        elif varType=="i":
516            size=size   
517
518        return (tuple(arrayShape), gridShape, size)
519
520
521    def getSelectedVariableSubsetSize(self, datasetURI, varID, axisSelectionDict):
522        """
523        Returns the size in bytes of the selected subset of a variable.
524        """
525        print "IGNORE: Calls above method."
526        return self.getSelectedVariableArrayDetails(datasetURI, varID, axisSelectionDict)[2]
527
528       
529    def readVariableSubsetIntoMemory(self, datasetURI, variable, axisSelectionDict, timeStep=None):
530        """
531        Reads the variable with ID 'variable' into memory from file
532        'datasetURI' - sub-setting across all axes indicated in 'axisSelectionDict'.
533        If 'timeStep' is provided then override the time selection in 'axisSelectionDict'
534        with the 'timeStep' given.
535        """
536        print """CSML: getSelectedTimeSteps(datasetURI, variable, axisSelectionDict, timeStep=None)
537        Reads the variable with ID 'variable' into memory from file
538        'datasetURI' - sub-setting across all axes indicated in 'axisSelectionDict'.
539        If 'timeStep' is provided then override the time selection in 'axisSelectionDict'
540        with the 'timeStep' given.
541
542Inputs look like:
543('file:/usr/local/dx/testdata/testdata1.xml', "pqn", {"axis_1.1.1.0":("1999-01-01T00:00:00", "1999-01-01T06:00:00"),
544                                                      "axis_1.1.1.1":(30,-30), "axis_1.1.1.2":(30,-30)}, "1999-01-01T12:00:00")
545                                                     
546Output is a CDMS transient variable which is a python Numeric Masked Array with metadata attributes and axes attached.
547
548CSML_END
549"""     
550        self._openDataFile(datasetURI=datasetURI)
551        var=self._getVariable(variable)
552       
553        axisList=var.getAxisList()
554           
555        selectionStrings=[]
556        for key in axisSelectionDict.keys():
557            #print key, axisSelectionDict[key]
558            axisIndex=int(key.split(".")[-1])
559            axis=axisList[axisIndex]   
560            id=axis.id
561           
562            # deal with time differently
563            if axis.isTime():
564                if timeStep!=None:     
565                    timeStep=timeStep.replace("T", " ")                 
566                    selectionStrings.append('time="%s"' % timeStep)
567                else:
568                    selector=axisSelectionDict[key][:2]
569                    selector=(selector[0].replace("T", " "), selector[1].replace("T", " "))
570                    selectionStrings.append('%s=%s' % (axis.id, str(selector)))         
571            elif axis.isLatitude():
572                (low, high)=axisSelectionDict[key][:2]
573                if low==high:
574                    loworig=low
575                    (low, high, nudgeMessage)=nudgeSingleValuesToAxisValues(low, axis[:], "Latitude")   
576                    if loworig!=low:   print "Nudged latitudes to nearest points..."                   
577            elif axis.isLongitude():
578                (low, high)=axisSelectionDict[key][:2]
579                if low==high:
580                    loworig=low
581                    (low, high, nudgeMessage)=nudgeSingleValuesToAxisValues(low, axis[:], "Longitude") 
582                    if loworig!=low:   print "Nudged latitudes to nearest points..."                         
583            else:
584                selector=axisSelectionDict[key][:2]
585                selectionStrings.append('%s=%s' % (axis.id, str(selector)))
586       
587        fullSelectionString=", ".join(selectionStrings)
588        variableData=eval("self.file('%s', %s)" % (variable, fullSelectionString))
589        return variableData         
590       
591
592    def getCFGlobalAttributes(self, datafile):
593        """
594        Gets any CF metadta global attributes that are available
595        from the source dataset/file.
596        """
597        print """CSML: getCFGlobalAttributes(datasetURI)
598Gets any CF metadta global attributes that are available from the source dataset/file.
599
600Inputs look like:
601('file:/usr/local/dx/testdata/testdata1.xml')
602                                                     
603Output is a dictionary of attribute,value pairs got from the input file.
604
605CSML_ENDaxisIndexString
606"""             
607        if self.file==None: self._openDataFile(datasetURI=datafile)
608        gatts={}
609        CF_METADATA_GLOBAL_ATTRIBUTE_KEYS="Conventions title source institution history references comment".split()
610
611        for gatt in CF_METADATA_GLOBAL_ATTRIBUTE_KEYS:
612            if hasattr(self.file, gatt):
613                gatts[gatt]=self.file.__getattr__(gatt)
614       
615        return gatts
616
617
618if __name__=="__main__":       
619    a=CSMLDataHandler()
620    print a.getVariables(datasetURI='var1.nc')
621    print a.getVariables(datasetURI='var1.nc')   
622    print a.getDomain(datasetURI='var1.nc', variable="pqn")
623    print a.getCFGlobalAttributes('var1.nc')
624    print a.getSelectedVariableArrayDetails('var1.nc', "pqn", 
625                     {"axis_1.1.1.0":("1999-01-01T00:00:00", "1999-01-01T06:00:00"),
626                     "axis_1.1.1.1":(30,-30), "axis_1.1.1.2":(30,-30)})
Note: See TracBrowser for help on using the repository browser.