source: TI03-DataExtractor/branches/csml_hooks/CDMSDataHandler.py @ 891

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI03-DataExtractor/branches/csml_hooks/CDMSDataHandler.py@891
Revision 891, 20.1 KB checked in by domlowe, 14 years ago (diff)

added comments to CSMLDataHandler.py

Line 
1#   Copyright (C) 2004 CCLRC & NERC( Natural Environment Research Council ).
2#   This software may be distributed under the terms of the
3#   Q Public License, version 1.0 or later. http://ndg.nerc.ac.uk/public_docs/QPublic_license.txt
4
5"""
6CSMLDataHandler.py
7==================
8
9CSMLDataHandler module for the dx package.
10
11This module holds the CSMLDataHandler class that is used
12to hold and access information about datasets held in CSML-compatible
13formats visible to the dx package.
14
15"""
16
17# Import required modules
18import os
19import cdms
20import re
21
22# Import global variables
23#from serverConfig import *
24#from common import *
25#from DXDMLHandler import DXDMLHandler
26#from DXErrors import *
27
28def getValuesInRange(start, end, array):
29    """
30    Takes a start and end value and returns the values in the array that are bet
31ween them.
32    If not in range and are the same value then returns [start].
33    """
34    # check all are floats
35    array=map(lambda x: float(x), list(array))
36    if array[0]>array[-1]: array.reverse()
37    if start>end:  (start, end)=(end, start)
38    (start, end)=(float(start), float(end))
39    rtarray=[]
40    for i in array:
41        if i>=start and i<=end:
42            rtarray.append(i)
43    if rtarray==[] and start==end:
44        rtarray=[start]
45    return rtarray
46
47
48
49class CSMLDataHandler:
50    """
51    A group of methods to connect to a dataset group or
52    dataset to extract information about the contents.
53    """
54 
55    def __init__(self, datasetURI=None):
56        """
57        Set up instance variables.
58        """
59        self.DXDML=None #DXDMLHandler()
60        self.file=datasetURI
61        if self.file: self._openDataFile(datasetURI=self.file) 
62   
63   
64    def _openDataFile(self, datasetGroup=None, dataset=None, datasetURI=None):
65        #CSML: I think this can be ignored. File opening is handled by the csmlio interface.
66        """
67        Opens a file and allocates to file handle called: self.file.
68        """
69        print """CSML: _openDataFile(datasetURI)
70Sets:    self.file=CSMLDataset(datasetURI)
71CSML_END
72"""
73        if datasetURI:
74            cdmlfile=datasetURI
75        else:
76            for item in self.DXDML.getDatasetsAndDatasetURIs(datasetGroup):
77                if item[0]==dataset:
78                    cdmlfile=item[1]
79       
80        try:
81            self.file=cdms.open(cdmlfile)
82        except IOError, error:
83            raise DXDataIOError, error
84
85
86    def _getVariable(self, varname):
87        #CSML, Not needed.
88         
89        """
90        Gets variable metadata object from a data file.
91        """
92        print """CSML: _getVariable(varname)
93Returns a CDML metadata variable.
94It contains metadata such as attributes and axes but not the
95data array. It is read from self.file.
96
97CSML_END
98"""     
99        try:
100            rtvalue=self.file[varname]
101        except:
102            raise DXOptionHandlingError, "Cannot find variable %s in file %s" % (varname, self.file.id)
103        return rtvalue
104       
105
106    def _getBestName(self, v, vcount=0):
107        #CSML: should probably call getFeatureParameter (which returns contents of the <parameter> element or failing that getFeatureDescription (which returns contents of the gml:description element)
108       
109        """
110        Returns the best name for a cdms variable.
111        """
112        print """CSML: _getBestName(v)
113Returns the best standard_name, long_name etc from those available.
114v is returned from self._getVariable()
115
116
117CSML_END
118"""     
119        if not hasattr(v, "standard_name"): 
120            if not hasattr(v, "long_name"):
121                if not hasattr(v, "title"):
122                    if not hasattr(v, "name"):
123                        if hasattr(v, "id"):
124                            name=v.id
125                        else:
126                            vcount=vcount+1
127                            name="unnamed_var_%s" % vcount
128                    else:
129                       name=v.name
130                else:
131                    name=v.title
132            else:
133                name=v.long_name
134        else:
135            name=v.standard_name
136        return name
137
138
139    def getVariables(self, datasetGroup=None, dataset=None, datasetURI=None):
140        """
141        CSML: call getFeatureList to return list of features
142        """
143       
144        """
145        Returns a list of variables for the given dataset
146        group/dataset combination or datasetURI. The variable name used is selected
147        hierarchically depending on the available attributes. Each returned item in
148        the list includes a [<long_name>, <id>].
149        """ 
150        print """CSML: getVariables(datasetURI)
151Returns a list of rank-2 lists containing:
152
153[[bestVarName, varID], [bestVarName2, varID2],....]
154
155CSML_END
156"""             
157        self._openDataFile(datasetGroup, dataset, datasetURI)
158        vars=self.file.listvariables()
159        rtvars=[]
160        vcount=0
161        for var in vars:
162           v=self.file[var]
163           name=self._getBestName(v, vcount)
164               
165           # Fix name to remove leading asterisks and lower case surface start.
166           name=name.replace("_", " ")
167           if name[:2]=="**": name=(name[2:]).strip()
168           if name[:7]=="surface": name=(name[7:]).strip()
169           # Remove variables they are actually bounds on axes or coefficients in formulae
170           if v.id not in ("bounds_longitude", "bounds_latitude", "bounds_level", "bounds_time", "p0"):
171               rtvars.append([name, v.id]) 
172
173        return rtvars
174
175
176    def getDomain(self, datasetGroup=None, dataset=None, variable=None, datasetURI=None):
177        """ CSML: Okay, I've not got far with Domains yet, but this may need to call several csmlio methods to get the data it needs.
178        """
179       
180       
181        """
182        Returns the full domain listing for a variable returning:
183       
184        [axisIndexString, knownAxisString, id, longName, units, listType, unusedItem,
185        listValue-1, listValue-2, ..., listValue-n]
186       
187        For example:
188       
189        ["axis1.0", "time", "time", "Time", "hours since 1999-09-09 00:00:00", "start end interval",
190        "", 0, 3, 6]
191       
192        This listType represents 6-hourly time steps of 0,1,2,3 past the base time.
193       
194        listType can also take the value "full list" where all values in the list are provided,
195        or "start end" where only the first and last value are given.
196        """ 
197        print """CSML: getDomain(variable, datasetURI)
198        Returns the full domain listing for a variable returning:
199       
200        [knownAxisString, id, longName, units, listType, unusedItem,
201        listValue-1, listValue-2, ..., listValue-n]
202       
203        For example:
204       
205        ["time", "time", "Time", "hours since 1999-09-09 00:00:00", "start end interval",
206        "", 0, 3, 6]
207       
208        This listType represents 6-hourly time steps of 0,1,2,3 past the base time.
209       
210        listType can also take the value "full list" where all values in the list are provided,
211        or "start end" where only the first and last value are given.
212
213CSML_END
214"""             
215        self._openDataFile(datasetGroup, dataset, datasetURI)
216        var=self._getVariable(variable)
217        varList=self.file.listvariables()
218        varList.sort()
219        varIndex=varList.index(var.id)
220        rtlist=[]
221       
222        axcount=0
223        for axis in var.getAxisList():
224            units=None
225            if axis.isTime():
226                knownAxis="time"
227                (start, end, (intervalValue, intervalUnits))=self.getTemporalDomain(datasetGroup, dataset, variable, datasetURI)
228                arrayValues=[start, end, intervalValue]
229                listType="start end interval"
230                units=intervalUnits
231            elif axis.isLevel():
232                knownAxis="level"
233                arrayValues=axis[:]
234                listType="full list"axisIndexString
235            elif axis.isLatitude():
236                knownAxis="latitude"
237                arrayValues=[axis[0], axis[-1]]
238                arrayValues.sort()
239                arrayValues.reverse()
240                listType="start end" 
241            elif axis.isLongitude():
242                knownAxis="longitude"
243                arrayValues=[axis[0], axis[-1]]
244                arrayValues.sort()     
245                listType="start end"                                   
246            else:
247                # For any axis not known as above
248                knownAxis=""
249                if len(axis[:])>200:
250                    arrayValues=[axis[0], axis[-1]]
251                    listType="start end"                   
252                else:
253                    arrayValues=axis[:]
254                    listType="full list"
255           
256            id=axis.id
257            longName=self._getBestName(axis).title()
258            if not units:  units=getattr(axis, "units", "")
259           
260            unused=""
261            rtlist.append([knownAxis, id, longName, units, listType, unused]+arrayValues)
262            axcount=axcount+1
263           
264        return rtlist
265       
266
267    def getHorizontalDomain(self, datasetGroup=None, dataset=None, variable=None, datasetURI=None):
268        """
269        Returns the horizontal domain as (northernExtent, westernExtent, southernExtent, easternExtent).
270        """
271        print "Deprecated: NOT NEEDED"
272        self._openDataFile(datasetGroup, dataset, datasetURI)
273        var=self._getVariable(variable)
274        lat=list(var.getLatitude()[:])
275        if lat[-1]<lat[0]: lat.reverse()
276        (southernExtent, northernExtent)=(lat[0], lat[-1])
277        lon=var.getLongitude()[:]
278        (westernExtent, easternExtent)=(lon[0], lon[-1])
279        return (northernExtent, westernExtent, southernExtent, easternExtent)
280
281
282    def getVerticalSpatialDomain(self, datasetGroup=None, dataset=None, variable=None, datasetURI=None):
283        """
284        Returns the vertical domain as a tuple containing
285        a list of levels (or "Single level" string) and the units.
286        """
287        print "Deprecated: NOT NEEDED" 
288        self._openDataFile(datasetGroup, dataset, datasetURI)
289        var=self._getVariable(variable)
290        try:
291            levels=var.getLevel()
292            vertical_units=levels.units
293            vertical_domain=tuple(map(lambda x: float(x), levels[:]))
294     
295        except:
296            vertical_domain=("Single level",)
297            vertical_units=None
298        return (vertical_domain, vertical_units)
299
300
301    def getTemporalDomain(self, datasetGroup=None, dataset=None, variable=None, datasetURI=None):
302        """
303        Returns the temporal domain as a tuple of (start time, end time,
304        (interval value, interval units)).
305        """
306        print "Deprecated: NOT NEEDED" 
307        self._openDataFile(datasetGroup, dataset, datasetURI)
308        var=self._getVariable(variable)
309        time_axis=var.getTime()
310        time_keys=("year", "month", "day", "hour", "minute", "second")
311        start_time_components=time_axis.asComponentTime()[0]
312        end_time_components=time_axis.asComponentTime()[-1]
313        start_time=[]
314        end_time=[]
315        for key in time_keys:
316            start_time.append(getattr(start_time_components, key))
317            end_time.append(getattr(end_time_components, key)) 
318        time_units=time_axis.units.split()[0]
319        if time_units[-1]=="s":  time_units=time_units[:-1]
320        if len(time_axis)>1:
321            interval_value=abs(time_axis[1]-time_axis[0])
322        else:
323            interval_value=1  # To stop loops breaking later!
324        return (start_time, end_time, (interval_value, time_units)) 
325
326
327    def getSelectedTimeSteps(self, datasetURI, variable, axisSelectionDict):
328        """
329        Returns a list of time step strings based on the selection.
330        """     
331        print """CSML: getSelectedTimeSteps(datasetURI, variable, axisSelectionDict)
332Returns a list of time step strings based on the selection.
333
334Inputs look like:
335('file:/usr/local/dx/testdata/testdata1.xml', "pqn", {"axis_1.1.1.0":("1999-01-01T00:00:00", "1999-01-01T06:00:00"),
336                                                      "axis_1.1.1.1":(30,-30), "axis_1.1.1.2":(30,-30)}
337                                                     
338Outputs look like:
339['1999-01-01 00:00:0.000000', '1999-01-01 06:00:0.000000']
340
341CSML_END
342"""             
343        self._openDataFile(datasetURI=datasetURI)
344        var=self._getVariable(variable)
345       
346        timeAxis=None
347        axcount=0
348        for axis in var.getAxisList():
349            if axis.isTime():
350                timeAxisIndex=axcount
351                timeAxis=axis
352            axcount=axcount+1
353
354        if timeAxis==None:
355            return []
356         
357        startDateTime=None   
358        for key in axisSelectionDict.keys():
359            #print key, axisSelectionDict[key]
360            axisIndex=int(key.split(".")[-1])
361            if axisIndex==timeAxisIndex:
362                (startDateTime, endDateTime)=axisSelectionDict[key][:2]
363       
364        if startDateTime==None:
365            return [str(tst) for tst in timeAxis.asComponentTime()]
366       
367        startDateTime=startDateTime.replace("T", " ")
368        items=startDateTime.split(":")
369        startDateTime=":".join(items[:-1])+":"+("%f" % float(items[-1]))
370        endDateTime=endDateTime.replace("T", " ")
371        items=endDateTime.split(":")
372        endDateTime=":".join(items[:-1])+":"+("%f" % float(items[-1]))
373       
374        timeSteps=timeAxis.asComponentTime()
375        selectedTimes=[]
376
377        for timeStep in timeSteps:
378            ts=timeStep
379            timeStep="%.4d-%.2d-%.2d %.2d:%.2d:%f" % (ts.year, ts.month, ts.day, \
380                                            ts.hour, ts.minute, ts.second)
381            #print str(timeStep), startDateTime
382            ts=str(timeStep)
383            if ts>endDateTime:
384                break
385            elif ts<startDateTime:
386                continue
387            else:
388                selectedTimes.append(ts)
389       
390        if selectedTimes==[]:
391            raise DXOptionHandlingError, "All selected time steps for '%s' are out of range, please go back and re-select." % variable
392
393        return selectedTimes
394
395
396    def getSelectedVariableArrayDetails(self, datasetURI, variable, axisSelectionDict):
397        """
398        Returns a tuple representing the (array shape, grid shape, size)
399        of the selected subset of a variable. Grid shape can be None if both latitude
400        and longitude axes are not present.
401        """     
402        print """CSML: getSelectedVariableArrayDetails(datasetURI, variable, axisSelectionDict)
403Returns a tuple representing the (array shape, grid shape, size)
404of the selected subset of a variable. Grid shape can be None if both latitude
405and longitude axes are not present.
406
407Inputs look like:
408('file:/usr/local/dx/testdata/testdata1.xml', "pqn", {"axis_1.1.1.0":("1999-01-01T00:00:00", "1999-01-01T06:00:00"),
409                                                      "axis_1.1.1.1":(30,-30), "axis_1.1.1.2":(30,-30)}
410                                                     
411Outputs look like:
412((2, 37, 72), (37, 72), 42624)
413
414CSML_END
415"""             
416        self._openDataFile(datasetURI=datasetURI)
417        var=self._getVariable(variable)
418        varType=var.typecode()
419               
420        timeAxisIndex=None
421        axcount=0
422        for axis in var.getAxisList():
423            if axis.isTime():
424                timeAxisIndex=axcount
425            axcount=axcount+1
426         
427        startDateTime=None   
428       
429        axesCounted=[]
430        axLens=[]
431        latLength=None
432        lonLength=None
433        size=1
434        for key in axisSelectionDict.keys():
435            #print key, axisSelectionDict[key]
436            axisIndex=int(key.split(".")[-1])
437            (low, high)=axisSelectionDict[key][:2]
438            axis=var.getAxis(axisIndex)
439           
440            if axisIndex==timeAxisIndex:
441                axlen=len(self.getSelectedTimeSteps(datasetURI, variable, {key:(low, high)}))           
442            elif low==high: 
443                axlen=1
444            else:
445                axlen=len(getValuesInRange(low, high, axis[:]))
446                if axlen==0:
447                    if (axis.isLongitude() or axis.isLatitude()) and low==high:
448                        print "Lat and lon can be axis length zero because we'll nudge to nearest if only one value given."
449                        axlen=1
450                    else:
451                        raise DXOptionHandlingError, "All selected '%s' axis values for '%s' are out of range, please go back and re-select." % (axis.id, variable)
452       
453                if axis.isLatitude():
454                    latLength=axlen
455                elif axis.isLongitude():
456                    lonLength=axlen
457                       
458            size=size*axlen
459            axesCounted.append(axisIndex)
460            axLens.append(axlen)
461       
462        axcount=0
463        arrayShape=[]
464        for axis in var.getAxisList():
465            if axcount not in axesCounted:
466                size=size*len(axis)
467                arrayShape.append(len(axis))
468            else:
469                arrayShape.append(axLens[axesCounted[axcount]]) 
470            if axis.isLatitude() and latLength==None:
471                latLength=len(axis)
472            elif axis.isLongitude() and lonLength==None:
473                lonLength=len(axis)   
474            axcount=axcount+1
475       
476        # Now work out gridShape if appropriate
477        if latLength and lonLength:
478            gridShape=(latLength, lonLength)
479        else:
480            gridShape=None
481       
482        if varType=="f":
483            size=size*4.
484        elif varType=="d":
485            size=size*8
486        elif varType=="i":
487            size=size   
488
489        return (tuple(arrayShape), gridShape, size)
490
491
492    def getSelectedVariableSubsetSize(self, datasetURI, varID, axisSelectionDict):
493        """
494        Returns the size in bytes of the selected subset of a variable.
495        """
496        print "IGNORE: Calls above method."
497        return self.getSelectedVariableArrayDetails(datasetURI, varID, axisSelectionDict)[2]
498
499       
500    def readVariableSubsetIntoMemory(self, datasetURI, variable, axisSelectionDict, timeStep=None):
501        """
502        Reads the variable with ID 'variable' into memory from file
503        'datasetURI' - sub-setting across all axes indicated in 'axisSelectionDict'.
504        If 'timeStep' is provided then override the time selection in 'axisSelectionDict'
505        with the 'timeStep' given.
506        """
507        print """CSML: getSelectedTimeSteps(datasetURI, variable, axisSelectionDict, timeStep=None)
508        Reads the variable with ID 'variable' into memory from file
509        'datasetURI' - sub-setting across all axes indicated in 'axisSelectionDict'.
510        If 'timeStep' is provided then override the time selection in 'axisSelectionDict'
511        with the 'timeStep' given.
512
513Inputs look like:
514('file:/usr/local/dx/testdata/testdata1.xml', "pqn", {"axis_1.1.1.0":("1999-01-01T00:00:00", "1999-01-01T06:00:00"),
515                                                      "axis_1.1.1.1":(30,-30), "axis_1.1.1.2":(30,-30)}, "1999-01-01T12:00:00")
516                                                     
517Output is a CDMS transient variable which is a python Numeric Masked Array with metadata attributes and axes attached.
518
519CSML_END
520"""     
521        self._openDataFile(datasetURI=datasetURI)
522        var=self._getVariable(variable)
523       
524        axisList=var.getAxisList()
525           
526        selectionStrings=[]
527        for key in axisSelectionDict.keys():
528            #print key, axisSelectionDict[key]
529            axisIndex=int(key.split(".")[-1])
530            axis=axisList[axisIndex]   
531            id=axis.id
532           
533            # deal with time differently
534            if axis.isTime():
535                if timeStep!=None:     
536                    timeStep=timeStep.replace("T", " ")                 
537                    selectionStrings.append('time="%s"' % timeStep)
538                else:
539                    selector=axisSelectionDict[key][:2]
540                    selector=(selector[0].replace("T", " "), selector[1].replace("T", " "))
541                    selectionStrings.append('%s=%s' % (axis.id, str(selector)))         
542            elif axis.isLatitude():
543                (low, high)=axisSelectionDict[key][:2]
544                if low==high:
545                    loworig=low
546                    (low, high, nudgeMessage)=nudgeSingleValuesToAxisValues(low, axis[:], "Latitude")   
547                    if loworig!=low:   print "Nudged latitudes to nearest points..."                   
548            elif axis.isLongitude():
549                (low, high)=axisSelectionDict[key][:2]
550                if low==high:
551                    loworig=low
552                    (low, high, nudgeMessage)=nudgeSingleValuesToAxisValues(low, axis[:], "Longitude") 
553                    if loworig!=low:   print "Nudged latitudes to nearest points..."                         
554            else:
555                selector=axisSelectionDict[key][:2]
556                selectionStrings.append('%s=%s' % (axis.id, str(selector)))
557       
558        fullSelectionString=", ".join(selectionStrings)
559        variableData=eval("self.file('%s', %s)" % (variable, fullSelectionString))
560        return variableData         
561       
562
563    def getCFGlobalAttributes(self, datafile):
564        """
565        Gets any CF metadta global attributes that are available
566        from the source dataset/file.
567        """
568        print """CSML: getCFGlobalAttributes(datasetURI)
569Gets any CF metadta global attributes that are available from the source dataset/file.
570
571Inputs look like:
572('file:/usr/local/dx/testdata/testdata1.xml')
573                                                     
574Output is a dictionary of attribute,value pairs got from the input file.
575
576CSML_ENDaxisIndexString
577"""             
578        if self.file==None: self._openDataFile(datasetURI=datafile)
579        gatts={}
580        CF_METADATA_GLOBAL_ATTRIBUTE_KEYS="Conventions title source institution history references comment".split()
581
582        for gatt in CF_METADATA_GLOBAL_ATTRIBUTE_KEYS:
583            if hasattr(self.file, gatt):
584                gatts[gatt]=self.file.__getattr__(gatt)
585       
586        return gatts
587
588
589if __name__=="__main__":       
590    a=CSMLDataHandler()
591    print a.getVariables(datasetURI='var1.nc')
592    print a.getVariables(datasetURI='var1.nc')   
593    print a.getDomain(datasetURI='var1.nc', variable="pqn")
594    print a.getCFGlobalAttributes('var1.nc')
595    print a.getSelectedVariableArrayDetails('var1.nc', "pqn", 
596                     {"axis_1.1.1.0":("1999-01-01T00:00:00", "1999-01-01T06:00:00"),
597                     "axis_1.1.1.1":(30,-30), "axis_1.1.1.2":(30,-30)})
Note: See TracBrowser for help on using the repository browser.