source: TI02-CSML/trunk/csml/API/genSubset.py @ 2155

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI02-CSML/trunk/csml/API/genSubset.py@2155
Revision 2155, 8.8 KB checked in by domlowe, 14 years ago (diff)

Nudge to nearest neighbour function added to enable approximate requests for data - thanks Ag

Line 
1'''#getSubset.py contains generic subsetting code that can be reused for multiple features
2Dominic Lowe, CCLRC, 31/01/2007'''
3
4import csml
5import csmlutils
6import sys
7
8def checkNeighbours(domain, **kwargs):
9        #for any non-range requests, get nearest neighbour
10    #e.g if 'latitude': (11) requested, latitude=12 may be the nearest value
11    for key in kwargs:     
12        #handle single values
13        if type(kwargs[key]) is not tuple:
14            nearestNeighbour=csml.API.csmlutils.nudgeSingleValuesToAxisValues(kwargs[key],domain[key])
15            if nearestNeighbour is not None:
16                kwargs[key]=nearestNeighbour   
17        else:
18            #handle ranges
19            tmpkey=[]
20            for val in kwargs[key]:
21                nearestNeighbour=csml.API.csmlutils.nudgeSingleValuesToAxisValues(val, domain[key])
22                if nearestNeighbour is not None:
23                    tmpkey.append(nearestNeighbour)   
24                kwargs[key]=tuple(tmpkey)
25    return kwargs
26
27def subsetDomain(timeaxis,times, domain,**kwargs):
28    '''takes the domain and returns a subset according to the keyword selection criteria
29    time = name of time dimension
30    times = string of times
31    '''   
32    strTimes=times
33    subsetDomain={}
34    totalArraySize=1
35    for key in domain.keys():
36        print key
37        straxisValues=''
38        if key in kwargs:
39            if key ==timeaxis:
40                straxisValues=strTimes
41                arraySize=len(strTimes.split())
42            elif type(kwargs[key]) is not tuple:
43                #only one value not a range
44                straxisValues=str(kwargs[key]) 
45            elif kwargs[key][0] < kwargs[key][1]:   
46                for val in domain[key]:
47                      if val is not '':
48                            if float(val) >= kwargs[key][0]:
49                                if float(val) <= kwargs[key] [1]:
50                                    straxisValues=straxisValues+ str(val) + ','
51                straxisValues=straxisValues[:-1]
52            else:#this if deals with selections such as longitude (330,30) where the lower limit is 'greater' than the upper limit in a mathematical sense.
53                    for val in domain[key]:
54                        if val is not '':
55                            if val >= kwargs[key][0]:
56                                straxisValues=straxisValues+ str(val) + ','
57                    for val in domain[key]:
58                        if val is not '':
59                            if val <= kwargs[key] [1]:
60                                straxisValues=straxisValues+ str(val) + ','
61                    straxisValues=straxisValues[:-1]                                             
62        else: # this dimension has not been subsetted at all - CHECK  THIS
63            for val in domain[key]:
64                if val is not '':
65                    straxisValues=straxisValues+ str(val) + ','       
66            straxisValues=straxisValues[:-1]                       
67                                           
68        subsetDomain[key]=straxisValues
69        if key != timeaxis:
70            arraySize=len(subsetDomain[key].split(','))
71        else:
72            arraySize=len(subsetDomain[key].split())
73        totalArraySize = totalArraySize * arraySize
74    return subsetDomain, totalArraySize
75   
76def _setCalendar(feature, timeName):
77    '''sets the cdtime calendar needed for conversion from CSML to NetCDF'''
78    calset=False
79    for gridOrd in feature.value.gridSeriesDomain.coordTransformTable.gridOrdinates:
80        if gridOrd.coordAxisLabel.CONTENT==timeName:
81            try:
82                caltype=gridOrd.coordAxisValues.frame.split(':',1)[0]
83                calunits=gridOrd.coordAxisValues.frame.split(':',1)[1]
84                csml.csmllibs.csmltime.setcdtimeCalendar(caltype)
85                calset=True
86            except:pass
87    if calset!=True:
88        csml.csmllibs.csmltime.setcdtimeCalendar(csml.csmllibs.csmltime.cdtime.DefaultCalendar)   
89    try:
90        caltype=gridOrd.coordAxisValues.frame.split(':',1)[0]
91        calunits=gridOrd.coordAxisValues.frame.split(':',1)[1]
92        csml.csmllibs.csmltime.setcdtimeCalendar(caltype)
93    except:
94        csml.csmllibs.csmltime.setcdtimeCalendar(csml.csmllibs.csmltime.cdtime.DefaultCalendar)
95    return calunits, caltype
96
97
98def _getTimes(timeSelection, timeName, domain):
99    #currently supporting domain subsetting only by CRS name
100    #(but should be easy to extend later)
101    if type(timeSelection) is str:
102        times=(timeSelection,)
103    else:
104        times=timeSelection
105   
106   
107    #times=timeSelection
108   
109    print times
110    print len(times)
111    if len(times)==0:
112        #no time specified, select all.
113        times= domain[timeName]
114    tone=csml.csmllibs.csmltime.getCDtime(times[0])
115    if len(times) == 2:
116        #then this is a range of times (t1, tn), and not a list
117        try:
118            tone=csml.csmllibs.csmltime.getCDtime(times[0])
119        except:
120            tone=times[0]
121        try:
122            ttwo=csml.csmllibs.csmltime.getCDtime(times[1])
123        except:
124            ttwo=times[1]
125        times=[]         
126        for time in domain[timeName]:
127            timeok=csml.csmllibs.csmltime.compareTimes(tone,time,ttwo)
128            if timeok ==1:
129                times.append(time)
130    return times
131
132def _getTimeToFileRatio(feature,domain, timeName):
133    if hasattr(feature.value.rangeSet, 'valueArray'):
134        if hasattr(feature.value.rangeSet.valueArray.valueComponent.quantityList, '__insertedExtract'):
135            numFiles= len( csmlutils.listify(feature.value.rangeSet.valueArray.valueComponent.quantityList.__insertedExtract.components)[0].fileList.fileNames.CONTENT.split())
136            return len(domain[timeName])/numFiles
137
138def getCoordTransformTable(domainSubset, crs):
139    cTT=csml.parser.GridCoordinatesTable()
140    ords =[]
141    for key in domainSubset.keys():
142        go=csml.parser.GridOrdinateDescription()
143        go.coordAxisLabel=csml.parser.csString(key)
144        go.gridAxesSpanned=csml.parser.csString(key)
145        go.coordAxisValues = csml.parser.SpatialOrTemporalPositionList()
146        if key==crs.axes[crs.timeAxis]:
147            go.coordAxisValues.timePositionList=csml.parser.csString(domainSubset[key]) #self.domain.key placeholder
148        else:
149            go.coordAxisValues.coordinateList=csml.parser.csString(domainSubset[key]) #self.domain.key placeholder
150        ords.append(go) #go needs a few more properties setting
151    cTT.gridOrdinates=ords
152    return cTT
153
154
155
156def getTheData(feature, selection, times,timeName):
157    #SOME OF THIS SHOULD PROBABLY BE IN THE DATA IO LAYER
158    domain = feature.domain
159    value=feature.value
160    files=[]
161    strTimes=''
162    fulldata=[]
163    #get the ratio of times to files
164    timeToFileRatio = _getTimeToFileRatio(feature, domain, timeName)
165       
166           
167    #list to keep track of files that have already been fetched. eg. if multiple times are in a single file only need to get data from that file once...
168    filesFetched=[]
169    for time in times:
170        listPosition=domain[timeName].index(time)
171        strTimes= strTimes + ' ' + time
172        for comp in csmlutils.listify(value.rangeSet.valueArray.valueComponent.quantityList.__insertedExtract.components): 
173            filePos=int(float(listPosition)/timeToFileRatio)
174            if filePos in filesFetched:
175                continue #already got data from this file, try next time
176            data, fillvalue, axisorder, units=comp.getData(fileposition=filePos, **selection)
177            files.append(comp.fileList.fileNames.CONTENT[filePos]) #TODO, get the right file name
178            if fulldata ==[]:
179                fulldata = data
180            else:
181                for item in data:
182                    fulldata.append(item)
183            filesFetched.append(filePos)
184    units.append(value.rangeSet.valueArray.valueComponent.quantityList.uom) # final unit is that of the
185    return strTimes, axisorder, units, fulldata, fillvalue
186
187def genericSubset(feature, csmlpath, ncpath, domain, kwargs):
188    if ncpath is not None:
189        pathToSubsetNetCDF=ncpath
190    else:
191        pathToSubsetNetCDF='temp.nc'
192   
193    #deal with longitude requests
194    #if the request is in -ve,+ve eg (-30,30) but the data is in (0,360) need to handle this by changing the args.
195    kwargs=csmlutils.fixLongitude(domain,kwargs)
196       
197    #get the name of the time axis in the coordinate reference system
198    cat=csml.csmllibs.csmlcrs.CRSCatalogue()
199    crs=cat.getCRS(feature.value.gridSeriesDomain.srsName)
200    timeName=crs.axes[crs.timeAxis]
201     #set the reference system for the time axis
202    calunits, caltype=_setCalendar(feature, timeName)
203   
204    #selection by time - get explcit time values, not just a range mint, maxt
205    try:
206        timeSelection=kwargs[timeName]
207    except KeyError:
208        timeSelection=[]
209    times=_getTimes(timeSelection, timeName,domain)
210   
211   
212    return pathToSubsetNetCDF, kwargs, timeName, calunits, caltype, times
Note: See TracBrowser for help on using the repository browser.