source: TI02-CSML/branches/csml-cdms2/API/genSubset.py @ 3627

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI02-CSML/branches/csml-cdms2/API/genSubset.py@3627
Revision 3627, 15.0 KB checked in by spascoe, 12 years ago (diff)

This branch contains CSML converted to use cdat_lite-5.

  • convertcdms was run on the source
  • the MA.set_print_limit call was changed to the numpy equivilent
  • The tests were changed to account for the existence of numpy scalar types.

All tests, except the two known to fail, pass on i686 ubuntu.

Line 
1# Adapted for numpy/ma/cdms2 by convertcdms.py
2'''getSubset.py contains generic subsetting code that can be reused for multiple features
3Dominic Lowe, CCLRC, 31/01/2007'''
4
5import csml
6import csmlutils
7import sys
8import numpy.oldnumeric as Numeric
9
10def checkNeighbours(domain, **kwargs):
11    '''
12    Compares a subset request (kwargs) to the feature domain and gets nearest neighbour
13    e.g if 'latitude': (11) requested, latitude=12 may be the nearest value, calls nudgeSingleValuesToAxisValues
14    @param domain:   dictionary
15    @param kwargs: dictionary
16    returns:    modified kwargs
17   
18    '''
19    for key in kwargs:
20        #handle single values
21        if type(kwargs[key]) is not tuple:
22            if type(domain[key]) is not list:
23                axeslist=domain[key].tolist()
24            else:
25                axeslist = domain[key]
26            nearestNeighbour=csml.API.csmlutils.nudgeSingleValuesToAxisValues(kwargs[key],axeslist)
27            if nearestNeighbour is not None:
28                kwargs[key]=nearestNeighbour   
29        else:                                 
30            tmpkey=[]
31            for val in kwargs[key]:
32                nearestNeighbour=csml.API.csmlutils.nudgeSingleValuesToAxisValues(val, domain[key], minBound=kwargs[key][0], maxBound=kwargs[key][1])               
33                if nearestNeighbour is not None:
34                    tmpkey.append(nearestNeighbour)   
35            kwargs[key]=tuple(tmpkey)           
36    return kwargs
37
38def subsetDomain(timeaxis,times, domain,**kwargs):
39    '''takes the domain and returns a subset according to the keyword selection criteria
40    @param timeaxis:  name of time dimension
41    @param times:    string of times
42    @param domain:     feature domain dictionary
43    @param kwargs:      keyword selection
44    @return:    subsetted domain (dictionary) and a totalArraySize (integer)
45   '''       
46    #reduce requests with same max and min eg (45,45) down to single value.
47    for kw in kwargs:
48        if type (kwargs[kw]) is tuple:
49            if kwargs[kw][0] == kwargs[kw][1]:
50                kwargs[kw]=kwargs[kw][0]
51    strTimes=times
52    subsetDomain={}
53    totalArraySize=1
54    for key in domain.keys():
55        straxisValues=''
56        if key in kwargs:
57            if key ==timeaxis:
58                straxisValues=strTimes
59                arraySize=len(strTimes.split())
60            elif type(kwargs[key]) is not tuple:
61                #only one value not a range
62                straxisValues=str(kwargs[key]) 
63            elif kwargs[key][0] < kwargs[key][1]:   
64                for val in domain[key]:
65                      if val is not '':
66                            if float(val) >= kwargs[key][0]:
67                                if float(val) <= kwargs[key] [1]:
68                                    straxisValues=straxisValues+ str(val) + ','
69                straxisValues=straxisValues[:-1]
70            else:#this if deals with selections such as longitude (330,30) where the lower limit is 'greater' than the upper limit in a mathematical sense.
71                    for val in domain[key]:
72                        if val is not '':
73                            if val >= kwargs[key][0]:
74                                straxisValues=straxisValues+ str(val) + ','
75                    for val in domain[key]:
76                        if val is not '':
77                            if val <= kwargs[key] [1]:
78                                straxisValues=straxisValues+ str(val) + ','
79                    straxisValues=straxisValues[:-1]                                             
80        else: # this dimension has not been subsetted at all - CHECK  THIS
81            for val in domain[key]:
82                if val is not '':
83                    straxisValues=straxisValues+ str(val) + ','       
84            straxisValues=straxisValues[:-1]                       
85                                           
86        subsetDomain[key]=straxisValues
87        if key != timeaxis:
88            arraySize=len(subsetDomain[key].split(','))
89        else:
90            arraySize=len(subsetDomain[key].split())
91        totalArraySize = totalArraySize * arraySize
92   
93   
94    #Finally, check the order of the subsetDomain is the same as the order of the kwargs:
95    #eg. if the kwargs = (-90,90), then the subsetDomain  should not be 90 to -90.
96    KWordering={}   
97    for kw in kwargs:
98        if type (kwargs[kw]) is tuple:
99            if kw in ['longitude', 'time']:  #TODO should get these names from the CRS...
100                continue
101            if len(kwargs[kw]) >1: 
102                if kwargs[kw][0] > kwargs[kw][-1]:
103                    KWordering[kw]= 'highlow'
104                else:
105                    KWordering[kw]= 'lowhigh'
106           
107    for kw in KWordering:
108        if eval(subsetDomain[kw].split(',')[0]) > eval(subsetDomain[kw].split(',')[-1]):
109            subOrder='highlow'
110        else:
111            subOrder='lowhigh'
112        #if order needs reversing, reverse the values in the subset domain string           
113        if subOrder != KWordering[kw]:
114            valList=[]
115            valList= subsetDomain[kw].split(',')
116            valStr=''
117            valList.reverse()
118            for val in valList:
119                valStr=valStr + ',' + val   
120            subsetDomain[kw] = valStr[1:]
121    return subsetDomain, totalArraySize
122   
123def _setCalendar(feature, timeName, ords):
124    '''sets the cdtime calendar needed for conversion from CSML to NetCDF'''
125    calset=False
126    for gridOrd in ords:
127        if gridOrd.coordAxisLabel.CONTENT==timeName:     
128            try:
129                caltype=gridOrd.coordAxisValues.timePositionList.frame.split(':',2)[1]
130                csml.csmllibs.csmltime.setcdtimeCalendar(caltype)
131                calset=True
132            except:pass
133    if calset!=True:
134        csml.csmllibs.csmltime.setcdtimeCalendar(csml.csmllibs.csmltime.cdtime.DefaultCalendar)       
135    try:
136        caltype=gridOrd.coordAxisValues.timePositionList.frame.split(':',2)[1]
137        csml.csmllibs.csmltime.setcdtimeCalendar(caltype)
138    except:
139        csml.csmllibs.csmltime.setcdtimeCalendar(csml.csmllibs.csmltime.cdtime.DefaultCalendar)     
140        caltype='standard' 
141    return caltype
142
143
144def _getTimes(timeSelection, timeName, domain):
145    ''' Given a selection of times (min_t, max_t) or (t1, t2, t3,...tn) or just 't' and the name of the time axis in the CRS, and the domain, returns subset of times
146    @param timeSelection:  times - in one of 3 forms: (min_t, max_t) or (t1, t2, t3,...tn) or just 't'
147    @param timeName:      name of the time axis
148    @param domain:        the domain dictionary
149    @return:     subset of times. e.g if a range is selected, returns all the times in that range. Times are correctly formatted.
150    '''
151    #currently supporting domain subsetting only by CRS name
152    #(but should be easy to extend later)
153    if type(timeSelection) in [str,unicode]:
154        times=(timeSelection,)
155    else:
156        times=timeSelection
157    if len(times)==0:
158        #no time specified, select all.
159        times= domain[timeName]
160    tone=csml.csmllibs.csmltime.getCDtime(times[0])
161   
162    if len(times) == 2:
163        #then this is a range of times (t1, tn), and not a list
164        try:
165            tone=csml.csmllibs.csmltime.getCDtime(times[0])
166        except:
167            tone=times[0]
168        try:
169            ttwo=csml.csmllibs.csmltime.getCDtime(times[1])
170        except:
171            ttwo=times[1]
172        times=[]         
173        for time in domain[timeName]:
174            timeok=csml.csmllibs.csmltime.compareTimes(tone,time,ttwo)
175            if timeok ==1:
176                times.append(time)
177    return times
178
179def _getTimeToFileRatio(feature,domain, timeName):
180    ''' Calculates the ratio of times to files... i.e are there 5 times in each data file? 500 times?
181    @param feature: a feature instance
182    @param domain: the domain dictonary
183    @param timeName: the name of the time axis in the domain dictionary
184    @return:    ratio of times to files (integer)
185    '''
186   
187    if hasattr(feature.value.rangeSet, 'valueArray'):
188        if hasattr(feature.value.rangeSet.valueArray.valueComponent, 'insertedExtract'):           
189            try:
190                numFiles= len( csmlutils.listify(feature.value.rangeSet.valueArray.valueComponent.insertedExtract.components)[0].fileList.fileNames.CONTENT.split())
191            except:
192                try:
193                    numFiles= len(csmlutils.listify(feature.value.rangeSet.valueArray.valueComponent.insertedExtract.components)[0].fileName.CONTENT)
194                except:
195                    numFiles= len(csmlutils.listify(feature.value.rangeSet.valueArray.valueComponent.insertedExtract.fileName.CONTENT))
196            timeToFileRatio= len(domain[timeName])/numFiles
197            if timeToFileRatio==0:
198                timeToFileRatio=None           
199            return timeToFileRatio
200
201def getCoordTransformTable(domainSubset, crs, frame):
202    '''
203    Given a domainSubset and a crs and a temporal reference fram returns a csml.parser.coordTransformTable object
204    @param domainSubset:    dictionary
205    @param crs:    a crs identifier
206    @param frame:      a temporal reference system identifier
207    '''
208    cTT=csml.parser.GridCoordinatesTable()
209    ords =[]
210    for key in domainSubset.keys():
211        go=csml.parser.GridOrdinateDescription()
212        go.coordAxisLabel=csml.parser.csString(key)
213        go.gridAxesSpanned=csml.parser.csString(key)
214        go.coordAxisValues = csml.parser.SpatialOrTemporalPositionList()
215        go.coordAxisValues.id=csml.csmllibs.csmlextra.getRandomID()
216        vals=str(domainSubset[key]).replace(',',' ')
217       
218        if key==crs.axes[crs.timeAxis]:
219            go.coordAxisValues.timePositionList=csml.parser.csString(vals)
220            go.coordAxisValues.frame = frame
221            #self.domain.key placeholder
222        else:
223            go.coordAxisValues.coordinateList=csml.parser.csString(vals) #self.domain.key placeholder
224        seqRule= csml.parser.SequenceRule()
225        seqRule.CONTENT='Linear'
226        seqRule.axisOrder='+1'  #TO DO. Work this out.
227        go.sequenceRule=seqRule
228        ords.append(go) #go needs a few more properties setting
229    cTT.gridOrdinates=ords
230    return cTT
231
232
233
234def getTheData(feature, selection, times,timeName, timeAxis):
235    '''
236    Wraps a lot of the detail involved in making a request for data
237    @param feature:    a feature instance
238    @param selection:    the selection object
239    @param times:       the times to select
240    @param timeName:     the name of the time axis
241    @param timeaxis: TODO REMOVE THIS.
242    @return:  times, axisorder, units, fulldata, fillvalue, where fulldata contains the actual data array, and the rest is ancilliary info about the data.
243    '''
244   
245    #SOME OF THIS SHOULD PROBABLY BE IN THE DATA IO LAYER
246    #add time range back into the selection
247
248    if len(times)==1:
249        selection[timeName]=times[0]
250    else:
251         selection[timeName]=(times[0], times[len(times)-1])
252    domain = feature.domain
253    value=feature.value
254    files=[]
255    strTimes=''
256    fulldata=None
257    #get the ratio of times to files
258    timeToFileRatio = _getTimeToFileRatio(feature, domain, timeName)
259           
260    #list to keep track of files that have already been fetched. eg. if multiple times are in a single file only need to get data from that file once...
261    filesFetched=[]
262     
263    #get list of file extracts
264    try:
265        componentlist = csmlutils.listify(value.rangeSet.valueArray.valueComponent.insertedExtract.components)
266    except:
267        componentlist = csmlutils.listify(value.rangeSet.valueArray.valueComponent.insertedExtract)
268    for time in times:
269        listPosition=domain[timeName].index(time)
270        strTimes= strTimes + ' ' + time                   
271        for comp in componentlist:
272            if timeToFileRatio == None:
273                filePos=None
274            else:               
275                filePos=int(float(listPosition)/timeToFileRatio)
276            if filePos in filesFetched:
277                continue #already got data from this file, try next time         
278            data, fillvalue, axisorder, units=comp.getData(fileposition=filePos, **selection)
279            if filePos is None:
280                files.append(comp.fileName.CONTENT)
281            else:
282                try:
283                    files.append(comp.fileList.fileNames.CONTENT[filePos]) #TODO, get the right file name
284                except:
285                    files.append(comp.fileName.CONTENT)
286            if fulldata is None:
287                fulldata=data
288            else:
289                newfulldata=Numeric.concatenate([fulldata,data], axis=0)   
290                fulldata=newfulldata
291            filesFetched.append(filePos)
292    if hasattr(value.rangeSet, 'valueArray'):
293        units.append(value.rangeSet.valueArray.valueComponent.uom) # final unit is that of the parameter
294    else:
295        units.append ('unitsTBA')
296 
297    return strTimes, axisorder, units, fulldata, fillvalue
298
299def genericSubset(feature, outputdir, ncname, domain, kwargs):
300    '''
301    High level wrapper method for subsetting features, calls most of the other functions in this module.
302    @param feature:   a feature instance
303    @param outputdir:    outputdir to write files
304    @param  ncname:    name of netcdf file to create
305    @param domain:      domain of feature(dictionary)
306    @param kwargs:       subset request (dictionary)
307    '''
308   
309    if outputdir is not None:
310        pathToSubsetNetCDF=outputdir+'/' +ncname
311    else:
312        pathToSubsetNetCDF=ncname
313   
314    #deal with longitude requests
315    #if the request is in -ve,+ve eg (-30,30) but the data is in (0,360) need to handle this by changing the args.
316    kwargs=csmlutils.fixLongitude(domain,kwargs)
317   
318   
319    #if request doesn't match domain points find nearest neighbours
320    kwargs=csml.API.genSubset.checkNeighbours(domain, **kwargs)
321   
322    #get the name of the time axis in the coordinate reference system
323    cat=csml.csmllibs.csmlcrs.CRSCatalogue()
324    if type(feature) == csml.parser.GridSeriesFeature:
325        crs=cat.getCRS(feature.value.gridSeriesDomain.srsName)
326        gridordinates = feature.value.gridSeriesDomain.coordTransformTable.gridOrdinates
327    elif type(feature) == csml.parser.ProfileSeriesFeature:
328        crs=cat.getCRS(feature.value.profileSeriesDomain.srsName)
329        gridordinates = feature.value.profileSeriesDomain.coordTransformTable.gridOrdinates
330    elif type(feature) == csml.parser.TrajectoryFeature:
331        crs=cat.getCRS(feature.value.trajectoryDomain.srsName)
332        gridordinates = feature.value.trajectoryDomain.coordTransformTable.gridOrdinates
333    timeName=crs.axes[crs.timeAxis]
334    #Find the original time name (timeAxis) for the file.
335    for gridOrd in gridordinates:
336        if gridOrd.coordAxisLabel.CONTENT==timeName:
337            timeAxis = gridOrd.gridAxesSpanned.CONTENT  #only works for regular grids atm
338            break
339    #set the reference system for the time axis
340    caltype=_setCalendar(feature, timeName, gridordinates)
341    try:
342        timeSelection=kwargs[timeName]
343    except KeyError:
344        timeSelection=[]
345    times=_getTimes(timeSelection, timeName,domain)       
346    return pathToSubsetNetCDF, kwargs, timeAxis,timeName, caltype, times
Note: See TracBrowser for help on using the repository browser.