source: TI02-CSML/trunk/csml/API/genSubset.py @ 3267

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI02-CSML/trunk/csml/API/genSubset.py@3267
Revision 3267, 14.9 KB checked in by domlowe, 12 years ago (diff)

Documenting CSML code - not complete

Line 
1'''getSubset.py contains generic subsetting code that can be reused for multiple features
2Dominic Lowe, CCLRC, 31/01/2007'''
3
4import csml
5import csmlutils
6import sys
7import Numeric
8
9def checkNeighbours(domain, **kwargs):
10    '''
11    Compares a subset request (kwargs) to the feature domain and gets nearest neighbour
12    e.g if 'latitude': (11) requested, latitude=12 may be the nearest value, calls nudgeSingleValuesToAxisValues
13    @param domain:   dictionary
14    @param kwargs: dictionary
15    returns:    modified kwargs
16   
17    '''
18    for key in kwargs:
19        #handle single values
20        if type(kwargs[key]) is not tuple:
21            if type(domain[key]) is not list:
22                axeslist=domain[key].tolist()
23            else:
24                axeslist = domain[key]
25            nearestNeighbour=csml.API.csmlutils.nudgeSingleValuesToAxisValues(kwargs[key],axeslist)
26            if nearestNeighbour is not None:
27                kwargs[key]=nearestNeighbour   
28        else:                                 
29            tmpkey=[]
30            for val in kwargs[key]:
31                nearestNeighbour=csml.API.csmlutils.nudgeSingleValuesToAxisValues(val, domain[key], minBound=kwargs[key][0], maxBound=kwargs[key][1])               
32                if nearestNeighbour is not None:
33                    tmpkey.append(nearestNeighbour)   
34            kwargs[key]=tuple(tmpkey)           
35    return kwargs
36
37def subsetDomain(timeaxis,times, domain,**kwargs):
38    '''takes the domain and returns a subset according to the keyword selection criteria
39    @param timeaxis:  name of time dimension
40    @param times:    string of times
41    @param domain:     feature domain dictionary
42    @param kwargs:      keyword selection
43    @return:    subsetted domain (dictionary) and a totalArraySize (integer)
44   '''       
45    #reduce requests with same max and min eg (45,45) down to single value.
46    for kw in kwargs:
47        if type (kwargs[kw]) is tuple:
48            if kwargs[kw][0] == kwargs[kw][1]:
49                kwargs[kw]=kwargs[kw][0]
50    strTimes=times
51    subsetDomain={}
52    totalArraySize=1
53    for key in domain.keys():
54        straxisValues=''
55        if key in kwargs:
56            if key ==timeaxis:
57                straxisValues=strTimes
58                arraySize=len(strTimes.split())
59            elif type(kwargs[key]) is not tuple:
60                #only one value not a range
61                straxisValues=str(kwargs[key]) 
62            elif kwargs[key][0] < kwargs[key][1]:   
63                for val in domain[key]:
64                      if val is not '':
65                            if float(val) >= kwargs[key][0]:
66                                if float(val) <= kwargs[key] [1]:
67                                    straxisValues=straxisValues+ str(val) + ','
68                straxisValues=straxisValues[:-1]
69            else:#this if deals with selections such as longitude (330,30) where the lower limit is 'greater' than the upper limit in a mathematical sense.
70                    for val in domain[key]:
71                        if val is not '':
72                            if val >= kwargs[key][0]:
73                                straxisValues=straxisValues+ str(val) + ','
74                    for val in domain[key]:
75                        if val is not '':
76                            if val <= kwargs[key] [1]:
77                                straxisValues=straxisValues+ str(val) + ','
78                    straxisValues=straxisValues[:-1]                                             
79        else: # this dimension has not been subsetted at all - CHECK  THIS
80            for val in domain[key]:
81                if val is not '':
82                    straxisValues=straxisValues+ str(val) + ','       
83            straxisValues=straxisValues[:-1]                       
84                                           
85        subsetDomain[key]=straxisValues
86        if key != timeaxis:
87            arraySize=len(subsetDomain[key].split(','))
88        else:
89            arraySize=len(subsetDomain[key].split())
90        totalArraySize = totalArraySize * arraySize
91   
92   
93    #Finally, check the order of the subsetDomain is the same as the order of the kwargs:
94    #eg. if the kwargs = (-90,90), then the subsetDomain  should not be 90 to -90.
95    KWordering={}   
96    for kw in kwargs:
97        if type (kwargs[kw]) is tuple:
98            if kw in ['longitude', 'time']:  #TODO should get these names from the CRS...
99                continue
100            if len(kwargs[kw]) >1: 
101                if kwargs[kw][0] > kwargs[kw][-1]:
102                    KWordering[kw]= 'highlow'
103                else:
104                    KWordering[kw]= 'lowhigh'
105           
106    for kw in KWordering:
107        if eval(subsetDomain[kw].split(',')[0]) > eval(subsetDomain[kw].split(',')[-1]):
108            subOrder='highlow'
109        else:
110            subOrder='lowhigh'
111        #if order needs reversing, reverse the values in the subset domain string           
112        if subOrder != KWordering[kw]:
113            valList=[]
114            valList= subsetDomain[kw].split(',')
115            valStr=''
116            valList.reverse()
117            for val in valList:
118                valStr=valStr + ',' + val   
119            subsetDomain[kw] = valStr[1:]
120    return subsetDomain, totalArraySize
121   
122def _setCalendar(feature, timeName, ords):
123    '''sets the cdtime calendar needed for conversion from CSML to NetCDF'''
124    calset=False
125    for gridOrd in ords:
126        if gridOrd.coordAxisLabel.CONTENT==timeName:     
127            try:
128                caltype=gridOrd.coordAxisValues.timePositionList.frame.split(':',2)[1]
129                csml.csmllibs.csmltime.setcdtimeCalendar(caltype)
130                calset=True
131            except:pass
132    if calset!=True:
133        csml.csmllibs.csmltime.setcdtimeCalendar(csml.csmllibs.csmltime.cdtime.DefaultCalendar)       
134    try:
135        caltype=gridOrd.coordAxisValues.timePositionList.frame.split(':',2)[1]
136        csml.csmllibs.csmltime.setcdtimeCalendar(caltype)
137    except:
138        csml.csmllibs.csmltime.setcdtimeCalendar(csml.csmllibs.csmltime.cdtime.DefaultCalendar)     
139        caltype='standard' 
140    return caltype
141
142
143def _getTimes(timeSelection, timeName, domain):
144    ''' Given a selection of times (min_t, max_t) or (t1, t2, t3,...tn) or just 't' and the name of the time axis in the CRS, and the domain, returns subset of times
145    @param timeSelection:  times - in one of 3 forms: (min_t, max_t) or (t1, t2, t3,...tn) or just 't'
146    @param timeName:      name of the time axis
147    @param domain:        the domain dictionary
148    @return:     subset of times. e.g if a range is selected, returns all the times in that range. Times are correctly formatted.
149    '''
150    #currently supporting domain subsetting only by CRS name
151    #(but should be easy to extend later)
152    if type(timeSelection) in [str,unicode]:
153        times=(timeSelection,)
154    else:
155        times=timeSelection
156    if len(times)==0:
157        #no time specified, select all.
158        times= domain[timeName]
159    tone=csml.csmllibs.csmltime.getCDtime(times[0])
160   
161    if len(times) == 2:
162        #then this is a range of times (t1, tn), and not a list
163        try:
164            tone=csml.csmllibs.csmltime.getCDtime(times[0])
165        except:
166            tone=times[0]
167        try:
168            ttwo=csml.csmllibs.csmltime.getCDtime(times[1])
169        except:
170            ttwo=times[1]
171        times=[]         
172        for time in domain[timeName]:
173            timeok=csml.csmllibs.csmltime.compareTimes(tone,time,ttwo)
174            if timeok ==1:
175                times.append(time)
176    return times
177
178def _getTimeToFileRatio(feature,domain, timeName):
179    ''' Calculates the ratio of times to files... i.e are there 5 times in each data file? 500 times?
180    @param feature: a feature instance
181    @param domain: the domain dictonary
182    @param timeName: the name of the time axis in the domain dictionary
183    @return:    ratio of times to files (integer)
184    '''
185   
186    if hasattr(feature.value.rangeSet, 'valueArray'):
187        if hasattr(feature.value.rangeSet.valueArray.valueComponent, 'insertedExtract'):           
188            try:
189                numFiles= len( csmlutils.listify(feature.value.rangeSet.valueArray.valueComponent.insertedExtract.components)[0].fileList.fileNames.CONTENT.split())
190            except:
191                try:
192                    numFiles= len(csmlutils.listify(feature.value.rangeSet.valueArray.valueComponent.insertedExtract.components)[0].fileName.CONTENT)
193                except:
194                    numFiles= len(csmlutils.listify(feature.value.rangeSet.valueArray.valueComponent.insertedExtract.fileName.CONTENT))
195            timeToFileRatio= len(domain[timeName])/numFiles
196            if timeToFileRatio==0:
197                timeToFileRatio=None           
198            return timeToFileRatio
199
200def getCoordTransformTable(domainSubset, crs, frame):
201    '''
202    Given a domainSubset and a crs and a temporal reference fram returns a csml.parser.coordTransformTable object
203    @param domainSubset:    dictionary
204    @param crs:    a crs identifier
205    @param frame:      a temporal reference system identifier
206    '''
207    cTT=csml.parser.GridCoordinatesTable()
208    ords =[]
209    for key in domainSubset.keys():
210        go=csml.parser.GridOrdinateDescription()
211        go.coordAxisLabel=csml.parser.csString(key)
212        go.gridAxesSpanned=csml.parser.csString(key)
213        go.coordAxisValues = csml.parser.SpatialOrTemporalPositionList()
214        go.coordAxisValues.id=csml.csmllibs.csmlextra.getRandomID()
215        vals=str(domainSubset[key]).replace(',',' ')
216       
217        if key==crs.axes[crs.timeAxis]:
218            go.coordAxisValues.timePositionList=csml.parser.csString(vals)
219            go.coordAxisValues.frame = frame
220            #self.domain.key placeholder
221        else:
222            go.coordAxisValues.coordinateList=csml.parser.csString(vals) #self.domain.key placeholder
223        seqRule= csml.parser.SequenceRule()
224        seqRule.CONTENT='Linear'
225        seqRule.axisOrder='+1'  #TO DO. Work this out.
226        go.sequenceRule=seqRule
227        ords.append(go) #go needs a few more properties setting
228    cTT.gridOrdinates=ords
229    return cTT
230
231
232
233def getTheData(feature, selection, times,timeName, timeAxis):
234    '''
235    Wraps a lot of the detail involved in making a request for data
236    @param feature:    a feature instance
237    @param selection:    the selection object
238    @param times:       the times to select
239    @param timeName:     the name of the time axis
240    @param timeaxis: TODO REMOVE THIS.
241    @return:  times, axisorder, units, fulldata, fillvalue, where fulldata contains the actual data array, and the rest is ancilliary info about the data.
242    '''
243   
244    #SOME OF THIS SHOULD PROBABLY BE IN THE DATA IO LAYER
245    #add time range back into the selection
246
247    if len(times)==1:
248        selection[timeName]=times[0]
249    else:
250         selection[timeName]=(times[0], times[len(times)-1])
251    domain = feature.domain
252    value=feature.value
253    files=[]
254    strTimes=''
255    fulldata=None
256    #get the ratio of times to files
257    timeToFileRatio = _getTimeToFileRatio(feature, domain, timeName)
258           
259    #list to keep track of files that have already been fetched. eg. if multiple times are in a single file only need to get data from that file once...
260    filesFetched=[]
261     
262    #get list of file extracts
263    try:
264        componentlist = csmlutils.listify(value.rangeSet.valueArray.valueComponent.insertedExtract.components)
265    except:
266        componentlist = csmlutils.listify(value.rangeSet.valueArray.valueComponent.insertedExtract)
267    for time in times:
268        listPosition=domain[timeName].index(time)
269        strTimes= strTimes + ' ' + time                   
270        for comp in componentlist:
271            if timeToFileRatio == None:
272                filePos=None
273            else:               
274                filePos=int(float(listPosition)/timeToFileRatio)
275            if filePos in filesFetched:
276                continue #already got data from this file, try next time         
277            data, fillvalue, axisorder, units=comp.getData(fileposition=filePos, **selection)
278            if filePos is None:
279                files.append(comp.fileName.CONTENT)
280            else:
281                try:
282                    files.append(comp.fileList.fileNames.CONTENT[filePos]) #TODO, get the right file name
283                except:
284                    files.append(comp.fileName.CONTENT)
285            if fulldata is None:
286                fulldata=data
287            else:
288                newfulldata=Numeric.concatenate([fulldata,data], axis=0)   
289                fulldata=newfulldata
290            filesFetched.append(filePos)
291    if hasattr(value.rangeSet, 'valueArray'):
292        units.append(value.rangeSet.valueArray.valueComponent.uom) # final unit is that of the parameter
293    else:
294        units.append ('unitsTBA')
295 
296    return strTimes, axisorder, units, fulldata, fillvalue
297
298def genericSubset(feature, outputdir, ncname, domain, kwargs):
299    '''
300    High level wrapper method for subsetting features, calls most of the other functions in this module.
301    @param feature:   a feature instance
302    @param outputdir:    outputdir to write files
303    @param  ncname:    name of netcdf file to create
304    @param domain:      domain of feature(dictionary)
305    @param kwargs:       subset request (dictionary)
306    '''
307   
308    if outputdir is not None:
309        pathToSubsetNetCDF=outputdir+'/' +ncname
310    else:
311        pathToSubsetNetCDF=ncname
312   
313    #deal with longitude requests
314    #if the request is in -ve,+ve eg (-30,30) but the data is in (0,360) need to handle this by changing the args.
315    kwargs=csmlutils.fixLongitude(domain,kwargs)
316   
317   
318    #if request doesn't match domain points find nearest neighbours
319    kwargs=csml.API.genSubset.checkNeighbours(domain, **kwargs)
320   
321    #get the name of the time axis in the coordinate reference system
322    cat=csml.csmllibs.csmlcrs.CRSCatalogue()
323    if type(feature) == csml.parser.GridSeriesFeature:
324        crs=cat.getCRS(feature.value.gridSeriesDomain.srsName)
325        gridordinates = feature.value.gridSeriesDomain.coordTransformTable.gridOrdinates
326    elif type(feature) == csml.parser.ProfileSeriesFeature:
327        crs=cat.getCRS(feature.value.profileSeriesDomain.srsName)
328        gridordinates = feature.value.profileSeriesDomain.coordTransformTable.gridOrdinates
329    elif type(feature) == csml.parser.TrajectoryFeature:
330        crs=cat.getCRS(feature.value.trajectoryDomain.srsName)
331        gridordinates = feature.value.trajectoryDomain.coordTransformTable.gridOrdinates
332    timeName=crs.axes[crs.timeAxis]
333    #Find the original time name (timeAxis) for the file.
334    for gridOrd in gridordinates:
335        if gridOrd.coordAxisLabel.CONTENT==timeName:
336            timeAxis = gridOrd.gridAxesSpanned.CONTENT  #only works for regular grids atm
337            break
338    #set the reference system for the time axis
339    caltype=_setCalendar(feature, timeName, gridordinates)
340    try:
341        timeSelection=kwargs[timeName]
342    except KeyError:
343        timeSelection=[]
344    times=_getTimes(timeSelection, timeName,domain)       
345    return pathToSubsetNetCDF, kwargs, timeAxis,timeName, caltype, times
Note: See TracBrowser for help on using the repository browser.