source: TI02-CSML/trunk/csml/API/genSubset.py @ 2206

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI02-CSML/trunk/csml/API/genSubset.py@2206
Revision 2206, 9.1 KB checked in by domlowe, 13 years ago (diff)

nearest neighbour working for times, plus separation of subsetting into core functions and feature specific ones

Line 
1'''#getSubset.py contains generic subsetting code that can be reused for multiple features
2Dominic Lowe, CCLRC, 31/01/2007'''
3
4import csml
5import csmlutils
6import sys
7import Numeric
8
9def checkNeighbours(domain, gridnames, **kwargs):
10    #for any non-range requests, get nearest neighbour
11    #e.g if 'latitude': (11) requested, latitude=12 may be the nearest value
12    for key in kwargs:
13        #handle single values
14        if type(kwargs[key]) is not tuple:
15            nearestNeighbour=csml.API.csmlutils.nudgeSingleValuesToAxisValues(kwargs[key],domain[gridnames[key]])
16            if nearestNeighbour is not None:
17                kwargs[key]=nearestNeighbour   
18        else:
19            #handle ranges
20            tmpkey=[]
21            for val in kwargs[key]:
22                nearestNeighbour=csml.API.csmlutils.nudgeSingleValuesToAxisValues(val, domain[gridnames[key]])
23                if nearestNeighbour is not None:
24                    tmpkey.append(nearestNeighbour)   
25                kwargs[key]=tuple(tmpkey)
26    return kwargs
27
28def subsetDomain(timeaxis,times, domain,**kwargs):
29    '''takes the domain and returns a subset according to the keyword selection criteria
30    time = name of time dimension
31    times = string of times
32    '''   
33    strTimes=times
34    subsetDomain={}
35    totalArraySize=1
36    for key in domain.keys():
37        print key
38        straxisValues=''
39        if key in kwargs:
40            if key ==timeaxis:
41                straxisValues=strTimes
42                arraySize=len(strTimes.split())
43            elif type(kwargs[key]) is not tuple:
44                #only one value not a range
45                straxisValues=str(kwargs[key]) 
46            elif kwargs[key][0] < kwargs[key][1]:   
47                for val in domain[key]:
48                      if val is not '':
49                            if float(val) >= kwargs[key][0]:
50                                if float(val) <= kwargs[key] [1]:
51                                    straxisValues=straxisValues+ str(val) + ','
52                straxisValues=straxisValues[:-1]
53            else:#this if deals with selections such as longitude (330,30) where the lower limit is 'greater' than the upper limit in a mathematical sense.
54                    for val in domain[key]:
55                        if val is not '':
56                            if val >= kwargs[key][0]:
57                                straxisValues=straxisValues+ str(val) + ','
58                    for val in domain[key]:
59                        if val is not '':
60                            if val <= kwargs[key] [1]:
61                                straxisValues=straxisValues+ str(val) + ','
62                    straxisValues=straxisValues[:-1]                                             
63        else: # this dimension has not been subsetted at all - CHECK  THIS
64            for val in domain[key]:
65                if val is not '':
66                    straxisValues=straxisValues+ str(val) + ','       
67            straxisValues=straxisValues[:-1]                       
68                                           
69        subsetDomain[key]=straxisValues
70        if key != timeaxis:
71            arraySize=len(subsetDomain[key].split(','))
72        else:
73            arraySize=len(subsetDomain[key].split())
74        totalArraySize = totalArraySize * arraySize
75    return subsetDomain, totalArraySize
76   
77def _setCalendar(feature, timeName):
78    '''sets the cdtime calendar needed for conversion from CSML to NetCDF'''
79    calset=False
80    for gridOrd in feature.value.gridSeriesDomain.coordTransformTable.gridOrdinates:
81        if gridOrd.coordAxisLabel.CONTENT==timeName:
82            try:
83                caltype=gridOrd.coordAxisValues.frame.split(':',1)[0]
84                calunits=gridOrd.coordAxisValues.frame.split(':',1)[1]
85                csml.csmllibs.csmltime.setcdtimeCalendar(caltype)
86                calset=True
87            except:pass
88    if calset!=True:
89        csml.csmllibs.csmltime.setcdtimeCalendar(csml.csmllibs.csmltime.cdtime.DefaultCalendar)   
90    try:
91        caltype=gridOrd.coordAxisValues.frame.split(':',1)[0]
92        calunits=gridOrd.coordAxisValues.frame.split(':',1)[1]
93        csml.csmllibs.csmltime.setcdtimeCalendar(caltype)
94    except:
95        csml.csmllibs.csmltime.setcdtimeCalendar(csml.csmllibs.csmltime.cdtime.DefaultCalendar)
96    return calunits, caltype
97
98
99def _getTimes(timeSelection, timeName, domain):
100    #currently supporting domain subsetting only by CRS name
101    #(but should be easy to extend later)
102    if type(timeSelection) is str:
103        times=(timeSelection,)
104    else:
105        times=timeSelection
106   
107    if len(times)==0:
108        #no time specified, select all.
109        times= domain[timeName]
110    tone=csml.csmllibs.csmltime.getCDtime(times[0])
111    if len(times) == 2:
112        #then this is a range of times (t1, tn), and not a list
113        try:
114            tone=csml.csmllibs.csmltime.getCDtime(times[0])
115        except:
116            tone=times[0]
117        try:
118            ttwo=csml.csmllibs.csmltime.getCDtime(times[1])
119        except:
120            ttwo=times[1]
121        times=[]         
122        for time in domain[timeName]:
123            timeok=csml.csmllibs.csmltime.compareTimes(tone,time,ttwo)
124            if timeok ==1:
125                times.append(time)
126    return times
127
128def _getTimeToFileRatio(feature,domain, timeName):
129    if hasattr(feature.value.rangeSet, 'valueArray'):
130        if hasattr(feature.value.rangeSet.valueArray.valueComponent.quantityList, '__insertedExtract'):
131            numFiles= len( csmlutils.listify(feature.value.rangeSet.valueArray.valueComponent.quantityList.__insertedExtract.components)[0].fileList.fileNames.CONTENT.split())
132            return len(domain[timeName])/numFiles
133
134def getCoordTransformTable(domainSubset, crs):
135    cTT=csml.parser.GridCoordinatesTable()
136    ords =[]
137    for key in domainSubset.keys():
138        go=csml.parser.GridOrdinateDescription()
139        go.coordAxisLabel=csml.parser.csString(key)
140        go.gridAxesSpanned=csml.parser.csString(key)
141        go.coordAxisValues = csml.parser.SpatialOrTemporalPositionList()
142        if key==crs.axes[crs.timeAxis]:
143            go.coordAxisValues.timePositionList=csml.parser.csString(domainSubset[key]) #self.domain.key placeholder
144        else:
145            go.coordAxisValues.coordinateList=csml.parser.csString(domainSubset[key]) #self.domain.key placeholder
146        ords.append(go) #go needs a few more properties setting
147    cTT.gridOrdinates=ords
148    return cTT
149
150
151
152def getTheData(feature, selection, times,timeName):
153    #SOME OF THIS SHOULD PROBABLY BE IN THE DATA IO LAYER
154    domain = feature.domain
155    value=feature.value
156    files=[]
157    strTimes=''
158    fulldata=None
159    #get the ratio of times to files
160    timeToFileRatio = _getTimeToFileRatio(feature, domain, timeName)
161       
162           
163    #list to keep track of files that have already been fetched. eg. if multiple times are in a single file only need to get data from that file once...
164    filesFetched=[]
165    for time in times:
166        listPosition=domain[timeName].index(time)
167        strTimes= strTimes + ' ' + time
168        for comp in csmlutils.listify(value.rangeSet.valueArray.valueComponent.quantityList.__insertedExtract.components): 
169            filePos=int(float(listPosition)/timeToFileRatio)
170            if filePos in filesFetched:
171                continue #already got data from this file, try next time
172            data, fillvalue, axisorder, units=comp.getData(fileposition=filePos, **selection)
173            files.append(comp.fileList.fileNames.CONTENT[filePos]) #TODO, get the right file name
174            if fulldata is None:
175                fulldata=data
176            else:
177                newfulldata=Numeric.concatenate([fulldata,data], axis=0)   
178                fulldata=newfulldata
179            filesFetched.append(filePos)
180    units.append(value.rangeSet.valueArray.valueComponent.quantityList.uom) # final unit is that of the
181    return strTimes, axisorder, units, fulldata, fillvalue
182
183def genericSubset(feature, csmlpath, ncpath, domain, kwargs):
184    if ncpath is not None:
185        pathToSubsetNetCDF=ncpath
186    else:
187        pathToSubsetNetCDF='temp.nc'
188   
189    #deal with longitude requests
190    #if the request is in -ve,+ve eg (-30,30) but the data is in (0,360) need to handle this by changing the args.
191    kwargs=csmlutils.fixLongitude(domain,kwargs)
192       
193    #get the name of the time axis in the coordinate reference system
194    cat=csml.csmllibs.csmlcrs.CRSCatalogue()
195    crs=cat.getCRS(feature.value.gridSeriesDomain.srsName)
196    timeName=crs.axes[crs.timeAxis]
197    #Find the original time name (timeAxis) for the file.
198    for gridOrd in feature.value.gridSeriesDomain.coordTransformTable.gridOrdinates:
199        if gridOrd.coordAxisLabel.CONTENT==timeName:
200            timeAxis = gridOrd.gridAxesSpanned.CONTENT  #only works for regular grids atm
201            break
202    #set the reference system for the time axis
203    calunits, caltype=_setCalendar(feature, timeName)
204   
205    #selection by time - get explcit time values, not just a range mint, maxt
206   
207    try:
208        timeSelection=kwargs[timeAxis]
209    except KeyError:
210        timeSelection=[]
211       
212    times=_getTimes(timeSelection, timeName,domain)
213   
214   
215    return pathToSubsetNetCDF, kwargs, timeAxis,timeName, calunits, caltype, times
Note: See TracBrowser for help on using the repository browser.