1 | '''getSubset.py contains generic subsetting code that can be reused for multiple features |
---|
2 | Dominic Lowe, CCLRC, 31/01/2007''' |
---|
3 | |
---|
4 | import csml |
---|
5 | import csmlutils |
---|
6 | import sys |
---|
7 | import Numeric |
---|
8 | |
---|
9 | def checkNeighbours(domain, **kwargs): |
---|
10 | ''' |
---|
11 | Compares a subset request (kwargs) to the feature domain and gets nearest neighbour |
---|
12 | e.g if 'latitude': (11) requested, latitude=12 may be the nearest value, calls nudgeSingleValuesToAxisValues |
---|
13 | @param domain: dictionary |
---|
14 | @param kwargs: dictionary |
---|
15 | returns: modified kwargs |
---|
16 | |
---|
17 | ''' |
---|
18 | for key in kwargs: |
---|
19 | #handle single values |
---|
20 | if type(kwargs[key]) is not tuple: |
---|
21 | if type(domain[key]) is not list: |
---|
22 | axeslist=domain[key].tolist() |
---|
23 | else: |
---|
24 | axeslist = domain[key] |
---|
25 | nearestNeighbour=csml.API.csmlutils.nudgeSingleValuesToAxisValues(kwargs[key],axeslist) |
---|
26 | if nearestNeighbour is not None: |
---|
27 | kwargs[key]=nearestNeighbour |
---|
28 | else: |
---|
29 | tmpkey=[] |
---|
30 | for val in kwargs[key]: |
---|
31 | nearestNeighbour=csml.API.csmlutils.nudgeSingleValuesToAxisValues(val, domain[key], minBound=kwargs[key][0], maxBound=kwargs[key][1]) |
---|
32 | if nearestNeighbour is not None: |
---|
33 | tmpkey.append(nearestNeighbour) |
---|
34 | kwargs[key]=tuple(tmpkey) |
---|
35 | return kwargs |
---|
36 | |
---|
37 | def subsetDomain(timeaxis,times, domain,**kwargs): |
---|
38 | '''takes the domain and returns a subset according to the keyword selection criteria |
---|
39 | @param timeaxis: name of time dimension |
---|
40 | @param times: string of times |
---|
41 | @param domain: feature domain dictionary |
---|
42 | @param kwargs: keyword selection |
---|
43 | @return: subsetted domain (dictionary) and a totalArraySize (integer) |
---|
44 | ''' |
---|
45 | #reduce requests with same max and min eg (45,45) down to single value. |
---|
46 | for kw in kwargs: |
---|
47 | if type (kwargs[kw]) is tuple: |
---|
48 | if kwargs[kw][0] == kwargs[kw][1]: |
---|
49 | kwargs[kw]=kwargs[kw][0] |
---|
50 | strTimes=times |
---|
51 | subsetDomain={} |
---|
52 | totalArraySize=1 |
---|
53 | for key in domain.keys(): |
---|
54 | straxisValues='' |
---|
55 | if key in kwargs: |
---|
56 | if key ==timeaxis: |
---|
57 | straxisValues=strTimes |
---|
58 | arraySize=len(strTimes.split()) |
---|
59 | elif type(kwargs[key]) is not tuple: |
---|
60 | #only one value not a range |
---|
61 | straxisValues=str(kwargs[key]) |
---|
62 | elif kwargs[key][0] < kwargs[key][1]: |
---|
63 | for val in domain[key]: |
---|
64 | if val is not '': |
---|
65 | if float(val) >= kwargs[key][0]: |
---|
66 | if float(val) <= kwargs[key] [1]: |
---|
67 | straxisValues=straxisValues+ str(val) + ',' |
---|
68 | straxisValues=straxisValues[:-1] |
---|
69 | else:#this if deals with selections such as longitude (330,30) where the lower limit is 'greater' than the upper limit in a mathematical sense. |
---|
70 | for val in domain[key]: |
---|
71 | if val is not '': |
---|
72 | if val >= kwargs[key][0]: |
---|
73 | straxisValues=straxisValues+ str(val) + ',' |
---|
74 | for val in domain[key]: |
---|
75 | if val is not '': |
---|
76 | if val <= kwargs[key] [1]: |
---|
77 | straxisValues=straxisValues+ str(val) + ',' |
---|
78 | straxisValues=straxisValues[:-1] |
---|
79 | else: # this dimension has not been subsetted at all - CHECK THIS |
---|
80 | for val in domain[key]: |
---|
81 | if val is not '': |
---|
82 | straxisValues=straxisValues+ str(val) + ',' |
---|
83 | straxisValues=straxisValues[:-1] |
---|
84 | |
---|
85 | subsetDomain[key]=straxisValues |
---|
86 | if key != timeaxis: |
---|
87 | arraySize=len(subsetDomain[key].split(',')) |
---|
88 | else: |
---|
89 | arraySize=len(subsetDomain[key].split()) |
---|
90 | totalArraySize = totalArraySize * arraySize |
---|
91 | |
---|
92 | |
---|
93 | #Finally, check the order of the subsetDomain is the same as the order of the kwargs: |
---|
94 | #eg. if the kwargs = (-90,90), then the subsetDomain should not be 90 to -90. |
---|
95 | KWordering={} |
---|
96 | for kw in kwargs: |
---|
97 | if type (kwargs[kw]) is tuple: |
---|
98 | if kw in ['longitude', 'time']: #TODO should get these names from the CRS... |
---|
99 | continue |
---|
100 | if len(kwargs[kw]) >1: |
---|
101 | if kwargs[kw][0] > kwargs[kw][-1]: |
---|
102 | KWordering[kw]= 'highlow' |
---|
103 | else: |
---|
104 | KWordering[kw]= 'lowhigh' |
---|
105 | |
---|
106 | for kw in KWordering: |
---|
107 | if eval(subsetDomain[kw].split(',')[0]) > eval(subsetDomain[kw].split(',')[-1]): |
---|
108 | subOrder='highlow' |
---|
109 | else: |
---|
110 | subOrder='lowhigh' |
---|
111 | #if order needs reversing, reverse the values in the subset domain string |
---|
112 | if subOrder != KWordering[kw]: |
---|
113 | valList=[] |
---|
114 | valList= subsetDomain[kw].split(',') |
---|
115 | valStr='' |
---|
116 | valList.reverse() |
---|
117 | for val in valList: |
---|
118 | valStr=valStr + ',' + val |
---|
119 | subsetDomain[kw] = valStr[1:] |
---|
120 | return subsetDomain, totalArraySize |
---|
121 | |
---|
122 | def _setCalendar(feature, timeName, ords): |
---|
123 | '''sets the cdtime calendar needed for conversion from CSML to NetCDF''' |
---|
124 | calset=False |
---|
125 | for gridOrd in ords: |
---|
126 | if gridOrd.coordAxisLabel.CONTENT==timeName: |
---|
127 | try: |
---|
128 | caltype=gridOrd.coordAxisValues.timePositionList.frame.split(':',2)[1] |
---|
129 | csml.csmllibs.csmltime.setcdtimeCalendar(caltype) |
---|
130 | calset=True |
---|
131 | except:pass |
---|
132 | if calset!=True: |
---|
133 | csml.csmllibs.csmltime.setcdtimeCalendar(csml.csmllibs.csmltime.cdtime.DefaultCalendar) |
---|
134 | try: |
---|
135 | caltype=gridOrd.coordAxisValues.timePositionList.frame.split(':',2)[1] |
---|
136 | csml.csmllibs.csmltime.setcdtimeCalendar(caltype) |
---|
137 | except: |
---|
138 | csml.csmllibs.csmltime.setcdtimeCalendar(csml.csmllibs.csmltime.cdtime.DefaultCalendar) |
---|
139 | caltype='standard' |
---|
140 | return caltype |
---|
141 | |
---|
142 | |
---|
143 | def _getTimes(timeSelection, timeName, domain): |
---|
144 | ''' Given a selection of times (min_t, max_t) or (t1, t2, t3,...tn) or just 't' and the name of the time axis in the CRS, and the domain, returns subset of times |
---|
145 | @param timeSelection: times - in one of 3 forms: (min_t, max_t) or (t1, t2, t3,...tn) or just 't' |
---|
146 | @param timeName: name of the time axis |
---|
147 | @param domain: the domain dictionary |
---|
148 | @return: subset of times. e.g if a range is selected, returns all the times in that range. Times are correctly formatted. |
---|
149 | ''' |
---|
150 | #currently supporting domain subsetting only by CRS name |
---|
151 | #(but should be easy to extend later) |
---|
152 | if type(timeSelection) in [str,unicode]: |
---|
153 | times=(timeSelection,) |
---|
154 | else: |
---|
155 | times=timeSelection |
---|
156 | if len(times)==0: |
---|
157 | #no time specified, select all. |
---|
158 | times= domain[timeName] |
---|
159 | tone=csml.csmllibs.csmltime.getCDtime(times[0]) |
---|
160 | |
---|
161 | if len(times) == 2: |
---|
162 | #then this is a range of times (t1, tn), and not a list |
---|
163 | try: |
---|
164 | tone=csml.csmllibs.csmltime.getCDtime(times[0]) |
---|
165 | except: |
---|
166 | tone=times[0] |
---|
167 | try: |
---|
168 | ttwo=csml.csmllibs.csmltime.getCDtime(times[1]) |
---|
169 | except: |
---|
170 | ttwo=times[1] |
---|
171 | times=[] |
---|
172 | for time in domain[timeName]: |
---|
173 | timeok=csml.csmllibs.csmltime.compareTimes(tone,time,ttwo) |
---|
174 | if timeok ==1: |
---|
175 | times.append(time) |
---|
176 | return times |
---|
177 | |
---|
178 | def _getTimeToFileRatio(feature,domain, timeName): |
---|
179 | ''' Calculates the ratio of times to files... i.e are there 5 times in each data file? 500 times? |
---|
180 | @param feature: a feature instance |
---|
181 | @param domain: the domain dictonary |
---|
182 | @param timeName: the name of the time axis in the domain dictionary |
---|
183 | @return: ratio of times to files (integer) |
---|
184 | ''' |
---|
185 | |
---|
186 | if hasattr(feature.value.rangeSet, 'valueArray'): |
---|
187 | if hasattr(feature.value.rangeSet.valueArray.valueComponent, 'insertedExtract'): |
---|
188 | try: |
---|
189 | numFiles= len( csmlutils.listify(feature.value.rangeSet.valueArray.valueComponent.insertedExtract.components)[0].fileList.fileNames.CONTENT.split()) |
---|
190 | except: |
---|
191 | try: |
---|
192 | numFiles= len(csmlutils.listify(feature.value.rangeSet.valueArray.valueComponent.insertedExtract.components)[0].fileName.CONTENT) |
---|
193 | except: |
---|
194 | numFiles= len(csmlutils.listify(feature.value.rangeSet.valueArray.valueComponent.insertedExtract.fileName.CONTENT)) |
---|
195 | timeToFileRatio= len(domain[timeName])/numFiles |
---|
196 | if timeToFileRatio==0: |
---|
197 | timeToFileRatio=None |
---|
198 | return timeToFileRatio |
---|
199 | |
---|
200 | def getCoordTransformTable(domainSubset, crs, frame): |
---|
201 | ''' |
---|
202 | Given a domainSubset and a crs and a temporal reference fram returns a csml.parser.coordTransformTable object |
---|
203 | @param domainSubset: dictionary |
---|
204 | @param crs: a crs identifier |
---|
205 | @param frame: a temporal reference system identifier |
---|
206 | ''' |
---|
207 | cTT=csml.parser.GridCoordinatesTable() |
---|
208 | ords =[] |
---|
209 | for key in domainSubset.keys(): |
---|
210 | go=csml.parser.GridOrdinateDescription() |
---|
211 | go.coordAxisLabel=csml.parser.csString(key) |
---|
212 | go.gridAxesSpanned=csml.parser.csString(key) |
---|
213 | go.coordAxisValues = csml.parser.SpatialOrTemporalPositionList() |
---|
214 | go.coordAxisValues.id=csml.csmllibs.csmlextra.getRandomID() |
---|
215 | vals=str(domainSubset[key]).replace(',',' ') |
---|
216 | |
---|
217 | if key==crs.axes[crs.timeAxis]: |
---|
218 | go.coordAxisValues.timePositionList=csml.parser.csString(vals) |
---|
219 | go.coordAxisValues.frame = frame |
---|
220 | #self.domain.key placeholder |
---|
221 | else: |
---|
222 | go.coordAxisValues.coordinateList=csml.parser.csString(vals) #self.domain.key placeholder |
---|
223 | seqRule= csml.parser.SequenceRule() |
---|
224 | seqRule.CONTENT='Linear' |
---|
225 | seqRule.axisOrder='+1' #TO DO. Work this out. |
---|
226 | go.sequenceRule=seqRule |
---|
227 | ords.append(go) #go needs a few more properties setting |
---|
228 | cTT.gridOrdinates=ords |
---|
229 | return cTT |
---|
230 | |
---|
231 | |
---|
232 | |
---|
233 | def getTheData(feature, selection, times,timeName, timeAxis): |
---|
234 | ''' |
---|
235 | Wraps a lot of the detail involved in making a request for data |
---|
236 | @param feature: a feature instance |
---|
237 | @param selection: the selection object |
---|
238 | @param times: the times to select |
---|
239 | @param timeName: the name of the time axis |
---|
240 | @param timeaxis: TODO REMOVE THIS. |
---|
241 | @return: times, axisorder, units, fulldata, fillvalue, where fulldata contains the actual data array, and the rest is ancilliary info about the data. |
---|
242 | ''' |
---|
243 | |
---|
244 | #SOME OF THIS SHOULD PROBABLY BE IN THE DATA IO LAYER |
---|
245 | #add time range back into the selection |
---|
246 | |
---|
247 | if len(times)==1: |
---|
248 | selection[timeName]=times[0] |
---|
249 | else: |
---|
250 | selection[timeName]=(times[0], times[len(times)-1]) |
---|
251 | domain = feature.domain |
---|
252 | value=feature.value |
---|
253 | files=[] |
---|
254 | strTimes='' |
---|
255 | fulldata=None |
---|
256 | #get the ratio of times to files |
---|
257 | timeToFileRatio = _getTimeToFileRatio(feature, domain, timeName) |
---|
258 | |
---|
259 | #list to keep track of files that have already been fetched. eg. if multiple times are in a single file only need to get data from that file once... |
---|
260 | filesFetched=[] |
---|
261 | |
---|
262 | #get list of file extracts |
---|
263 | try: |
---|
264 | componentlist = csmlutils.listify(value.rangeSet.valueArray.valueComponent.insertedExtract.components) |
---|
265 | except: |
---|
266 | componentlist = csmlutils.listify(value.rangeSet.valueArray.valueComponent.insertedExtract) |
---|
267 | for time in times: |
---|
268 | listPosition=domain[timeName].index(time) |
---|
269 | strTimes= strTimes + ' ' + time |
---|
270 | for comp in componentlist: |
---|
271 | if timeToFileRatio == None: |
---|
272 | filePos=None |
---|
273 | else: |
---|
274 | filePos=int(float(listPosition)/timeToFileRatio) |
---|
275 | if filePos in filesFetched: |
---|
276 | continue #already got data from this file, try next time |
---|
277 | data, fillvalue, axisorder, units=comp.getData(fileposition=filePos, **selection) |
---|
278 | if filePos is None: |
---|
279 | files.append(comp.fileName.CONTENT) |
---|
280 | else: |
---|
281 | try: |
---|
282 | files.append(comp.fileList.fileNames.CONTENT[filePos]) #TODO, get the right file name |
---|
283 | except: |
---|
284 | files.append(comp.fileName.CONTENT) |
---|
285 | if fulldata is None: |
---|
286 | fulldata=data |
---|
287 | else: |
---|
288 | newfulldata=Numeric.concatenate([fulldata,data], axis=0) |
---|
289 | fulldata=newfulldata |
---|
290 | filesFetched.append(filePos) |
---|
291 | if hasattr(value.rangeSet, 'valueArray'): |
---|
292 | units.append(value.rangeSet.valueArray.valueComponent.uom) # final unit is that of the parameter |
---|
293 | else: |
---|
294 | units.append ('unitsTBA') |
---|
295 | |
---|
296 | return strTimes, axisorder, units, fulldata, fillvalue |
---|
297 | |
---|
298 | def genericSubset(feature, outputdir, ncname, domain, kwargs): |
---|
299 | ''' |
---|
300 | High level wrapper method for subsetting features, calls most of the other functions in this module. |
---|
301 | @param feature: a feature instance |
---|
302 | @param outputdir: outputdir to write files |
---|
303 | @param ncname: name of netcdf file to create |
---|
304 | @param domain: domain of feature(dictionary) |
---|
305 | @param kwargs: subset request (dictionary) |
---|
306 | ''' |
---|
307 | |
---|
308 | if outputdir is not None: |
---|
309 | pathToSubsetNetCDF=outputdir+'/' +ncname |
---|
310 | else: |
---|
311 | pathToSubsetNetCDF=ncname |
---|
312 | |
---|
313 | #deal with longitude requests |
---|
314 | #if the request is in -ve,+ve eg (-30,30) but the data is in (0,360) need to handle this by changing the args. |
---|
315 | kwargs=csmlutils.fixLongitude(domain,kwargs) |
---|
316 | |
---|
317 | |
---|
318 | #if request doesn't match domain points find nearest neighbours |
---|
319 | kwargs=csml.API.genSubset.checkNeighbours(domain, **kwargs) |
---|
320 | |
---|
321 | #get the name of the time axis in the coordinate reference system |
---|
322 | cat=csml.csmllibs.csmlcrs.CRSCatalogue() |
---|
323 | if type(feature) == csml.parser.GridSeriesFeature: |
---|
324 | crs=cat.getCRS(feature.value.gridSeriesDomain.srsName) |
---|
325 | gridordinates = feature.value.gridSeriesDomain.coordTransformTable.gridOrdinates |
---|
326 | elif type(feature) == csml.parser.ProfileSeriesFeature: |
---|
327 | crs=cat.getCRS(feature.value.profileSeriesDomain.srsName) |
---|
328 | gridordinates = feature.value.profileSeriesDomain.coordTransformTable.gridOrdinates |
---|
329 | elif type(feature) == csml.parser.TrajectoryFeature: |
---|
330 | crs=cat.getCRS(feature.value.trajectoryDomain.srsName) |
---|
331 | gridordinates = feature.value.trajectoryDomain.coordTransformTable.gridOrdinates |
---|
332 | timeName=crs.axes[crs.timeAxis] |
---|
333 | #Find the original time name (timeAxis) for the file. |
---|
334 | for gridOrd in gridordinates: |
---|
335 | if gridOrd.coordAxisLabel.CONTENT==timeName: |
---|
336 | timeAxis = gridOrd.gridAxesSpanned.CONTENT #only works for regular grids atm |
---|
337 | break |
---|
338 | #set the reference system for the time axis |
---|
339 | caltype=_setCalendar(feature, timeName, gridordinates) |
---|
340 | try: |
---|
341 | timeSelection=kwargs[timeName] |
---|
342 | except KeyError: |
---|
343 | timeSelection=[] |
---|
344 | times=_getTimes(timeSelection, timeName,domain) |
---|
345 | return pathToSubsetNetCDF, kwargs, timeAxis,timeName, caltype, times |
---|