1 | '''#getSubset.py contains generic subsetting code that can be reused for multiple features |
---|
2 | Dominic Lowe, CCLRC, 31/01/2007''' |
---|
3 | |
---|
4 | import csml |
---|
5 | import csmlutils |
---|
6 | import sys |
---|
7 | import Numeric |
---|
8 | |
---|
9 | def checkNeighbours(domain, gridnames, **kwargs): |
---|
10 | #for any non-range requests, get nearest neighbour |
---|
11 | #e.g if 'latitude': (11) requested, latitude=12 may be the nearest value |
---|
12 | |
---|
13 | for key in kwargs: |
---|
14 | #need to skip time axis |
---|
15 | #handle single values |
---|
16 | if type(kwargs[key]) is not tuple: |
---|
17 | if type(domain[key]) is not list: |
---|
18 | axeslist=domain[key].tolist() |
---|
19 | else: |
---|
20 | axeslist = domain[key] |
---|
21 | nearestNeighbour=csml.API.csmlutils.nudgeSingleValuesToAxisValues(kwargs[key],axeslist) |
---|
22 | if nearestNeighbour is not None: |
---|
23 | kwargs[key]=nearestNeighbour |
---|
24 | else: |
---|
25 | #handle ranges |
---|
26 | tmpkey=[] |
---|
27 | for val in kwargs[key]: |
---|
28 | nearestNeighbour=csml.API.csmlutils.nudgeSingleValuesToAxisValues(val, domain[key]) |
---|
29 | if nearestNeighbour is not None: |
---|
30 | tmpkey.append(nearestNeighbour) |
---|
31 | kwargs[key]=tuple(tmpkey) |
---|
32 | return kwargs |
---|
33 | |
---|
34 | def subsetDomain(timeaxis,times, domain,**kwargs): |
---|
35 | '''takes the domain and returns a subset according to the keyword selection criteria |
---|
36 | time = name of time dimension |
---|
37 | times = string of times |
---|
38 | ''' |
---|
39 | strTimes=times |
---|
40 | subsetDomain={} |
---|
41 | totalArraySize=1 |
---|
42 | for key in domain.keys(): |
---|
43 | straxisValues='' |
---|
44 | if key in kwargs: |
---|
45 | if key ==timeaxis: |
---|
46 | straxisValues=strTimes |
---|
47 | arraySize=len(strTimes.split()) |
---|
48 | elif type(kwargs[key]) is not tuple: |
---|
49 | #only one value not a range |
---|
50 | straxisValues=str(kwargs[key]) |
---|
51 | elif kwargs[key][0] < kwargs[key][1]: |
---|
52 | for val in domain[key]: |
---|
53 | if val is not '': |
---|
54 | if float(val) >= kwargs[key][0]: |
---|
55 | if float(val) <= kwargs[key] [1]: |
---|
56 | straxisValues=straxisValues+ str(val) + ',' |
---|
57 | straxisValues=straxisValues[:-1] |
---|
58 | else:#this if deals with selections such as longitude (330,30) where the lower limit is 'greater' than the upper limit in a mathematical sense. |
---|
59 | for val in domain[key]: |
---|
60 | if val is not '': |
---|
61 | if val >= kwargs[key][0]: |
---|
62 | straxisValues=straxisValues+ str(val) + ',' |
---|
63 | for val in domain[key]: |
---|
64 | if val is not '': |
---|
65 | if val <= kwargs[key] [1]: |
---|
66 | straxisValues=straxisValues+ str(val) + ',' |
---|
67 | straxisValues=straxisValues[:-1] |
---|
68 | else: # this dimension has not been subsetted at all - CHECK THIS |
---|
69 | for val in domain[key]: |
---|
70 | if val is not '': |
---|
71 | straxisValues=straxisValues+ str(val) + ',' |
---|
72 | straxisValues=straxisValues[:-1] |
---|
73 | |
---|
74 | subsetDomain[key]=straxisValues |
---|
75 | if key != timeaxis: |
---|
76 | arraySize=len(subsetDomain[key].split(',')) |
---|
77 | else: |
---|
78 | arraySize=len(subsetDomain[key].split()) |
---|
79 | totalArraySize = totalArraySize * arraySize |
---|
80 | |
---|
81 | return subsetDomain, totalArraySize |
---|
82 | |
---|
83 | def _setCalendar(feature, timeName, ords): |
---|
84 | '''sets the cdtime calendar needed for conversion from CSML to NetCDF''' |
---|
85 | calset=False |
---|
86 | for gridOrd in ords: |
---|
87 | if gridOrd.coordAxisLabel.CONTENT==timeName: |
---|
88 | try: |
---|
89 | caltype=gridOrd.coordAxisValues.timePositionList.frame.split(':',2)[1] |
---|
90 | csml.csmllibs.csmltime.setcdtimeCalendar(caltype) |
---|
91 | calset=True |
---|
92 | except:pass |
---|
93 | if calset!=True: |
---|
94 | csml.csmllibs.csmltime.setcdtimeCalendar(csml.csmllibs.csmltime.cdtime.DefaultCalendar) |
---|
95 | try: |
---|
96 | caltype=gridOrd.coordAxisValues.timePositionList.frame.split(':',2)[1] |
---|
97 | csml.csmllibs.csmltime.setcdtimeCalendar(caltype) |
---|
98 | except: |
---|
99 | csml.csmllibs.csmltime.setcdtimeCalendar(csml.csmllibs.csmltime.cdtime.DefaultCalendar) |
---|
100 | caltype='standard' |
---|
101 | return caltype |
---|
102 | |
---|
103 | |
---|
104 | def _getTimes(timeSelection, timeName, domain): |
---|
105 | #currently supporting domain subsetting only by CRS name |
---|
106 | #(but should be easy to extend later) |
---|
107 | if type(timeSelection) is str: |
---|
108 | times=(timeSelection,) |
---|
109 | else: |
---|
110 | times=timeSelection |
---|
111 | |
---|
112 | if len(times)==0: |
---|
113 | #no time specified, select all. |
---|
114 | times= domain[timeName] |
---|
115 | tone=csml.csmllibs.csmltime.getCDtime(times[0]) |
---|
116 | if len(times) == 2: |
---|
117 | #then this is a range of times (t1, tn), and not a list |
---|
118 | try: |
---|
119 | tone=csml.csmllibs.csmltime.getCDtime(times[0]) |
---|
120 | except: |
---|
121 | tone=times[0] |
---|
122 | try: |
---|
123 | ttwo=csml.csmllibs.csmltime.getCDtime(times[1]) |
---|
124 | except: |
---|
125 | ttwo=times[1] |
---|
126 | times=[] |
---|
127 | for time in domain[timeName]: |
---|
128 | timeok=csml.csmllibs.csmltime.compareTimes(tone,time,ttwo) |
---|
129 | if timeok ==1: |
---|
130 | times.append(time) |
---|
131 | return times |
---|
132 | |
---|
133 | def _getTimeToFileRatio(feature,domain, timeName): |
---|
134 | if hasattr(feature.value.rangeSet, 'valueArray'): |
---|
135 | if hasattr(feature.value.rangeSet.valueArray.valueComponent, 'insertedExtract'): |
---|
136 | try: |
---|
137 | numFiles= len( csmlutils.listify(feature.value.rangeSet.valueArray.valueComponent.insertedExtract.components)[0].fileList.fileNames.CONTENT.split()) |
---|
138 | except: |
---|
139 | try: |
---|
140 | numFiles= len(csmlutils.listify(feature.value.rangeSet.valueArray.valueComponent.insertedExtract.components)[0].fileName.CONTENT) |
---|
141 | except: |
---|
142 | numFiles= len(csmlutils.listify(feature.value.rangeSet.valueArray.valueComponent.insertedExtract.fileName.CONTENT)) |
---|
143 | timeToFileRatio= len(domain[timeName])/numFiles |
---|
144 | if timeToFileRatio==0: |
---|
145 | timeToFileRatio=None |
---|
146 | return timeToFileRatio |
---|
147 | |
---|
148 | def getCoordTransformTable(domainSubset, crs, frame): |
---|
149 | cTT=csml.parser.GridCoordinatesTable() |
---|
150 | ords =[] |
---|
151 | for key in domainSubset.keys(): |
---|
152 | go=csml.parser.GridOrdinateDescription() |
---|
153 | go.coordAxisLabel=csml.parser.csString(key) |
---|
154 | go.gridAxesSpanned=csml.parser.csString(key) |
---|
155 | go.coordAxisValues = csml.parser.SpatialOrTemporalPositionList() |
---|
156 | go.coordAxisValues.id=csml.csmllibs.csmlextra.getRandomID() |
---|
157 | vals=str(domainSubset[key]).replace(',',' ') |
---|
158 | |
---|
159 | if key==crs.axes[crs.timeAxis]: |
---|
160 | go.coordAxisValues.timePositionList=csml.parser.csString(vals) |
---|
161 | go.coordAxisValues.frame = frame |
---|
162 | #self.domain.key placeholder |
---|
163 | else: |
---|
164 | go.coordAxisValues.coordinateList=csml.parser.csString(vals) #self.domain.key placeholder |
---|
165 | seqRule= csml.parser.SequenceRule() |
---|
166 | seqRule.CONTENT='Linear' |
---|
167 | seqRule.axisOrder='+1' #TO DO. Work this out. |
---|
168 | go.sequenceRule=seqRule |
---|
169 | ords.append(go) #go needs a few more properties setting |
---|
170 | cTT.gridOrdinates=ords |
---|
171 | return cTT |
---|
172 | |
---|
173 | |
---|
174 | |
---|
175 | def getTheData(feature, selection, times,timeName): |
---|
176 | |
---|
177 | #SOME OF THIS SHOULD PROBABLY BE IN THE DATA IO LAYER |
---|
178 | domain = feature.domain |
---|
179 | value=feature.value |
---|
180 | files=[] |
---|
181 | strTimes='' |
---|
182 | fulldata=None |
---|
183 | #get the ratio of times to files |
---|
184 | timeToFileRatio = _getTimeToFileRatio(feature, domain, timeName) |
---|
185 | |
---|
186 | #list to keep track of files that have already been fetched. eg. if multiple times are in a single file only need to get data from that file once... |
---|
187 | filesFetched=[] |
---|
188 | |
---|
189 | #get list of file extracts |
---|
190 | try: |
---|
191 | componentlist = csmlutils.listify(value.rangeSet.valueArray.valueComponent.insertedExtract.components) |
---|
192 | except: |
---|
193 | componentlist = csmlutils.listify(value.rangeSet.valueArray.valueComponent.insertedExtract) |
---|
194 | print domain |
---|
195 | print times |
---|
196 | for time in times: |
---|
197 | listPosition=domain[timeName].index(time) |
---|
198 | strTimes= strTimes + ' ' + time |
---|
199 | for comp in componentlist: |
---|
200 | if timeToFileRatio == None: |
---|
201 | filePos=None |
---|
202 | else: |
---|
203 | filePos=int(float(listPosition)/timeToFileRatio) |
---|
204 | if filePos in filesFetched: |
---|
205 | continue #already got data from this file, try next time |
---|
206 | data, fillvalue, axisorder, units=comp.getData(fileposition=filePos, **selection) |
---|
207 | if filePos is None: |
---|
208 | files.append(comp.fileName.CONTENT) |
---|
209 | else: |
---|
210 | try: |
---|
211 | files.append(comp.fileList.fileNames.CONTENT[filePos]) #TODO, get the right file name |
---|
212 | except: |
---|
213 | files.append(comp.fileName.CONTENT) |
---|
214 | |
---|
215 | #open the file |
---|
216 | |
---|
217 | |
---|
218 | if fulldata is None: |
---|
219 | fulldata=data |
---|
220 | else: |
---|
221 | newfulldata=Numeric.concatenate([fulldata,data], axis=0) |
---|
222 | fulldata=newfulldata |
---|
223 | filesFetched.append(filePos) |
---|
224 | if hasattr(value.rangeSet, 'valueArrayl'): |
---|
225 | units.append(value.rangeSet.valueArray.valueComponent.quantityList.uom) # final unit is that of the parameter |
---|
226 | else: |
---|
227 | units.append ('unitsTBA') |
---|
228 | return strTimes, axisorder, units, fulldata, fillvalue |
---|
229 | |
---|
230 | def genericSubset(feature, outputdir, ncname, domain, kwargs): |
---|
231 | if outputdir is not None: |
---|
232 | pathToSubsetNetCDF=outputdir+'/' +ncname |
---|
233 | else: |
---|
234 | pathToSubsetNetCDF=ncname |
---|
235 | |
---|
236 | #deal with longitude requests |
---|
237 | #if the request is in -ve,+ve eg (-30,30) but the data is in (0,360) need to handle this by changing the args. |
---|
238 | kwargs=csmlutils.fixLongitude(domain,kwargs) |
---|
239 | |
---|
240 | #get the name of the time axis in the coordinate reference system |
---|
241 | cat=csml.csmllibs.csmlcrs.CRSCatalogue() |
---|
242 | if type(feature) == csml.parser.GridSeriesFeature: |
---|
243 | crs=cat.getCRS(feature.value.gridSeriesDomain.srsName,labels=feature.value.gridSeriesDomain.axisLabels) |
---|
244 | gridordinates = feature.value.gridSeriesDomain.coordTransformTable.gridOrdinates |
---|
245 | elif type(feature) == csml.parser.ProfileSeriesFeature: |
---|
246 | crs=cat.getCRS(feature.value.profileSeriesDomain.srsName,labels = feature.value.profileSeriesDomain.axisLabels) |
---|
247 | gridordinates = feature.value.profileSeriesDomain.coordTransformTable.gridOrdinates |
---|
248 | |
---|
249 | timeName=crs.axes[crs.timeAxis] |
---|
250 | #Find the original time name (timeAxis) for the file. |
---|
251 | for gridOrd in gridordinates: |
---|
252 | if gridOrd.coordAxisLabel.CONTENT==timeName: |
---|
253 | timeAxis = gridOrd.gridAxesSpanned.CONTENT #only works for regular grids atm |
---|
254 | break |
---|
255 | #set the reference system for the time axis |
---|
256 | caltype=_setCalendar(feature, timeName, gridordinates) |
---|
257 | |
---|
258 | try: |
---|
259 | timeSelection=kwargs[timeName] |
---|
260 | except KeyError: |
---|
261 | timeSelection=[] |
---|
262 | |
---|
263 | times=_getTimes(timeSelection, timeName,domain) |
---|
264 | return pathToSubsetNetCDF, kwargs, timeAxis,timeName, caltype, times |
---|