#!/usr/bin/env python
#**************************************************************************************
#csmlfeaturetypes.py
#For creating CSML featuretypes
#v0.5 split off 11th November 2005
#Dominic Lowe, BADC
#**************************************************************************************

import sys

import csml.csmllibs
import csml.parser as cp


13 | class featureBuilder(object): |
---|
14 | def __init__(self, dataset_element, gml_FeatureCollection_element, ffmap,fileExtractDictionary, timedim, timestorage,spatialstorage,valuestorage): |
---|
15 | self.ds_element=dataset_element |
---|
16 | self.gml_FeatureCollection_element = gml_FeatureCollection_element |
---|
17 | self.ffmap = ffmap |
---|
18 | self.fileExtractDictionary = fileExtractDictionary |
---|
19 | self.timedim = timedim |
---|
20 | self.timestorage=timestorage |
---|
21 | self.spatialstorage=spatialstorage |
---|
22 | self.valuestorage=valuestorage |
---|
23 | |
---|
24 | #empty list to hold featureMembers |
---|
25 | self.fms =[] |
---|
26 | |
---|
27 | #at the moment, only one featuretype per CSML Dataset is supported. |
---|
28 | #get the featuretype of the first representative file in the ffmap object |
---|
29 | self.featuretype= self.ffmap.getRepresentativeFiles()[0].getFeatureType() |
---|
30 | #and create the features |
---|
31 | print 'determining feature type' |
---|
32 | print self.featuretype |
---|
33 | if self.featuretype == 'GridSeries': |
---|
34 | self.createCSMLGridSeriesFeatures() |
---|
35 | elif self.featuretype == 'PointSeries': |
---|
36 | self.createCSMLPointSeriesFeatures() |
---|
37 | |
---|
38 | #after the features have been generated append all featureMembers to the feature collection |
---|
39 | self.gml_FeatureCollection_element.members=self.fms |
---|
40 | |
---|
41 | |
---|
42 | #Some internal methods that are of use to all feature types: |
---|
43 | def _getDescriptiveName(self,DI): |
---|
44 | #given a data interface class with the variable or axis set, try to get a descriptive name |
---|
45 | #eg. long name |
---|
46 | try: |
---|
47 | descName=DI.getVariableAttribute('long_name') |
---|
48 | descName |
---|
49 | except AttributeError: |
---|
50 | descName = "missing name" |
---|
51 | descName=descName.replace('&','&') #remove ampersands TODO- extend this |
---|
52 | return descName |
---|
53 | |
---|
54 | def _populateListOfFiles(self,repfile): |
---|
55 | #given a representative file, get list of files: one representative file + all related files |
---|
56 | listOfFiles=[] |
---|
57 | repfilename=repfile.getRepresentativeFileName() |
---|
58 | listOfFiles.append(repfilename) |
---|
59 | relfiles = repfile.getRelatedFiles() |
---|
60 | for f in relfiles: |
---|
61 | fname = f.getRelatedFileName() |
---|
62 | listOfFiles.append(fname) |
---|
63 | return repfilename,listOfFiles |
---|
64 | |
---|
65 | def _getFilesAndTimes(self): |
---|
66 | #TODO try and speed up csmllibs.csmltime.getFileTimeList |
---|
67 | OrderedFileTimeList,self.caltype,self.units = csml.csmllibs.csmltime.getFileTimeList(self.listOfFiles,self.timedim) |
---|
68 | #build strings to hold times/filenames for current gridseriesfeature |
---|
69 | self.timeString ='' |
---|
70 | self.filesinDir = '' |
---|
71 | for j in range (0, len(OrderedFileTimeList)): |
---|
72 | t= OrderedFileTimeList[j][0] |
---|
73 | f = OrderedFileTimeList[j][1] |
---|
74 | self.timeString = self.timeString + ' ' + str(t) |
---|
75 | self.filesinDir = self.filesinDir + ' ' + f |
---|
76 | |
---|
77 | def _getCorrectExtractType(self): |
---|
78 | #returns an empty parser file extract object of the correct type. |
---|
79 | if self.extractType=='NetCDFExtract': |
---|
80 | fe = csml.parser.NetCDFExtract() |
---|
81 | if self.extractType=='NASAAmesExtract': |
---|
82 | fe = csml.parser.NASAAmesExtract() |
---|
83 | if self.extractType=='GRIBExtract': |
---|
84 | fe = csml.parser.GRIBExtract() |
---|
85 | if self.extractType=='PPExtract': |
---|
86 | fe = csml.parser.PPExtract() |
---|
87 | return fe |
---|
88 | |
---|
89 | |
---|
90 | def createCSMLGridSeriesFeatures(self): |
---|
91 | #This method assumes that the variables (features) are shared across identically structured files |
---|
92 | #should be supplied with a featurefilemap object (see csmlfiles for FileMapMaker) |
---|
93 | representativeFiles=self.ffmap.getRepresentativeFiles() |
---|
94 | for repfile in representativeFiles: |
---|
95 | self.repfilename,self.listOfFiles=self._populateListOfFiles(repfile) |
---|
96 | self._getFilesAndTimes() |
---|
97 | #Open representative file and create feature members: |
---|
98 | DI = csml.csmllibs.csmldataiface.DataInterface() |
---|
99 | DI=DI.getUnknownInterfaceType(self.repfilename) |
---|
100 | DI.openFile(self.repfilename) |
---|
101 | allVarNames=DI.getListofVariables() |
---|
102 | numFeatures=len(allVarNames) |
---|
103 | #Create a GridSeriesFeature for each variable: |
---|
104 | for i in range(0, numFeatures): |
---|
105 | DI.setVariable(allVarNames[i]) |
---|
106 | dimNames=DI.getVariableAxes() |
---|
107 | if len(dimNames) <= 2: |
---|
108 | #it's an axis or bounds not a feature, try next variable |
---|
109 | continue |
---|
110 | gsFeature=cp.GridSeriesFeature() |
---|
111 | gsFeature.id=str(allVarNames[i]) |
---|
112 | desc = self._getDescriptiveName(DI) |
---|
113 | #GridSeriesFeature_element.description=csml.parser.Description(desc) |
---|
114 | gsFeature.description=desc |
---|
115 | #value (coverage) |
---|
116 | gsCoverage=cp.GridSeriesCoverage() |
---|
117 | gsDomain=cp.GridSeriesDomain() |
---|
118 | gcT=cp.GridCoordinatesTable() |
---|
119 | |
---|
120 | #add an axisName element(s) for each spatial dimension. |
---|
121 | #and an ordinate element |
---|
122 | axes=' ' |
---|
123 | for i in range (len(dimNames)): |
---|
124 | i=i+1 |
---|
125 | axisname ='dim'+str(i) |
---|
126 | axes =axes + axisname + ' ' |
---|
127 | gsDomain.axisLabels=cp.csString(axes) |
---|
128 | ordinates=[] |
---|
129 | for dimName in enumerate(dimNames): |
---|
130 | ord=cp.GridOrdinateDescription() |
---|
131 | ord.gridAxesSpanned=cp.csString('dim' + str(dimName[0])) |
---|
132 | ord.coordAxisLabel=cp.csString(dimName[1]) |
---|
133 | ord.sequenceRule=cp.csString(csml.csmllibs.csmlextra.getSeqRule(len(dimNames))) |
---|
134 | sptList=cp.SpatialOrTemporalPositionList() |
---|
135 | |
---|
136 | if dimName[1]==self.timedim: |
---|
137 | #this is the time dimension. handle calendaring etc when getting the data. |
---|
138 | if self.timestorage=='fileextract': |
---|
139 | #look up file extract name in dictionary |
---|
140 | #(axisid stored in dictionary = current filename + variable name) |
---|
141 | axisid=self.repfilename+dimName[1] |
---|
142 | sptList.coordinateList=cp.csString('#'+self.fileExtractDictionary[axisid]) |
---|
143 | else: |
---|
144 | #store times inline |
---|
145 | DI.setAxis(dimName[1]) |
---|
146 | sptList.coordinateList=cp.csString(self.timeString) |
---|
147 | sptList.frame='%s:%s'%(self.caltype,self.units) |
---|
148 | |
---|
149 | else: #for all other dimensions, create ordinates |
---|
150 | if self.spatialstorage=='fileextract': |
---|
151 | #look up file extract name in dictionary |
---|
152 | #(axisid stored in dictionary = current filename + variable name) |
---|
153 | axisid=self.repfilename+dimName[1] |
---|
154 | sptList.coordinateList=cp.csString('#'+self.fileExtractDictionary[axisid]) |
---|
155 | else: |
---|
156 | #store inline |
---|
157 | DI.setAxis(dimName[1]) |
---|
158 | sptList.coordinateList=cp.csString(csml.csmllibs.csmlextra.cleanString(str(DI.getDataForAxis()))) |
---|
159 | ord.coordAxisValues=sptList |
---|
160 | gcT.addChildElem('gridOrdinates',ord) |
---|
161 | gsDomain.coordTransformTable=gcT |
---|
162 | gsCoverage.gridSeriesDomain=gsDomain |
---|
163 | gsFeature.value=gsCoverage |
---|
164 | gsDomain=csml.parser.GridSeriesDomain() |
---|
165 | |
---|
166 | #DOMAIN REFERENCE |
---|
167 | #tpl=csml.parser.TimePositionList() |
---|
168 | #if self.timestorage =='inline': |
---|
169 | #tpl.timePositions=self.timeString |
---|
170 | #tpl.frame='%s:%s'%(self.caltype,self.units) |
---|
171 | #else: |
---|
172 | ## do something to create a single extract for the times (from the representative file). |
---|
173 | #tpl.timePositions = csml.csmllibs.csmlfileextracts.createSingleExtract(self.extractType, repfilename, self.timedim, len(self.timeString.split())) |
---|
174 | #tpl.frame='%s:%s'%(self.caltype,self.units) |
---|
175 | #gsDomain.domainReference=tpl |
---|
176 | #grid=csml.parser.Grid() |
---|
177 | #COVERAGE FUNCTION |
---|
178 | #mr =csml.parser.MappingRule(csml.csmllibs.csmlextra.getMappingRule(len(dimNames))) |
---|
179 | #GridSeriesFeature_element.coverageFunction=mr |
---|
180 | ##RANGESET |
---|
181 | #arrSz = DI.getArraySizeOfVar() |
---|
182 | #try: |
---|
183 | #strUom = DI.getVariableAttribute('units') |
---|
184 | #except AttributeError: |
---|
185 | ## if units attribute doesn't exist: |
---|
186 | #strUom ="dimensionless or units not determined" |
---|
187 | #rs=csml.parser.RangeSet() |
---|
188 | #if self.valuestorage=='inline': |
---|
189 | ##TO DO, store the rangeset inline - use Datablock class??? |
---|
190 | #pass |
---|
191 | #else: |
---|
192 | ##store the rangeSet as an aggregatedArray |
---|
193 | #aa=csml.parser.AggregatedArray() |
---|
194 | #aa.arraySize=[] |
---|
195 | #aa.arraySize.append(arrSz) |
---|
196 | #aa.uom=strUom |
---|
197 | #aa.aggType='new' #can it be anything else? |
---|
198 | #aa.aggIndex='1' |
---|
199 | ##FileExtract (fe) element will be NetCDF/GRIB/PPExtract element (As defined earlier in ExtractType) |
---|
200 | #self.extractType= DI.extractType |
---|
201 | #fe = self._getCorrectExtractType() |
---|
202 | #varSize=DI.getShapeOfVar() |
---|
203 | #fe.arraySize=varSize |
---|
204 | #fe.fileName=self.filesinDir |
---|
205 | #fe.variableName=allVarNames[i] |
---|
206 | #aa.component=[fe] |
---|
207 | #rs.aggregatedArray=aa |
---|
208 | #GridSeriesFeature_element.rangeSet=rs |
---|
209 | ##DOMAIN COMPLEMENT |
---|
210 | #grid.srsName='urn:EPSG:GeographicCRS:4326' |
---|
211 | #numSpDims=len(varSize) -1 |
---|
212 | #grid.srsDimension=str(numSpDims) |
---|
213 | #grid.dimension=str(numSpDims) |
---|
214 | #ge =csml.parser.GridEnvelope(low=DI.getLowLimits(), high=DI.getHighLimits()) |
---|
215 | #grid.limits=ge |
---|
216 | ##add an axisName element(s) for each spatial dimension. |
---|
217 | ## and an ordinate element |
---|
218 | #axes=[] |
---|
219 | #for i in range (1, len(dimNames)): |
---|
220 | ##axisNames |
---|
221 | #axisname ='dim'+str(i) |
---|
222 | #axes.append(axisname) |
---|
223 | ##ordinates |
---|
224 | #grid.ordinates=[] |
---|
225 | #for i in range (1, len(dimNames)): |
---|
226 | #ord=csml.parser.GridOrdinateDescription() |
---|
227 | #ord.gridAxesSpanned='dim' + str(i) |
---|
228 | #ord.sequenceRule=csml.csmllibs.csmlextra.getSeqRule(len(dimNames)) |
---|
229 | #dimName=dimNames[len(dimNames)-i] |
---|
230 | #ord.definesAxis=dimName |
---|
231 | ##look up file extract name in dictionary |
---|
232 | ##(axisid stored in dictionary = current filename + variable name) |
---|
233 | #axisid=self.repfilename+dimName |
---|
234 | #if self.spatialstorage=='fileextract': |
---|
235 | ##refer to extract |
---|
236 | #ord.axisValues='#'+self.fileExtractDictionary[axisid] |
---|
237 | #else: |
---|
238 | ##store inline |
---|
239 | #DI.setAxis(dimName) |
---|
240 | #ord.axisValues=csml.csmllibs.csmlextra.cleanString(str(DI.getDataForAxis())) |
---|
241 | #grid.ordinates.append(ord) |
---|
242 | #grid.axisNames=axes |
---|
243 | #gsDomain.domainComplement=grid |
---|
244 | #GridSeriesFeature_element.domain=gsDomain |
---|
245 | #GridSeriesFeature_element.parameter=csml.parser.Phenomenon(href='http://badc.rl.ac.uk/localparams#%s'%allVarNames[i]) |
---|
246 | self.fms.append(gsFeature) |
---|
247 | DI.closeFile() |
---|
248 | ###End of createCSMLGridSeriesFeatures### |
---|
249 | |
---|
250 | def createCSMLPointSeriesFeatures(self): |
---|
251 | representativeFiles=self.ffmap.getRepresentativeFiles() |
---|
252 | for repfile in representativeFiles: |
---|
253 | self.repfilename,self.listOfFiles=self._populateListOfFiles(repfile) |
---|
254 | self._getFilesAndTimes() |
---|
255 | DI = csml.csmllibs.csmldataiface.DataInterface() |
---|
256 | DI=DI.getUnknownInterfaceType(self.repfilename) |
---|
257 | DI.openFile(self.repfilename) |
---|
258 | allVarNames=DI.getListofVariables() |
---|
259 | numFeatures=len(allVarNames) |
---|
260 | try: |
---|
261 | DI.setAxis(self.timedim) |
---|
262 | times=DI.getDataForAxis() |
---|
263 | except: |
---|
264 | times = DI.getTimes() |
---|
265 | #Create features: |
---|
266 | for i in range (0, numFeatures): |
---|
267 | PointSeriesFeature_element=csml.parser.PointSeriesFeature() |
---|
268 | if str(allVarNames[i]).upper() in ['ERROR FLAG', 'ERROR']: #might need to extend this list |
---|
269 | break |
---|
270 | PointSeriesFeature_element.id=str(allVarNames[i]) |
---|
271 | desc=self._getDescriptiveName(DI) |
---|
272 | PointSeriesFeature_element.description=csml.parser.Description(desc) |
---|
273 | #DOMAIN |
---|
274 | psDomain=csml.parser.PointSeriesDomain() |
---|
275 | t=csml.parser.Trajectory() |
---|
276 | t.srsName='urn:EPSG:geographicCRS:4326' #TO Do |
---|
277 | t.locations =csml.parser.DirectPositionList(vals='1 1') |
---|
278 | |
---|
279 | if self.timestorage =='inline': |
---|
280 | tpl =csml.parser.TimePositionList() |
---|
281 | tpl.timePositions=self.timeString |
---|
282 | tpl.frame='%s:%s'%(self.caltype,self.units) |
---|
283 | t.times=tpl |
---|
284 | else: |
---|
285 | # do something to create a single extract for the times (from the representative file). |
---|
286 | tpl.timePositions = csml.csmllibs.csmlfileextracts.createSingleExtract(self.extractType, repfilename, self.timedim, len(self.timeString.split())) |
---|
287 | tpl.frame='%s:%s'%(self.caltype,self.units) |
---|
288 | |
---|
289 | |
---|
290 | # if self.timestorage =='inline': |
---|
291 | # t.times=csmllibs.Parser.TimePositionList('#RefSysX',str(times)) |
---|
292 | # else: |
---|
293 | # #todo: depends on the file mapping??? |
---|
294 | # t.times=csmllibs.Parser.TimePositionList('#RefSysX','blah') #blah = dummy times |
---|
295 | # print 'times: ' + str(allVarNames[i]) |
---|
296 | # print len(times) |
---|
297 | # print len(listOfFiles) |
---|
298 | # arraySize=len(times) * len(listOfFiles) |
---|
299 | # fextract=csmllibs.csmlfileextracts.createSingleExtract(self.extractType,filenames,self.timedim,arraySize) |
---|
300 | # tplist = csmllibs.Parser.TimePositionList(timePositions=fextract) |
---|
301 | # t.times=tplist |
---|
302 | filenames=csml.csmllibs.csmlextra.cleanString(str(self.listOfFiles)) |
---|
303 | psDomain.domainReference=t |
---|
304 | #RANGESET |
---|
305 | DI.setVariable(allVarNames[i]) |
---|
306 | try: |
---|
307 | strUom = DI.getVariableAttribute('units') |
---|
308 | except AttributeError: |
---|
309 | #if units attribute doesn't exist: |
---|
310 | strUom ="dimensionless or units not determined" |
---|
311 | try: |
---|
312 | measuredvalues = DI.getDataForVar() |
---|
313 | except: |
---|
314 | measuredvalues = ' could not get values ' |
---|
315 | rs=csml.parser.RangeSet() |
---|
316 | if self.valuestorage=='inline': |
---|
317 | #encode inline |
---|
318 | rs.quantityList=csml.parser.MeasureOrNullList(uom=strUom, val=str(measuredvalues)[1:-1]) |
---|
319 | else: |
---|
320 | #create a file extract link |
---|
321 | arraySize=len(measuredvalues)*len(self.listOfFiles) |
---|
322 | #TODO this needs to be able to handle inline, use VALUESTORAGE to determine which to use: |
---|
323 | self.extractType=DI.extractType |
---|
324 | fextract=csml.csmllibs.csmlfileextracts.createSingleExtract(self.extractType,filenames,allVarNames[i],arraySize) |
---|
325 | qlist = csml.parser.MeasureOrNullList(val=fextract) |
---|
326 | rs.quantityList=qlist |
---|
327 | PointSeriesFeature_element.rangeSet=rs |
---|
328 | #COVERAGEFUNCTION |
---|
329 | #PARAMETER |
---|
330 | #need to do parameter and coverageFunction elements |
---|
331 | PointSeriesFeature_element.domain=psDomain |
---|
332 | self.fms.append(PointSeriesFeature_element) |
---|
333 | DI.closeFile() |
---|
334 | |
---|
335 | |
---|
336 | #This function needs revising in light of a) csml parser and b) new profile feature types |
---|
337 | def createCSMLProfileFeature(csmldoc, dataset_element, gml_FeatureCollection_element, ffmap, timedim): |
---|
338 | representativeFiles=ffmap.getRepresentativeFiles() |
---|
339 | listOfFiles=[] |
---|
340 | for repfile in representativeFiles: |
---|
341 | repfilename=repfile.getRepresentativeFileName() |
---|
342 | listOfFiles.append(repfilename) |
---|
343 | relfiles = repfile.getRelatedFiles() |
---|
344 | for f in relfiles: |
---|
345 | #hopefully there are no related files at the moment! |
---|
346 | fname = f.getRelatedFileName() |
---|
347 | listOfFiles.append(fname) |
---|
348 | #print listOfFiles |
---|
349 | |
---|
350 | for file in listOfFiles: |
---|
351 | DI = csmllibs.csmldataiface.DataInterface() |
---|
352 | DI=DI.getUnknownInterfaceType(file) |
---|
353 | print'opening file' |
---|
354 | DI.openFile(file) |
---|
355 | print 'getting variables' |
---|
356 | allVarNames=DI.getListofVariables() |
---|
357 | print 'getting feature count' |
---|
358 | numFeatures=len(allVarNames) |
---|
359 | |
---|
360 | print "FEATURES" |
---|
361 | print "***********" |
---|
362 | for i in range (0, len(allVarNames)): |
---|
363 | print allVarNames[i] |
---|
364 | |
---|
365 | for i in range (0, numFeatures): |
---|
366 | gml_featureMember_element=csmldoc.createElement("gml:featureMember") |
---|
367 | ProfileFeature_element=csmldoc.createElement("ProfileFeature") |
---|
368 | ProfileFeature_element.setAttribute('gml:id',str(allVarNames[i])) |
---|
369 | gml_description_element = csmldoc.createElement("gml:description") |
---|
370 | gml_featureMember_element.appendChild(ProfileFeature_element) |
---|
371 | #*********************************************************************** |
---|
372 | #PointSeriesDomain: |
---|
373 | #*********************************************************************** |
---|
374 | ProfileDomain_element=csmldoc.createElement("ProfileDomain") |
---|
375 | |
---|
376 | |
---|
377 | #*********************************************************************** |
---|
378 | # domainReference element (and sub-elements) |
---|
379 | #*********************************************************************** |
---|
380 | domainReference_element=csmldoc.createElement("domainReference") |
---|
381 | #orientedPosition_element=csmldoc.createElement("OrientedPosition") |
---|
382 | #locations_element=csmldoc.createElement("locations") |
---|
383 | #times_element=csmldoc.createElement("times") |
---|
384 | #trajectory_element.appendChild(locations_element) |
---|
385 | #trajectory_element.appendChild(times_element) |
---|
386 | #domainReference_element.appendChild(orientedPosition_element) |
---|
387 | |
---|
388 | #gml_timePositionList_element = csmldoc.createElement("gml:TimePositionList") |
---|
389 | #gml_timePositionList_element.appendChild(csmldoc.createTextNode(self.timeString)) |
---|
390 | #domainReference_element.appendChild(gml_timePositionList_element) |
---|
391 | ProfileDomain_element.appendChild(domainReference_element) |
---|
392 | #*********************************************************************** |
---|
393 | domainComplement_element=csmldoc.createElement("domainComplement") |
---|
394 | ProfileDomain_element.appendChild(domainComplement_element) |
---|
395 | |
---|
396 | #*********************************************************************** |
---|
397 | # gml:rangeSet_element |
---|
398 | #*********************************************************************** |
---|
399 | |
---|
400 | gml_rangeSet_element=csmldoc.createElement("gml:rangeSet") |
---|
401 | |
---|
402 | #*********************************************************************** |
---|
403 | # gml:coverageFunction element (and sub-element MappingRule) |
---|
404 | #*********************************************************************** |
---|
405 | gml_coverageFunction_element=csmldoc.createElement("gml:coverageFunction") |
---|
406 | MappingRule_element=csmldoc.createElement("MappingRule") |
---|
407 | #MappingRule_element.setAttribute('scanOrder',csmllibs.csmlextra.getMappingRule(len(dimNames))) |
---|
408 | MappingRule_element.setAttribute('scanOrder','tba') |
---|
409 | gml_coverageFunction_element.appendChild(MappingRule_element) |
---|
410 | |
---|
411 | |
---|
412 | gml_featureMember_element.appendChild(ProfileDomain_element) |
---|
413 | gml_featureMember_element.appendChild(gml_rangeSet_element) |
---|
414 | gml_featureMember_element.appendChild(gml_coverageFunction_element) |
---|
415 | gml_FeatureCollection_element.appendChild(gml_featureMember_element) |
---|
416 | |
---|
417 | return |
---|
418 | |
---|
419 | |
---|