1 | #!/usr/bin/env python |
---|
2 | #************************************************************************************** |
---|
3 | #csmlfeaturetypes.py |
---|
4 | #For creating CSML featuretypes |
---|
5 | #v0.5 split off 11th November 2005 |
---|
6 | #Dominic Lowe, BADC |
---|
7 | #************************************************************************************** |
---|
8 | |
---|
9 | import csml.parser as cp |
---|
10 | import csml.csmllibs |
---|
11 | import sys |
---|
12 | |
---|
13 | |
---|
class featureBuilder(object):
    """Create CSML feature members from a feature/file map.

    On construction the builder reads the feature type of the first
    representative file in ``ffmap``, builds one feature per suitable variable
    (GridSeries or PointSeries only) and assigns the resulting list to
    ``gml_FeatureCollection_element.members``.
    """

    def __init__(self, dataset_element, gml_FeatureCollection_element, ffmap, fileExtractDictionary, timedim, timestorage, spatialstorage, valuestorage):
        """Store the configuration and immediately build the features.

        dataset_element               -- stored as ``self.ds_element`` (not used further in this class)
        gml_FeatureCollection_element -- object whose ``members`` attribute receives the features
        ffmap                         -- feature/file map providing ``getRepresentativeFiles()``
        fileExtractDictionary         -- maps ``filename + axis name`` to a file extract id
        timedim                       -- name of the time dimension
        timestorage, spatialstorage, valuestorage
                                      -- storage modes compared against 'inline' / 'fileextract'
        """
        self.ds_element = dataset_element
        self.gml_FeatureCollection_element = gml_FeatureCollection_element
        self.ffmap = ffmap
        self.fileExtractDictionary = fileExtractDictionary
        self.timedim = timedim
        self.timestorage = timestorage
        self.spatialstorage = spatialstorage
        self.valuestorage = valuestorage

        # empty list to hold featureMembers
        self.fms = []

        # at the moment, only one featuretype per CSML Dataset is supported:
        # use the featuretype of the first representative file in the ffmap object
        self.featuretype = self.ffmap.getRepresentativeFiles()[0].getFeatureType()
        # single-argument print(...) behaves identically under Python 2 and 3
        print('determining feature type')
        print(self.featuretype)
        if self.featuretype == 'GridSeries':
            self.createCSMLGridSeriesFeatures()
        elif self.featuretype == 'PointSeries':
            self.createCSMLPointSeriesFeatures()

        # after the features have been generated hand all featureMembers to the feature collection
        self.gml_FeatureCollection_element.members = self.fms

    # Some internal methods that are of use to all feature types:
    def _getDescriptiveName(self, DI):
        """Return the 'long_name' attribute of the variable/axis currently set on DI.

        Falls back to "missing name" when DI raises AttributeError.  Ampersands
        are escaped as '&amp;' (the original replacement was a no-op).
        TODO: extend this to other characters that need escaping.
        """
        try:
            descName = DI.getVariableAttribute('long_name')
        except AttributeError:
            descName = "missing name"
        return descName.replace('&', '&amp;')

    def _populateListOfFiles(self, repfile):
        """Return ``(repfilename, listOfFiles)`` for a representative file.

        ``listOfFiles`` holds the representative file name first, followed by
        the names of all its related files.
        """
        repfilename = repfile.getRepresentativeFileName()
        listOfFiles = [repfilename]
        for related in repfile.getRelatedFiles():
            listOfFiles.append(related.getRelatedFileName())
        return repfilename, listOfFiles

    def _getFilesAndTimes(self):
        """Populate ``caltype``, ``units``, ``timeString`` and ``filesinDir``.

        Reads ``self.listOfFiles`` and ``self.timedim``.  Each entry returned by
        getFileTimeList is indexed as (time, filename); both output strings are
        built with a leading space before every item.
        """
        # TODO try and speed up csmllibs.csmltime.getFileTimeList
        orderedFileTimes, self.caltype, self.units = csml.csmllibs.csmltime.getFileTimeList(self.listOfFiles, self.timedim)
        timeParts = []
        fileParts = []
        for entry in orderedFileTimes:
            timeParts.append(' ' + str(entry[0]))
            fileParts.append(' ' + entry[1])
        self.timeString = ''.join(timeParts)
        self.filesinDir = ''.join(fileParts)

    def _getCorrectExtractType(self):
        """Return an empty parser file extract object matching ``self.extractType``.

        Raises ValueError for an unrecognised extract type (previously this
        surfaced as an UnboundLocalError on the return statement).
        """
        knownTypes = ('NetCDFExtract', 'NASAAmesExtract', 'GRIBExtract', 'PPExtract')
        if self.extractType not in knownTypes:
            raise ValueError('unsupported file extract type: %r' % (self.extractType,))
        return getattr(csml.parser, self.extractType)()

    def createCSMLGridSeriesFeatures(self):
        """Append one GridSeriesFeature to ``self.fms`` per gridded variable.

        This method assumes that the variables (features) are shared across
        identically structured files; it works from the featurefilemap object
        (see csmlfiles for FileMapMaker).  Variables with two or fewer axes are
        treated as axes/bounds and skipped.
        """
        for repfile in self.ffmap.getRepresentativeFiles():
            self.repfilename, self.listOfFiles = self._populateListOfFiles(repfile)
            self._getFilesAndTimes()
            # Open representative file and create feature members:
            DI = csml.csmllibs.csmldataiface.DataInterface()
            DI = DI.getUnknownInterfaceType(self.repfilename)
            DI.openFile(self.repfilename)
            try:
                # Create a GridSeriesFeature for each variable:
                for varName in DI.getListofVariables():
                    DI.setVariable(varName)
                    dimNames = DI.getVariableAxes()
                    if len(dimNames) <= 2:
                        # it's an axis or bounds not a feature, try next variable
                        continue
                    self.fms.append(self._buildGridSeriesFeature(DI, varName, dimNames))
            finally:
                # close the file even if building a feature fails
                DI.closeFile()
    ###End of createCSMLGridSeriesFeatures###

    def _buildGridSeriesFeature(self, DI, varName, dimNames):
        """Build the GridSeriesFeature for ``varName``, which must already be set on DI."""
        gsFeature = cp.GridSeriesFeature()
        gsFeature.id = str(varName)
        gsFeature.description = self._getDescriptiveName(DI)

        # VALUE (coverage)
        gsCoverage = cp.GridSeriesCoverage()
        gsDomain = cp.GridSeriesDomain()
        gcT = cp.GridCoordinatesTable()
        numDims = len(dimNames)

        # one axis label per dimension: ' dim1 dim2 ... dimN '
        axes = ' '
        for axisNumber in range(1, numDims + 1):
            axes = axes + 'dim' + str(axisNumber) + ' '
        gsDomain.axisLabels = cp.csString(axes)

        # one ordinate per dimension, numbered from 1 so that gridAxesSpanned
        # refers to the labels declared in axisLabels above
        for position, dimName in enumerate(dimNames):
            gcT.addChildElem('gridOrdinates', self._buildGridOrdinate(DI, position + 1, dimName, numDims))
        gsDomain.coordTransformTable = gcT
        gsCoverage.gridSeriesDomain = gsDomain

        # COVERAGE FUNCTION
        gsCoverage.coverageFunction = cp.csString(csml.csmllibs.csmlextra.getMappingRule(numDims))

        # RANGESET
        rs = csml.parser.RangeSet()
        arrSz = DI.getArraySizeOfVar()
        try:
            strUom = DI.getVariableAttribute('units')
        except AttributeError:
            # if units attribute doesn't exist:
            strUom = "dimensionless or units not determined"
        # TO DO, store the rangeset inline - use Datablock class???
        # (for 'inline' the RangeSet is currently left empty)
        if self.valuestorage != 'inline':
            # store the rangeSet as an aggregatedArray
            aa = cp.AggregatedArray()
            aa.arraySize = cp.csString(arrSz)
            aa.uom = cp.csString(strUom)
            aa.aggType = cp.csString('new')  # can it be anything else?
            aa.aggIndex = cp.csString('1')
            # FileExtract (fe) element will be NetCDF/NASAAmes/GRIB/PPExtract, as reported by DI
            self.extractType = DI.extractType
            fe = self._getCorrectExtractType()
            varSize = csml.csmllibs.csmlextra.cleanString1(str(DI.getShapeOfVar()))
            fe.arraySize = cp.csString(varSize)
            fe.fileName = cp.csString(self.filesinDir)
            fe.variableName = cp.csString(varName)
            aa.components = [fe]
            rs.aggregatedArray = aa
        gsCoverage.rangeSet = rs
        gsFeature.parameter = csml.parser.Phenomenon(href='http://badc.rl.ac.uk/localparams#%s' % varName)
        gsFeature.value = gsCoverage
        return gsFeature

    def _buildGridOrdinate(self, DI, axisNumber, dimName, numDims):
        """Build the GridOrdinateDescription for one dimension of the current variable.

        ``axisNumber`` is the 1-based position of ``dimName`` among the
        variable's axes; ``numDims`` is the total number of axes.
        """
        ordinate = cp.GridOrdinateDescription()
        ordinate.gridAxesSpanned = cp.csString('dim' + str(axisNumber))
        ordinate.coordAxisLabel = cp.csString(dimName)
        ordinate.sequenceRule = cp.csString(csml.csmllibs.csmlextra.getSeqRule(numDims))
        sptList = cp.SpatialOrTemporalPositionList()

        if dimName == self.timedim:
            # this is the time dimension. handle calendaring etc when getting the data.
            if self.timestorage == 'fileextract':
                # look up file extract name in dictionary
                # (axisid stored in dictionary = current filename + variable name)
                axisid = self.repfilename + dimName
                sptList.coordinateList = cp.csString('#' + self.fileExtractDictionary[axisid])
            else:
                # store times inline; the frame is only set in this case
                DI.setAxis(dimName)
                sptList.coordinateList = cp.csString(self.timeString)
                sptList.frame = '%s:%s' % (self.caltype, self.units)
        else:
            # for all other dimensions, create ordinates
            if self.spatialstorage == 'fileextract':
                # look up file extract name in dictionary
                # (axisid stored in dictionary = current filename + variable name)
                axisid = self.repfilename + dimName
                sptList.coordinateList = cp.csString('#' + self.fileExtractDictionary[axisid])
            else:
                # store inline
                DI.setAxis(dimName)
                sptList.coordinateList = cp.csString(csml.csmllibs.csmlextra.cleanString(str(DI.getDataForAxis())))
        ordinate.coordAxisValues = sptList
        return ordinate

    def createCSMLPointSeriesFeatures(self):
        """Append one PointSeriesFeature to ``self.fms`` per variable.

        Processing of a file's variables stops at the first variable named
        'ERROR FLAG' or 'ERROR' (case-insensitive).
        """
        for repfile in self.ffmap.getRepresentativeFiles():
            self.repfilename, self.listOfFiles = self._populateListOfFiles(repfile)
            self._getFilesAndTimes()
            DI = csml.csmllibs.csmldataiface.DataInterface()
            DI = DI.getUnknownInterfaceType(self.repfilename)
            DI.openFile(self.repfilename)
            try:
                allVarNames = DI.getListofVariables()
                # The time values read here are not used below; the calls are
                # kept because they also select the time axis on DI.
                try:
                    DI.setAxis(self.timedim)
                    DI.getDataForAxis()
                except Exception:
                    DI.getTimes()
                # Create features:
                for varName in allVarNames:
                    if str(varName).upper() in ['ERROR FLAG', 'ERROR']:  # might need to extend this list
                        # NOTE(review): this abandons all remaining variables,
                        # not just the error one - confirm that is intended.
                        break
                    self.fms.append(self._buildPointSeriesFeature(DI, varName))
            finally:
                # close the file even if building a feature fails
                DI.closeFile()

    def _buildPointSeriesFeature(self, DI, varName):
        """Build the PointSeriesFeature for ``varName`` using the open interface DI."""
        # select the variable first so that the description and units below
        # describe this variable rather than the previously selected one
        DI.setVariable(varName)
        feature = csml.parser.PointSeriesFeature()
        feature.id = str(varName)
        feature.description = csml.parser.Description(self._getDescriptiveName(DI))

        # DOMAIN
        psDomain = csml.parser.PointSeriesDomain()
        t = csml.parser.Trajectory()
        t.srsName = 'urn:EPSG:geographicCRS:4326'  # TO Do
        t.locations = csml.parser.DirectPositionList(vals='1 1')

        tpl = csml.parser.TimePositionList()
        if self.timestorage == 'inline':
            tpl.timePositions = self.timeString
        else:
            # create a single extract for the times (from the representative file)
            self.extractType = DI.extractType
            tpl.timePositions = csml.csmllibs.csmlfileextracts.createSingleExtract(self.extractType, self.repfilename, self.timedim, len(self.timeString.split()))
        tpl.frame = '%s:%s' % (self.caltype, self.units)
        t.times = tpl
        psDomain.domainReference = t

        # RANGESET
        try:
            strUom = DI.getVariableAttribute('units')
        except AttributeError:
            # if units attribute doesn't exist:
            strUom = "dimensionless or units not determined"
        try:
            measuredvalues = DI.getDataForVar()
        except Exception:
            # best effort: keep going with a placeholder value
            measuredvalues = ' could not get values '
        rs = csml.parser.RangeSet()
        if self.valuestorage == 'inline':
            # encode inline; [1:-1] drops the first and last character of the string form
            rs.quantityList = csml.parser.MeasureOrNullList(uom=strUom, val=str(measuredvalues)[1:-1])
        else:
            # create a file extract link
            filenames = csml.csmllibs.csmlextra.cleanString(str(self.listOfFiles))
            arraySize = len(measuredvalues) * len(self.listOfFiles)
            self.extractType = DI.extractType
            fextract = csml.csmllibs.csmlfileextracts.createSingleExtract(self.extractType, filenames, varName, arraySize)
            rs.quantityList = csml.parser.MeasureOrNullList(val=fextract)
        feature.rangeSet = rs
        # COVERAGEFUNCTION
        # PARAMETER
        # need to do parameter and coverageFunction elements
        feature.domain = psDomain
        return feature
---|
286 | |
---|
287 | |
---|
288 | #This function needs revising in light of a) csml parser and b) new profile feature types |
---|
def createCSMLProfileFeature(csmldoc, dataset_element, gml_FeatureCollection_element, ffmap, timedim):
    """Append a skeleton gml:featureMember/ProfileFeature per variable of every mapped file.

    This function needs revising in light of a) csml parser and b) new profile
    feature types; it still builds DOM elements directly.

    csmldoc                       -- document object used to create elements
    dataset_element               -- accepted but not used here
    gml_FeatureCollection_element -- element that receives each gml:featureMember
    ffmap                         -- feature/file map providing ``getRepresentativeFiles()``
    timedim                       -- accepted but not used here

    Returns None.
    """
    # collect every representative file followed by its related files
    listOfFiles = []
    for repfile in ffmap.getRepresentativeFiles():
        listOfFiles.append(repfile.getRepresentativeFileName())
        for related in repfile.getRelatedFiles():
            # hopefully there are no related files at the moment!
            listOfFiles.append(related.getRelatedFileName())

    for filename in listOfFiles:
        # the file imports csml.csmllibs, so the interface lives under csml.
        DI = csml.csmllibs.csmldataiface.DataInterface()
        DI = DI.getUnknownInterfaceType(filename)
        print('opening file')
        DI.openFile(filename)
        try:
            print('getting variables')
            allVarNames = DI.getListofVariables()
        finally:
            # only the variable names are needed from the file
            DI.closeFile()
        print('getting feature count')
        numFeatures = len(allVarNames)

        print("FEATURES")
        print("***********")
        for varName in allVarNames:
            print(varName)

        for i in range(0, numFeatures):
            gml_featureMember_element = csmldoc.createElement("gml:featureMember")
            ProfileFeature_element = csmldoc.createElement("ProfileFeature")
            ProfileFeature_element.setAttribute('gml:id', str(allVarNames[i]))
            gml_featureMember_element.appendChild(ProfileFeature_element)

            # ProfileDomain with (currently empty) domainReference and domainComplement
            ProfileDomain_element = csmldoc.createElement("ProfileDomain")
            domainReference_element = csmldoc.createElement("domainReference")
            ProfileDomain_element.appendChild(domainReference_element)
            domainComplement_element = csmldoc.createElement("domainComplement")
            ProfileDomain_element.appendChild(domainComplement_element)

            # gml:rangeSet element (left empty)
            gml_rangeSet_element = csmldoc.createElement("gml:rangeSet")

            # gml:coverageFunction element (and sub-element MappingRule)
            gml_coverageFunction_element = csmldoc.createElement("gml:coverageFunction")
            MappingRule_element = csmldoc.createElement("MappingRule")
            # scan order is a placeholder until the dimensions are inspected
            MappingRule_element.setAttribute('scanOrder', 'tba')
            gml_coverageFunction_element.appendChild(MappingRule_element)

            # NOTE(review): domain, rangeSet and coverageFunction are attached to
            # the featureMember rather than to ProfileFeature - kept as in the original.
            gml_featureMember_element.appendChild(ProfileDomain_element)
            gml_featureMember_element.appendChild(gml_rangeSet_element)
            gml_featureMember_element.appendChild(gml_coverageFunction_element)
            gml_FeatureCollection_element.appendChild(gml_featureMember_element)

    return
---|
370 | |
---|
371 | |
---|