source: TI02-CSML/trunk/parser/csmlio.py @ 929

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI02-CSML/trunk/parser/csmlio.py@929
Revision 929, 14.7 KB checked in by domlowe, 13 years ago (diff)

basic writing of subsetted netcdf file

Line 
1#csmlio.py
2#Contains Parser wrapper'functions to enable reading of data
3
4from  Parser import *
5import nappy
6import cdms
7import parser_extra
8from xml.dom.minidom import parseString
9import elementtree.ElementTree as etree
10
11#import the DataInterface module from the Scanner, assumes it is in a sibling directory to the one containing csmlio.py
12#TODO - how best to organise modules...
13import os
14currentPath=os.getcwd()
15parentPath=os.sep.join(currentPath.split(os.sep)[:-1])
16parserPath=parentPath + '/Scanner/csmllibs'
17sys.path.append(parserPath) #append the parser path to sys.path
18try:
19    import csmldataiface
20except:
21    print 'Could not import CSML Data Interface module. Make sure the Parser code is in ../newparser directory on the same level as ../Scanner directory.'
22    sys.exit()
23
24
25class CSMLinterface:
26    #Wrapper class containing methods to read/write from/to CSML
27    #Uses parser.
28    def __init__(self):
29        self.dataset=None
30        self.currentFeature=None
31        self.currentArrayDescriptor = None
32        self.domainReference=None
33        self.domainComplement=None
34        #this is a fix to the  ElementTree namespace problem that namespaces are usually represented as ns0, ns1, ns2 etc.
35        etree._namespace_map.update({
36        'http://www.opengis.net/om': 'om',  'http://www.opengis.net/gml': 'gml','http://ndg.nerc.ac.uk/csml' : 'csml', 'http://www.w3.org/1999/xlink':'xlink'})
37   
38    def __setFeature(self,featureID):
39        #given a featureID, set the currentFeature if it is not already set
40        #this is used to check the correct feature is always set.   
41            for member in self.dataset.featureCollection.members:
42                if member.id == featureID:
43                    self.currentFeature = member
44
45    def __setArrayDescriptor(self,fileExtractID):
46        #given a fileExtractID, set the currentArrayDescriptor if it is not already set
47        if self.currentArrayDescriptor is None:
48            for arrayDescriptor in self.dataset.arrayDescriptors:
49                if arrayDescriptor.id == fileExtractID:
50                    self.currentArrayDescriptor=arrayDescriptor
51        if self.currentArrayDescriptor.id != fileExtractID:
52            for arrayDescriptor in self.dataset.arrayDescriptors:
53                if arrayDescriptor.id == fileExtractID:
54                    self.currentArrayDescriptor=arrayDescriptor
55
56    def parse(self,csmldoc):
57        #takes incoming csml document and parses it.
58        tree = ElementTree(file=csmldoc)
59        self.dataset=Dataset()
60        self.dataset.fromXML(tree.getroot())
61        self.dataset =parser_extra.ParserPostProcessor(self.dataset).resolveReferences()
62        #self.currentFeature holds a feature object
63        self.currentFeature = None
64       
65    def getDatasetObj(self):
66        #if you want to bypass this wrapper layer, call this method and it will
67        #return a Parser.Dataset object, which you can interrogate...
68        return self.dataset
69
70    def getCSMLasString(self):
71        #returns csml document as a string
72        strCSML=self.dataset.toXML()
73        #strCSML= parseString(tostring(strCSML)).toprettyxml()
74        #strCSML= parser_extra.removeInlineNS(strCSML)
75        return strCSML
76
77    def getFeatureList(self):
78        #returns a list of feature ids
79        self.featureList = []
80        for member in self.dataset.featureCollection.members:
81             self.featureList.append(member.id)
82        return self.featureList
83       
84    def getFeatureType(self,featureID):
85        self.__setFeature(featureID)
86        if isinstance(self.currentFeature, PointFeature):
87            featureType = 'PointFeature'
88        elif isinstance(self.currentFeature, PointSeriesFeature):
89            featureType = 'PointSeriesFeature'
90        elif isinstance(self.currentFeature, ProfileFeature):
91            featureType = 'ProfileFeature'
92        elif isinstance(self.currentFeature, ProfileSeriesFeature):
93            featureType = 'ProfileSeriesFeature'
94        elif isinstance(self.currentFeature, GridFeature):
95            featureType = 'GridFeature'
96        elif isinstance(self.currentFeature, GridSeriesFeature):
97            featureType = 'GridSeriesFeature'
98        elif isinstance(self.currentFeature, TrajectoryFeature):
99            featureType = 'TrajectoryFeature'
100        else:
101             featureType = 'Unknown feature'
102        return featureType
103   
104    def getFeatureDescription(self,featureID):
105        #returns gml:description (== long name) of feature
106        #returns empty string if not available
107        self.__setFeature(featureID)
108        if hasattr(self.currentFeature, 'description'):
109            desc = self.currentFeature.description
110        else:
111            desc =""
112        return desc
113       
114    def getFileExtractList(self):
115        fileExtractList=[]
116        for arrayDescriptor in self.dataset.arrayDescriptors:
117            fileExtractList.append(arrayDescriptor.id)
118        return fileExtractList
119   
120    def subsetList(self, fileExtractID, valueList, minValue, maxValue):
121        #This is a simple list subsetter. Given a List of values (which may have been extracted from a NetCDF/NASAAmes file
122        #or may have been taken from inline CSML, it doesn't matter) this function returns a subset based on max and min values.
123        #There is inefficiency here as you have to read in the whole  'axis' before subsetting       
124        # Also It assumes monotonically varying axis (although a non-monotonically varying axis 'might' work depending on it's shape)
125        #find position of minValue in list:
126#         self.__setArrayDescriptor(fileExtractID)
127#         
128        #find position of maxValue in list:
129        pass
130        #slice list:
131       
132        return subset
133       
134   
135    def writeNetCDF(self,var, timeSubset, fulldata,**kwargs):
136        #writes a new netcdf file containing a subsetted dataset:
137        #var contains the variable name
138        #timeSubset contains the time values(axis)
139        #fulldata contains the data
140        #and kwargs should contain the subset (from which the axes eg lat lon are extracted)
141               
142        output_file='temp.nc'
143        from Scientific.IO.NetCDF import *  #use this for it's simple interface..
144        from Numeric import *
145        ncfile = NetCDFFile ( output_file, 'w' )
146        # create the dimensions
147       
148       
149        ncfile.createDimension ( 'time', len(timeSubset))
150        time_var = ncfile.createVariable ( 'time', Float, ('time',) )
151        time_var.longname = 'time'
152        #time_var.units = 'days since %s' % starting_julian_date.strftime()
153        floatTimes=[]
154        for time in timeSubset:
155            #floatTimes.append(float(time))
156            floatTimes.append(5) # NEED TO CONVERT 'back' from string to standard julian day.. .
157        time_var[:] =floatTimes[:]
158        for item in self.domainComplement:
159            if item in kwargs:
160                #need to get subset..
161                vals=[]
162                for value in self.domainComplement[item]:
163                    if value >= kwargs[item][0]:
164                        if value <= kwargs[item] [1]:
165                            vals.append(value)
166                #print kwargs[item]
167            else:
168                #use complete 'axis'
169                vals=self.domainComplement[item]
170            ncfile.createDimension(item, len(vals)) 
171            item_var = ncfile.createVariable ( item, Float, (item,) )
172            item_var[:]=vals[:]
173       
174        print fulldata
175        #TODO: need to order items in domainComplement to match order in subsetted data
176        #for now this will work
177        dc = self.currentFeature.domain.domainComplement
178#         print dc.ordinates[0].definesAxis
179#         print dc.ordinates[1].definesAxis
180#         print dc.ordinates[2].definesAxis
181       
182           
183        if len(dc.ordinates)==3:
184            feature_var = ncfile.createVariable (self.currentFeature.id, Float, ('time',dc.ordinates[2].definesAxis,dc.ordinates[1].definesAxis,dc.ordinates[0].definesAxis))
185       
186        feature_var[:]=fulldata[:]
187           
188        ncfile.close()
189       
190   
191    def getDataForExtract(self, fileExtractID):
192        #getDataForExtract, given the gml:id of a file extract, returns the (full) data for that extract
193        self.__setArrayDescriptor(fileExtractID)
194        file = self.currentArrayDescriptor.fileName
195        variable = self.currentArrayDescriptor.variableName
196        DI = csmldataiface.DataInterface()
197        DI=DI.getUnknownInterfaceType(file)       
198        DI.openFile(file)
199        DI.setAxis(variable)
200        fulldata = DI.getDataForAxis()
201        DI.closeFile()
202        print fulldata
203        return fulldata
204       
205    def getDataForVariable(self,file, variable, **kwargs):
206        #given a filename and a variable name, get values for that variable from data interface
207        DI = csmldataiface.DataInterface()
208        DI=DI.getUnknownInterfaceType(file)       
209        DI.openFile(file)
210        DI.setVariable(variable)
211        if kwargs:
212            data = DI.getSubsetOfDataForVar(**kwargs)
213        else:
214            data = DI.getDataForVar()
215        return data
216       
217    def getDataForFeature(self, featureID, timeSubset,  **kwargs):
218        #getDataForFeature, given the gml:id of a file extract, returns the (full) data for that feature
219        #i.e. the rangeSet
220        #Needs to use the rangeSet, domainReference and domainComplement to work out how to
221        #extract the data from file
222        self.__setFeature(featureID)
223        if timeSubset:
224            pass
225        else:
226            timeSubset=None
227        #The RangeSet can be one of:
228        #QuantityList
229        #DataBlock
230        #ArrayDescriptor
231        #AggregatedArray
232        print self.currentFeature.rangeSet
233        if hasattr(self.currentFeature.rangeSet, 'quantityList'):
234            print self.currentFeature.rangeSet.quantityList
235        elif hasattr(self.currentFeature.rangeSet, 'dataBlock'):
236            print self.currentFeature.rangeSet.dataBlock
237        elif hasattr(self.currentFeature.rangeSet, 'arrayDescriptor'):
238            print self.currentFeature.rangeSet.arrayDescriptor
239        elif hasattr(self.currentFeature.rangeSet, 'aggregatedArray'):
240            print self.currentFeature.rangeSet.aggregatedArray
241            fulldata=None
242            if timeSubset == None:
243                for comp in self.currentFeature.rangeSet.aggregatedArray.component:
244                    var = comp.variableName
245                    for file in comp.fileName.split():
246                        data = self.getDataForVariable(file, var)
247                        print file + '  ' + var
248                        if fulldata is None:
249                            fulldata = data.tolist()
250                        else:
251                            #print data
252                            for item in data.tolist():
253                                fulldata.append(item)
254                            #print fulldata
255                            print 'length' + str(len(fulldata))
256                            #note, for now treat masked arrays as lists
257            else:
258                #handle the time subset: (assumes domain reference is time...)
259                domainref = self.getDomainReference(featureID)
260                filelog=[]
261                for time in timeSubset:
262                    listPosition=domainref['t'].split().index(time)
263                    for comp in self.currentFeature.rangeSet.aggregatedArray.component:
264                        var = comp.variableName
265                        print comp.fileName.split()
266                        data = self.getDataForVariable(comp.fileName.split()[listPosition], var, **kwargs)
267                        filelog.append(comp.fileName.split()[listPosition])
268                        #print comp.fileName.split()[listPosition] + '  ' + var
269                        if fulldata is None:
270                            fulldata = data.tolist()
271                        else:
272                            #print data
273                            for item in data.tolist():
274                                fulldata.append(item)
275                            #print fulldata
276                            print 'length' + str(len(fulldata))
277                print 'FILES:' +str(filelog)               
278                self.writeNetCDF(var, timeSubset,fulldata,**kwargs)
279        return fulldata
280   
281    def getTimeSlice(self, featureID, time1, time2):
282         DI = csmldataiface.DataInterface()
283         DI=DI.getUnknownInterfaceType(file)       
284         DI.openFile(file)
285         DI.setVariable(variable)
286         return timeslicedata
287                       
288    def getDomainReference(self, featureID):
289        #This will return a list containing one or more ordinates:
290        #currently in form [Name, values]
291        self.__setFeature(featureID)
292        #domainReference could be one of:
293        #Trajectory
294        #OrientedTrajectory
295        #TimePositionList
296        if isinstance(self.currentFeature.domain.domainReference,TimePositionList):
297#             time = []
298#             time.append('t')
299#             time.append(self.currentFeature.domain.domainReference.timePositions)
300#             domainref.append(time)
301           
302            time = {}
303            time['t'] = self.currentFeature.domain.domainReference.timePositions
304            domainref  = time
305           
306        #Position
307        #OrientedPosition
308        #TimeInstant
309        self.domainReference=domainref
310        return domainref
311   
312    def getDomainComplement(self, featureID):
313        #This will return a list containing one or more ordinates:
314        #currently in form [Name, values]
315        domaincomp ={}
316        self.__setFeature(featureID)
317        dc = self.currentFeature.domain.domainComplement
318       
319        #domainComplement could be one of:
320        #Null
321        #DirectPositionList
322        #Grid
323       
324        if isinstance(dc, Grid):
325            for ordinate in dc.ordinates:
326                domaincomp[ordinate.definesAxis]=self.getDataForExtract(ordinate.axisValues.id)
327#                 newaxis = []
328#                 newaxis.append(ordinate.definesAxis)
329#                 data = self.getData(ordinate.axisValues.id) # axisValues has already been resolved to the fileExtract so the gml:id can be passed straight to getData()
330#                 newaxis.append(data)
331#                 domaincomp.append(newaxis)
332        self.domainComplement=domaincomp
333        return domaincomp
334   
335    def getDomain(self, featureID):
336        #returns both the domain reference axes and domain compliment axes in a single domain dictionary
337        #axes are in no particular order
338        domain = {}
339        dc=self.getDomainComplement(featureID)
340        dr=self.getDomainReference(featureID)
341        for key in dc.keys():
342            domain[key]=dc[key]
343        for key in dr.keys():
344            domain[key]=dr[key]
345           
346        #domain.append(self.getDomainComplement(featureID))
347        #domain.append(self.getDomainReference(featureID))
348        return domain
Note: See TracBrowser for help on using the repository browser.