source: TI02-CSML/trunk/parser/oldapi/csmlio.py @ 1011

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI02-CSML/trunk/parser/oldapi/csmlio.py@1011
Revision 1011, 14.7 KB checked in by domlowe, 14 years ago (diff)

moving files

Line 
1#csmlio.py
2#Contains Parser wrapper'functions to enable reading of data
3
4from  Parser import *
5import parser_extra
6import elementtree.ElementTree as etree
7from Scientific.IO.NetCDF import *  #use this instead of cdms for now for it's simple interface..
8from Numeric import *
9
10#import the DataInterface module from the Scanner, assumes it is in a sibling directory to the one containing csmlio.py
11#TODO - how best to organise modules...
12import os
13currentPath=os.getcwd()
14parentPath=os.sep.join(currentPath.split(os.sep)[:-1])
15parserPath=parentPath + '/Scanner/csmllibs'
16sys.path.append(parserPath) #append the parser path to sys.path
17try:
18    import csmldataiface
19except:
20    print 'Could not import CSML Data Interface module. Make sure the Parser code is in ../newparser directory on the same level as ../Scanner directory.'
21    sys.exit()
22
23
24class CSMLinterface:
25    #Wrapper class containing methods to read/write from/to CSML
26    #Uses parser.
27    def __init__(self):
28        self.dataset=None
29        self.currentFeature=None
30        self.currentArrayDescriptor = None
31        self.domainReference=None
32        self.domainComplement=None
33        #this is a fix to the  ElementTree namespace problem that namespaces are usually represented as ns0, ns1, ns2 etc.
34        etree._namespace_map.update({'http://www.opengis.net/om': 'om',  'http://www.opengis.net/gml': 'gml','http://ndg.nerc.ac.uk/csml' : 'csml', 'http://www.w3.org/1999/xlink':'xlink'})
35   
36    def __setFeature(self,featureID):
37        #given a featureID, set the currentFeature if it is not already set
38        #this is used to check the correct feature is always set.   
39            for member in self.dataset.featureCollection.members:
40                if member.id == featureID:
41                    self.currentFeature = member
42
43    def __setArrayDescriptor(self,fileExtractID):
44        #given a fileExtractID, set the currentArrayDescriptor if it is not already set
45        if self.currentArrayDescriptor is None:
46            for arrayDescriptor in self.dataset.arrayDescriptors:
47                if arrayDescriptor.id == fileExtractID:
48                    self.currentArrayDescriptor=arrayDescriptor
49        if self.currentArrayDescriptor.id != fileExtractID:
50            for arrayDescriptor in self.dataset.arrayDescriptors:
51                if arrayDescriptor.id == fileExtractID:
52                    self.currentArrayDescriptor=arrayDescriptor
53
54    def parse(self,csmldoc):
55        #takes incoming csml document and parses it.
56        tree = ElementTree(file=csmldoc)
57        self.dataset=Dataset()
58        self.dataset.fromXML(tree.getroot())
59        self.dataset =parser_extra.ParserPostProcessor(self.dataset).resolveReferences()
60        #self.currentFeature holds a feature object
61        self.currentFeature = None
62       
63    def getDatasetObj(self):
64        #if you want to bypass this wrapper layer, call this method and it will
65        #return a Parser.Dataset object, which you can interrogate...
66        return self.dataset
67
68    def getCSMLasString(self):
69        #returns csml document as a string
70        strCSML=self.dataset.toXML()
71        #strCSML= parseString(tostring(strCSML)).toprettyxml()
72        #strCSML= parser_extra.removeInlineNS(strCSML)
73        return strCSML
74
75    def getFeatureList(self):
76        #returns a list of feature ids
77        self.featureList = []
78        for member in self.dataset.featureCollection.members:
79             self.featureList.append(member.id)
80        return self.featureList
81       
82    def getFeatureType(self,featureID):
83        self.__setFeature(featureID)
84        if isinstance(self.currentFeature, PointFeature):
85            featureType = 'PointFeature'
86        elif isinstance(self.currentFeature, PointSeriesFeature):
87            featureType = 'PointSeriesFeature'
88        elif isinstance(self.currentFeature, ProfileFeature):
89            featureType = 'ProfileFeature'
90        elif isinstance(self.currentFeature, ProfileSeriesFeature):
91            featureType = 'ProfileSeriesFeature'
92        elif isinstance(self.currentFeature, GridFeature):
93            featureType = 'GridFeature'
94        elif isinstance(self.currentFeature, GridSeriesFeature):
95            featureType = 'GridSeriesFeature'
96        elif isinstance(self.currentFeature, TrajectoryFeature):
97            featureType = 'TrajectoryFeature'
98        else:
99             featureType = 'Unknown feature'
100        return featureType
101   
102    def getFeatureDescription(self,featureID):
103        #returns gml:description (== long name) of feature
104        #returns empty string if not available
105        self.__setFeature(featureID)
106        if hasattr(self.currentFeature, 'description'):
107            desc = self.currentFeature.description
108        else:
109            desc =""
110        return desc
111       
112    def getFileExtractList(self):
113        fileExtractList=[]
114        for arrayDescriptor in self.dataset.arrayDescriptors:
115            fileExtractList.append(arrayDescriptor.id)
116        return fileExtractList
117   
118    def writeCSML(self,var, timeSubset, fulldata,**kwargs):
119        #this takes the existing CSML file and reduces it so that it just contains
120        #a description of the subsetted data.
121        output_file='temp.xml'
122        dset = self.dataset
123        feat=self.currentFeature # this is the feature we need to 'subset'
124        dset.featureCollection.members=[]# clear out all the existing features.
125        dset.arrayDescriptors=[]
126        if isinstance(feat, GridSeriesFeature):
127            strTimes =''
128            for time in self.times:
129                strTimes= strTimes + ' ' + time
130            feat.domain.domainReference=TimePositionList(timePositions=strTimes)
131            strFiles=''
132            for file in self.files:
133                strFiles= strFiles + ' ' + file
134            print dir(feat.rangeSet.aggregatedArray.component)
135            for comp in feat.rangeSet.aggregatedArray.component:
136                comp.fileName=strFiles
137#            sys.exit()
138           
139        dset.featureCollection.members.append(feat)
140        csml = dset.toXML()   
141        #Tidy up and print the CSML document:
142        #strCSML= parseString(tostring(csml)).toprettyxml()
143        strCSML=parser_extra.PrettyPrint(csml)
144        strCSML=parser_extra.removeInlineNS(strCSML)
145        print strCSML
146       
147       
148       
149       
150    def writeNetCDF(self,var, timeSubset, fulldata,**kwargs):
151        #writes a new netcdf file containing a subsetted dataset:
152        #var contains the variable name
153        #timeSubset contains the time values(axis)
154        #fulldata contains the data
155        #and kwargs should contain the subset (from which the axes eg lat lon are extracted)
156               
157        output_file='temp.nc'
158        ncfile = NetCDFFile ( output_file, 'w' )
159        # create the dimensions       
160        ncfile.createDimension ( 'time', len(timeSubset))
161        time_var = ncfile.createVariable ( 'time', Float, ('time',) )
162        time_var.longname = 'time'
163        #time_var.units = 'days since %s' % starting_julian_date.strftime()
164        floatTimes=[]
165        for time in timeSubset:
166            #floatTimes.append(float(time))
167            floatTimes.append(5) # NEED TO CONVERT 'back' from string to standard julian day.. .
168        time_var[:] =floatTimes[:]
169        for item in self.domainComplement:
170            if item in kwargs:
171                #need to get subset..
172                vals=[]
173                for value in self.domainComplement[item]:
174                    if value >= kwargs[item][0]:
175                        if value <= kwargs[item] [1]:
176                            vals.append(value)
177                #print kwargs[item]
178            else:
179                #use complete 'axis'
180                vals=self.domainComplement[item]
181            ncfile.createDimension(item, len(vals)) 
182            item_var = ncfile.createVariable ( item, Float, (item,) )
183            item_var[:]=vals[:]
184       
185        print fulldata
186        #TODO: need to order items in domainComplement to match order in subsetted data
187        #for now this will work
188        dc = self.currentFeature.domain.domainComplement       
189        if len(dc.ordinates)==3:
190            feature_var = ncfile.createVariable (self.currentFeature.id, Float, ('time',dc.ordinates[2].definesAxis,dc.ordinates[1].definesAxis,dc.ordinates[0].definesAxis))
191        elif len(dc.ordinates)==2:
192            feature_var = ncfile.createVariable (self.currentFeature.id, Float, ('time',dc.ordinates[1].definesAxis,dc.ordinates[0].definesAxis))
193        feature_var[:]=fulldata[:]
194        ncfile.close()
195   
196    def getDataForExtract(self, fileExtractID):
197        #getDataForExtract, given the gml:id of a file extract, returns the (full) data for that extract
198        self.__setArrayDescriptor(fileExtractID)
199        file = self.currentArrayDescriptor.fileName
200        variable = self.currentArrayDescriptor.variableName
201        DI = csmldataiface.DataInterface()
202        DI=DI.getUnknownInterfaceType(file)       
203        DI.openFile(file)
204        DI.setAxis(variable)
205        fulldata = DI.getDataForAxis()
206        DI.closeFile()
207        print fulldata
208        return fulldata
209       
210    def getDataForVariable(self,file, variable, **kwargs):
211        #given a filename and a variable name, get values for that variable from data interface
212        DI = csmldataiface.DataInterface()
213        DI=DI.getUnknownInterfaceType(file)       
214        DI.openFile(file)
215        DI.setVariable(variable)
216        if kwargs:
217            data = DI.getSubsetOfDataForVar(**kwargs)
218        else:
219            data = DI.getDataForVar()
220        return data
221       
222    def getDataForFeature(self, featureID, timeSubset,  **kwargs):
223        #getDataForFeature, given the gml:id of a file extract, returns the (full) data for that feature
224        #i.e. the rangeSet
225        #Needs to use the rangeSet, domainReference and domainComplement to work out how to
226        #extract the data from file
227        self.__setFeature(featureID)
228        if timeSubset:
229            pass
230        else:
231            timeSubset=None
232        #The RangeSet can be one of:
233        #QuantityList
234        #DataBlock
235        #ArrayDescriptor
236        #AggregatedArray
237        print self.currentFeature.rangeSet
238        if hasattr(self.currentFeature.rangeSet, 'quantityList'):
239            print self.currentFeature.rangeSet.quantityList
240        elif hasattr(self.currentFeature.rangeSet, 'dataBlock'):
241            print self.currentFeature.rangeSet.dataBlock
242        elif hasattr(self.currentFeature.rangeSet, 'arrayDescriptor'):
243            print self.currentFeature.rangeSet.arrayDescriptor
244        elif hasattr(self.currentFeature.rangeSet, 'aggregatedArray'):
245            print self.currentFeature.rangeSet.aggregatedArray
246            fulldata=None
247            if timeSubset == None:
248                for comp in self.currentFeature.rangeSet.aggregatedArray.component:
249                    var = comp.variableName
250                    for file in comp.fileName.split():
251                        data = self.getDataForVariable(file, var)
252                        print file + '  ' + var
253                        if fulldata is None:
254                            fulldata = data.tolist()
255                        else:
256                            #print data
257                            for item in data.tolist():
258                                fulldata.append(item)
259                            #print fulldata
260                            print 'length' + str(len(fulldata))
261                            #note, for now treat masked arrays as lists
262            else:
263                #handle the time subset: (assumes domain reference is time...)
264                domainref = self.getDomainReference(featureID)
265                filelog=[]
266                self.times=timeSubset
267                self.files=[]
268                for time in self.times:
269                    listPosition=domainref['t'].split().index(time)
270                    for comp in self.currentFeature.rangeSet.aggregatedArray.component:
271                        var = comp.variableName
272                        print comp.fileName.split()
273                        data = self.getDataForVariable(comp.fileName.split()[listPosition], var, **kwargs)
274                        self.files.append(comp.fileName.split()[listPosition])
275                        filelog.append(comp.fileName.split()[listPosition])
276                        #print comp.fileName.split()[listPosition] + '  ' + var
277                        if fulldata is None:
278                            fulldata = data.tolist()
279                        else:
280                            #print data
281                            for item in data.tolist():
282                                fulldata.append(item)
283                            #print fulldata
284                            print 'length' + str(len(fulldata))
285                print 'FILES:' +str(filelog)               
286                self.writeNetCDF(var, timeSubset,fulldata,**kwargs)
287                self.writeCSML(var, timeSubset,fulldata,**kwargs)
288        return fulldata
289   
290    def getTimeSlice(self, featureID, time1, time2):
291         DI = csmldataiface.DataInterface()
292         DI=DI.getUnknownInterfaceType(file)       
293         DI.openFile(file)
294         DI.setVariable(variable)
295         return timeslicedata
296                       
297    def getDomainReference(self, featureID):
298        #This will return a list containing one or more ordinates:
299        #currently in form [Name, values]
300        self.__setFeature(featureID)
301        #domainReference could be one of:
302        #Trajectory
303        #OrientedTrajectory
304        #TimePositionList
305        if isinstance(self.currentFeature.domain.domainReference,TimePositionList):         
306            time = {}
307            time['t'] = self.currentFeature.domain.domainReference.timePositions
308            domainref  = time
309        #Position
310        #OrientedPosition
311        #TimeInstant
312        self.domainReference=domainref
313        return domainref
314   
315    def getDomainComplement(self, featureID):
316        #This will return a list containing one or more ordinates:
317        #currently in form [Name, values]
318        domaincomp ={}
319        self.__setFeature(featureID)
320        dc = self.currentFeature.domain.domainComplement
321        #domainComplement could be one of:
322        #Null
323        #DirectPositionList
324        #Grid
325        if isinstance(dc, Grid):
326            for ordinate in dc.ordinates:
327                domaincomp[ordinate.definesAxis]=self.getDataForExtract(ordinate.axisValues.id)
328        self.domainComplement=domaincomp
329        return domaincomp
330   
331    def getDomain(self, featureID):
332        #returns both the domain reference axes and domain compliment axes in a single domain dictionary
333        #axes are in no particular order
334        domain = {}
335        dc=self.getDomainComplement(featureID)
336        dr=self.getDomainReference(featureID)
337        for key in dc.keys():
338            domain[key]=dc[key]
339        for key in dr.keys():
340            domain[key]=dr[key]
341        return domain
Note: See TracBrowser for help on using the repository browser.