source: TI02-CSML/trunk/newparser/csmlio.py @ 883

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI02-CSML/trunk/newparser/csmlio.py@883
Revision 883, 12.1 KB checked in by domlowe, 14 years ago (diff)

basic temporal subsetting implemented, basic spatial part done

RevLine 
[831]1#csmlio.py
2#Contains Parser wrapper'functions to enable reading of data
3
4from  Parser import *
5import nappy
[839]6import cdms
[831]7import parser_extra
8from xml.dom.minidom import parseString
9import elementtree.ElementTree as etree
10
[839]11#import the DataInterface module from the Scanner, assumes it is in a sibling directory to the one containing csmlio.py
12#TODO - how best to organise modules...
13import os
14currentPath=os.getcwd()
15parentPath=os.sep.join(currentPath.split(os.sep)[:-1])
16parserPath=parentPath + '/Scanner/csmllibs'
17sys.path.append(parserPath) #append the parser path to sys.path
18try:
19    import csmldataiface
20except:
21    print 'Could not import CSML Data Interface module. Make sure the Parser code is in ../newparser directory on the same level as ../Scanner directory.'
22    sys.exit()
[831]23
[839]24
[831]25class CSMLinterface:
26    #Wrapper class containing methods to read/write from/to CSML
27    #Uses parser.
28    def __init__(self):
29        self.dataset=None
30        self.currentFeature=None
31        self.currentArrayDescriptor = None
32        #this is a fix to the  ElementTree namespace problem that namespaces are usually represented as ns0, ns1, ns2 etc.
33        etree._namespace_map.update({
34        'http://www.opengis.net/om': 'om',  'http://www.opengis.net/gml': 'gml','http://ndg.nerc.ac.uk/csml' : 'csml', 'http://www.w3.org/1999/xlink':'xlink'})
35   
36    def __setFeature(self,featureID):
37        #given a featureID, set the currentFeature if it is not already set
[864]38        #this is used to check the correct feature is always set.   
[831]39            for member in self.dataset.featureCollection.members:
40                if member.id == featureID:
41                    self.currentFeature = member
42
43    def __setArrayDescriptor(self,fileExtractID):
44        #given a fileExtractID, set the currentArrayDescriptor if it is not already set
45        if self.currentArrayDescriptor is None:
46            for arrayDescriptor in self.dataset.arrayDescriptors:
47                if arrayDescriptor.id == fileExtractID:
48                    self.currentArrayDescriptor=arrayDescriptor
49        if self.currentArrayDescriptor.id != fileExtractID:
50            for arrayDescriptor in self.dataset.arrayDescriptors:
51                if arrayDescriptor.id == fileExtractID:
52                    self.currentArrayDescriptor=arrayDescriptor
53
54    def parse(self,csmldoc):
55        #takes incoming csml document and parses it.
56        tree = ElementTree(file=csmldoc)
57        self.dataset=Dataset()
58        self.dataset.fromXML(tree.getroot())
59        self.dataset =parser_extra.ParserPostProcessor(self.dataset).resolveReferences()
60        #self.currentFeature holds a feature object
61        self.currentFeature = None
62       
[835]63    def getDatasetObj(self):
64        #if you want to bypass this wrapper layer, call this method and it will
65        #return a Parser.Dataset object, which you can interrogate...
66        return self.dataset
[831]67
68    def getCSMLasString(self):
69        #returns csml document as a string
70        strCSML=self.dataset.toXML()
[875]71        #strCSML= parseString(tostring(strCSML)).toprettyxml()
72        #strCSML= parser_extra.removeInlineNS(strCSML)
[831]73        return strCSML
74
75    def getFeatureList(self):
76        #returns a list of feature ids
77        self.featureList = []
78        for member in self.dataset.featureCollection.members:
79             self.featureList.append(member.id)
80        return self.featureList
81       
82    def getFeatureType(self,featureID):
83        self.__setFeature(featureID)
84        if isinstance(self.currentFeature, PointFeature):
85            featureType = 'PointFeature'
86        elif isinstance(self.currentFeature, PointSeriesFeature):
87            featureType = 'PointSeriesFeature'
88        elif isinstance(self.currentFeature, ProfileFeature):
89            featureType = 'ProfileFeature'
90        elif isinstance(self.currentFeature, ProfileSeriesFeature):
91            featureType = 'ProfileSeriesFeature'
92        elif isinstance(self.currentFeature, GridFeature):
93            featureType = 'GridFeature'
94        elif isinstance(self.currentFeature, GridSeriesFeature):
95            featureType = 'GridSeriesFeature'
96        elif isinstance(self.currentFeature, TrajectoryFeature):
97            featureType = 'TrajectoryFeature'
98        else:
99             featureType = 'Unknown feature'
100        return featureType
101   
102    def getFeatureDescription(self,featureID):
103        #returns gml:description (== long name) of feature
104        #returns empty string if not available
105        self.__setFeature(featureID)
106        if hasattr(self.currentFeature, 'description'):
107            desc = self.currentFeature.description
108        else:
109            desc =""
110        return desc
111       
[864]112    def getFileExtractList(self):
113        fileExtractList=[]
114        for arrayDescriptor in self.dataset.arrayDescriptors:
115            fileExtractList.append(arrayDescriptor.id)
116        return fileExtractList
[859]117   
[875]118    def subsetList(self, fileExtractID, valueList, minValue, maxValue):
119        #This is a simple list subsetter. Given a List of values (which may have been extracted from a NetCDF/NASAAmes file
120        #or may have been taken from inline CSML, it doesn't matter) this function returns a subset based on max and min values.
121        #There is inefficiency here as you have to read in the whole  'axis' before subsetting       
122        # Also It assumes monotonically varying axis (although a non-monotonically varying axis 'might' work depending on it's shape)
123        #find position of minValue in list:
124#         self.__setArrayDescriptor(fileExtractID)
125#         
126        #find position of maxValue in list:
127        pass
128        #slice list:
129       
130        return subset
131       
132   
133
134   
135    def getDataForExtract(self, fileExtractID):
136        #getDataForExtract, given the gml:id of a file extract, returns the (full) data for that extract
[864]137        self.__setArrayDescriptor(fileExtractID)
138        file = self.currentArrayDescriptor.fileName
139        variable = self.currentArrayDescriptor.variableName
140        DI = csmldataiface.DataInterface()
141        DI=DI.getUnknownInterfaceType(file)       
142        DI.openFile(file)
143        DI.setAxis(variable)
144        fulldata = DI.getDataForAxis()
145        DI.closeFile()
[875]146        print fulldata
[864]147        return fulldata
148       
[883]149    def getDataForVariable(self,file, variable, **kwargs):
150        #given a filename and a variable name, get values for that variable from data interface
151        DI = csmldataiface.DataInterface()
152        DI=DI.getUnknownInterfaceType(file)       
153        DI.openFile(file)
154        DI.setVariable(variable)
155       
156        sys.exit()
157        if kwargs:
158            data = DI.getSubsetOfDataForVar(**kwargs)
159        else:
160            data = DI.getDataForVar()
161        return data
162       
163    def getDataForFeature(self, featureID, timeSubset=None,  **kwargs):
[875]164        #getDataForFeature, given the gml:id of a file extract, returns the (full) data for that feature
165        #i.e. the rangeSet
166        #Needs to use the rangeSet, domainReference and domainComplement to work out how to
[883]167
[875]168        #extract the data from file
169        self.__setFeature(featureID)
170        #The RangeSet can be one of:
171        #QuantityList
172        #DataBlock
173        #ArrayDescriptor
174        #AggregatedArray
175        print self.currentFeature.rangeSet
176        if hasattr(self.currentFeature.rangeSet, 'quantityList'):
177            print self.currentFeature.rangeSet.quantityList
178        elif hasattr(self.currentFeature.rangeSet, 'dataBlock'):
179            print self.currentFeature.rangeSet.dataBlock
180        elif hasattr(self.currentFeature.rangeSet, 'arrayDescriptor'):
181            print self.currentFeature.rangeSet.arrayDescriptor
182        elif hasattr(self.currentFeature.rangeSet, 'aggregatedArray'):
183            print self.currentFeature.rangeSet.aggregatedArray
184            fulldata=None
[883]185            if timeSubset == None:
186                for comp in self.currentFeature.rangeSet.aggregatedArray.component:
187                    var = comp.variableName
188                    for file in comp.fileName.split():
189                        data = self.getDataForVariable(file, var)
190                        print file + '  ' + var
191                        if fulldata is None:
192                            fulldata = data.tolist()
193                        else:
194                            #print data
195                            for item in data.tolist():
196                                fulldata.append(item)
197                            #print fulldata
198                            print 'length' + str(len(fulldata))
199                            #note, for now treat masked arrays as lists
200            else:
201                #handle the time subset: (assumes domain reference is time...)
202                domainref = self.getDomainReference(featureID)
203                filelog=[]
204                for time in timeSubset:
205                    listPosition=domainref['t'].split().index(time)
206                    for comp in self.currentFeature.rangeSet.aggregatedArray.component:
207                        var = comp.variableName
208                        print comp.fileName.split()
209                        data = self.getDataForVariable(comp.fileName.split()[listPosition], var, **kwargs)
210                        filelog.append(comp.fileName.split()[listPosition])
211                        #print comp.fileName.split()[listPosition] + '  ' + var
212                        if fulldata is None:
213                            fulldata = data.tolist()
214                        else:
215                            #print data
216                            for item in data.tolist():
217                                fulldata.append(item)
218                            #print fulldata
219                            print 'length' + str(len(fulldata))
220                print 'FILES:' +str(filelog)
[875]221        return fulldata
222   
[883]223    def getTimeSlice(self, featureID, time1, time2):
224         DI = csmldataiface.DataInterface()
225         DI=DI.getUnknownInterfaceType(file)       
226         DI.openFile(file)
227         DI.setVariable(variable)
228         return timeslicedata
229                       
[859]230    def getDomainReference(self, featureID):
[864]231        #This will return a list containing one or more ordinates:
[875]232        #currently in form [Name, values]
[859]233        self.__setFeature(featureID)
234        #domainReference could be one of:
235        #Trajectory
236        #OrientedTrajectory
237        #TimePositionList
238        if isinstance(self.currentFeature.domain.domainReference,TimePositionList):
[875]239#             time = []
240#             time.append('t')
241#             time.append(self.currentFeature.domain.domainReference.timePositions)
242#             domainref.append(time)
243           
244            time = {}
245            time['t'] = self.currentFeature.domain.domainReference.timePositions
246            domainref  = time
247           
[859]248        #Position
249        #OrientedPosition
250        #TimeInstant
251        return domainref
252   
253    def getDomainComplement(self, featureID):
[864]254        #This will return a list containing one or more ordinates:
[875]255        #currently in form [Name, values]
256        domaincomp ={}
[859]257        self.__setFeature(featureID)
[864]258        dc = self.currentFeature.domain.domainComplement
259       
[859]260        #domainComplement could be one of:
261        #Null
262        #DirectPositionList
263        #Grid
[864]264        if isinstance(dc, Grid):
[875]265       
[864]266            for ordinate in dc.ordinates:
[875]267                domaincomp[ordinate.definesAxis]=self.getDataForExtract(ordinate.axisValues.id)
268#                 newaxis = []
269#                 newaxis.append(ordinate.definesAxis)
270#                 data = self.getData(ordinate.axisValues.id) # axisValues has already been resolved to the fileExtract so the gml:id can be passed straight to getData()
271#                 newaxis.append(data)
272#                 domaincomp.append(newaxis)
[859]273        return domaincomp
274   
[864]275    def getDomain(self, featureID):
[875]276        #returns both the domain reference axes and domain compliment axes in a single domain dictionary
277        #axes are in no particular order
278        domain = {}
279        dc=self.getDomainComplement(featureID)
280        dr=self.getDomainReference(featureID)
281        for key in dc.keys():
282            domain[key]=dc[key]
283        for key in dr.keys():
284            domain[key]=dr[key]
285           
286        #domain.append(self.getDomainComplement(featureID))
287        #domain.append(self.getDomainReference(featureID))
[864]288        return domain
Note: See TracBrowser for help on using the repository browser.