source: TI02-CSML/trunk/parser/oldapi/csmldoc.py @ 1011

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI02-CSML/trunk/parser/oldapi/csmldoc.py@1178
Revision 1011, 12.6 KB checked in by domlowe, 14 years ago (diff)

moving files

Line 
1'''csmldoc.py
2Contains the high-level Parser API for reading CSML documents, features, etc.
3Note: all class names are prefixed with 'CSML' to avoid conflicts with Parser classes
4of similar names.
5'''
6
7from Parser import * #import parser into same namespace
8import parser_extra
9from Scientific.IO.NetCDF import *  #use this instead of cdms for now for it's simple write interface..
10from Numeric import *
11
# Import the DataInterface module (csmldataiface) from the Scanner.
# NOTE: the search path is derived from the current working directory, not
# from the location of this file, so the process has to be started from a
# directory whose parent contains Scanner/csmllibs.
# TODO - how best to organise modules...
import os
import sys  # imported explicitly rather than relying on 'from Parser import *'

currentPath = os.getcwd()
parentPath = os.sep.join(currentPath.split(os.sep)[:-1])
parserPath = parentPath + '/Scanner/csmllibs'
sys.path.append(parserPath)  # make csmldataiface importable
try:
    import csmldataiface
except ImportError:
    # Only a failed import is reported here; any other error raised while
    # csmldataiface is loading propagates with its own traceback.
    print('Could not import CSML Data Interface module. Make sure the Parser code is in ../parser directory on the same level as ../Scanner directory.')
    sys.exit(1)  # non-zero status so callers can detect the failure
24
25#global to hold top level dataset object - otherwise we will constantly be reparsing it.
26
class CSMLDataset:
    """High-level handle on a parsed CSML document.

    The parsed (and reference-resolved) Parser ``Dataset`` is kept in
    ``self.dataset``.
    """

    def __init__(self,csmlfile):
        """Parse ``csmlfile`` and store the resolved result in self.dataset."""
        parsed = Dataset()
        parsed.fromXML(ElementTree(file=csmlfile).getroot())
        self.dataset = parser_extra.ParserPostProcessor(parsed).resolveReferences()

    def getFeatureList(self):
        """Return the ids of all feature members; also cached in self.featureList."""
        self.featureList = [feature.id
                            for feature in self.dataset.featureCollection.members]
        return self.featureList

    def getFeature(self, featureID):
        """Return the high-level wrapper for the feature whose id is ``featureID``.

        The wrapper still exposes the underlying Parser object. Returns None
        when no member has that id or when its type has no wrapper yet.
        """
        for candidate in self.dataset.featureCollection.members:
            if candidate.id != featureID:
                continue
            # Checked in this order; the first matching Parser type wins.
            wrappers = (
                (PointFeature, CSMLPointFeature),
                (PointSeriesFeature, CSMLPointSeriesFeature),
                (ProfileFeature, CSMLProfileFeature),
                (GridSeriesFeature, CSMLGridSeriesFeature),
                # ... add the remaining feature types here
            )
            for parsedType, wrapperType in wrappers:
                if isinstance(candidate, parsedType):
                    return wrapperType(candidate, self.dataset)
        return None

    def getFileExtract(self,extractID):
        """Return a CSMLAbstractFileExtract for the array descriptor with id
        ``extractID``, or None when there is no such descriptor."""
        for descriptor in self.dataset.arrayDescriptors:
            if descriptor.id == extractID:
                return CSMLAbstractFileExtract(descriptor)
        return None

    #other Dataset level methods go here:
    ############
    #def getArrayDescriptors
    #etc...
65   
class CSMLAnyFile:
    """Access to the variables of a data file through the CSML data interface.

    The concrete interface object is chosen by
    ``DataInterface.getUnknownInterfaceType(filename)`` and kept in ``self.DI``.
    """

    def __init__(self,filename):
        """Select a data interface for ``filename`` and open the file with it."""
        self.DI = csmldataiface.DataInterface().getUnknownInterfaceType(filename)
        self.DI.openFile(filename)

    def getVariableData(self,variableName,**kwargs):
        """Return the data of ``variableName``.

        Any keyword arguments are passed on as a subset definition to
        ``getSubsetOfDataForVar``; without them the whole variable is returned.
        """
        self.DI.setVariable(variableName)
        if kwargs:
            return self.DI.getSubsetOfDataForVar(**kwargs)
        return self.DI.getDataForVar()

    def close(self):
        """Close the underlying file (same contract as CSMLAbstractFileExtract.close)."""
        self.DI.closeFile()
79
class CSMLAbstractFileExtract:
    """Reads the data described by a parsed file extract.

    ``extract`` must provide ``fileName`` and ``variableName``; the file is
    opened on construction and should be released with close().
    """

    def __init__(self,extract):
        """Remember the parsed extract and open the file it points at."""
        self.parsedExtract = extract
        self.file = extract.fileName
        self.variableName = extract.variableName
        self.DI = csmldataiface.DataInterface().getUnknownInterfaceType(self.file)
        self.DI.openFile(self.file)

    def getAxisData(self):
        """Return the data of the extract's variable, read as an axis."""
        self.DI.setAxis(self.variableName)
        return self.DI.getDataForAxis()

    def getVariableData(self,**kwargs):
        """Return the data of the extract's variable.

        Keyword arguments, if any, are passed on as a subset definition.
        """
        self.DI.setVariable(self.variableName)
        if not kwargs:
            return self.DI.getDataForVar()
        return self.DI.getSubsetOfDataForVar(**kwargs)

    def close(self):
        """Close the underlying file."""
        self.DI.closeFile()
102
class CSMLAbstractFeature:
    """Base class for the high-level feature wrappers.

    Provides defaults that apply to any feature; concrete feature types
    override whichever methods they need.
    """

    def __init__(self,feature,parentDataset):
        """Store the lower level parsed feature and the dataset it belongs to."""
        self.parsedFeature = feature
        self.parentDataset = parentDataset

    def getAllowedSubsettings(self):
        """Return the names of the supported subsetting operations (default: none)."""
        return ['none']

    def getFeatureType(self):
        """Return the string form of the parsed feature."""
        return str(self.parsedFeature)

    def getBestName(self):
        """Return the preferred name of the feature.

        What the best name should be is still undecided, so the gml:id is
        returned for now.
        """
        return self.parsedFeature.id

    def getDomain(self):
        """Return the domain; the abstract default is an empty list.

        Subclasses are expected to return the domain reference and, where
        present, the domain complement.
        """
        return []

    def getOutputOptions(self):
        """Generic output options - none by default; feature types override this."""
        pass

    def getSubset(self, **kwargs):
        """Subsetting hook; not implemented for the abstract feature."""
        pass

    def __writeNetCDF(self,var, timeSubset,fulldata,**kwargs):
        """writes a NetCDF file containing feature data (not implemented)"""
        pass

    def __writeCSML(self, var, timeSubset,fulldata,**kwargs):
        """writes a CSML document to describe subsetted feature (not implemented)"""
        pass
139
class CSMLPointFeature(CSMLAbstractFeature):
    """High-level wrapper for a parsed PointFeature."""

    def __init__(self, feature, dataset=None):
        """Store the parsed feature and its parent dataset.

        ``dataset`` is forwarded to CSMLAbstractFeature.__init__, which
        requires it; CSMLDataset.getFeature supplies it as the second
        argument. It defaults to None so one-argument construction also works.
        """
        CSMLAbstractFeature.__init__(self, feature, dataset)

    def getOutputOptions(self):
        #return output options for Point feature... not implemented
        pass
146
class CSMLPointSeriesFeature(CSMLAbstractFeature):
    """High-level wrapper for a parsed PointSeriesFeature."""

    def __init__(self, feature, dataset=None):
        """Store the parsed feature and its parent dataset.

        ``dataset`` is forwarded to CSMLAbstractFeature.__init__, which
        requires it; CSMLDataset.getFeature supplies it as the second
        argument. It defaults to None so one-argument construction also works.
        """
        CSMLAbstractFeature.__init__(self, feature, dataset)

    def getOutputOptions(self):
        #return output options for PointSeries feature... not implemented
        pass
153   
class CSMLProfileFeature(CSMLAbstractFeature):
    """High-level wrapper for a parsed ProfileFeature.

    Only getOutputOptions is overridden; every other method is inherited
    from CSMLAbstractFeature.
    """

    def __init__(self, feature, dataset=None):
        """Store the parsed feature and its parent dataset.

        ``dataset`` is forwarded to CSMLAbstractFeature.__init__, which
        requires it; CSMLDataset.getFeature supplies it as the second
        argument. It defaults to None so one-argument construction also works.
        """
        CSMLAbstractFeature.__init__(self, feature, dataset)

    def getOutputOptions(self):
        #return output options for Profile feature... not implemented
        pass
    #NOTE: this feature does not define a "testMethod" of its own
    #def testMethod(self):
    #   pass
163   
class CSMLGridSeriesFeature(CSMLAbstractFeature):
    """High-level wrapper for a parsed GridSeriesFeature.

    Adds access to the feature's domain (time positions plus grid axes) and
    the 'subsetToGridSeries' operation, which writes a CSML document and a
    NetCDF file describing a subset of the feature.
    """

    def __init__(self,feature, dataset):
        CSMLAbstractFeature.__init__(self,feature,dataset)

    def getAllowedSubsettings(self):
        """Return the names of the supported subsetting operations."""
        return ['subsetToGridSeries']

    def getOutputOptions(self):
        #return output options for GridSeries feature... not implemented
        pass

    def __getDomainReference(self):
        """Return {'t': <time positions>} for a TimePositionList domain reference.

        Any other kind of domain reference (e.g. a file extract) is not
        handled yet (TODO) and yields an empty dictionary. The result is also
        stored in self.domainReference.
        """
        domainref = {}
        reference = self.parsedFeature.domain.domainReference
        if isinstance(reference, TimePositionList):
            domainref['t'] = reference.timePositions
        self.domainReference = domainref
        return domainref

    def __getDomainComplement(self):
        """Return {axis name: axis data} for every ordinate of the grid.

        The axis data is read from the file extract behind each ordinate. A
        domain complement that is not a Grid yields an empty dictionary. The
        result is also stored in self.domainComplement.
        """
        domaincomp = {}
        dc = self.parsedFeature.domain.domainComplement
        #dc should be a grid!
        if isinstance(dc, Grid):
            for ordinate in dc.ordinates:
                fe = CSMLAbstractFileExtract(ordinate.axisValues)
                try:
                    domaincomp[ordinate.definesAxis] = fe.getAxisData()
                finally:
                    fe.close()  # release the file even if the read fails
        self.domainComplement = domaincomp
        return domaincomp

    def getDomain(self):
        """Return the domain reference and domain complement axes in one dictionary.

        Axes are in no particular order; on a name clash the domain
        reference entry wins.
        """
        domain = {}
        domain.update(self.__getDomainComplement())
        domain.update(self.__getDomainReference())
        return domain

    def subsetToGridSeries(self, timeSubset,  **kwargs):
        """Write a CSML document and a NetCDF file for a subset of this feature.

        timeSubset -- non-empty sequence of time position strings; each must
                      occur in the feature's time position list.
        kwargs     -- optional axis subsets, ``axisName=(lower, upper)``
                      (bounds inclusive); also passed on to the data reader.

        Returns the tuple (path of the CSML file, path of the NetCDF file);
        these are currently always 'temp.xml' and 'temp.nc' in the working
        directory. self.times and self.files record the requested times and
        the source files that were read.

        Raises ValueError when timeSubset is empty, when the domain reference
        is not a time position list, when a requested time is unknown, or
        when the grid does not have 2 or 3 ordinates.
        """
        pathToSubsetCSML = 'temp.xml'
        pathToSubsetNetCDF = 'temp.nc'
        if len(timeSubset) == 0:
            raise ValueError('timeSubset must contain at least one time position')
        domainref = self.__getDomainReference()
        if 't' not in domainref:
            raise ValueError('feature has no time position list to subset')
        self.times = timeSubset
        self.files = []
        strTimes = ''.join([' ' + time for time in self.times])
        # split the time position list once instead of once per requested time
        fulldata = self.__readSubsetData(domainref['t'].split(), kwargs)

        ### create csml document####
        axes = self.__buildSubsetOrdinates(kwargs)
        ordinates = [ordinate for ordinate, values in axes]
        if len(ordinates) not in (2, 3):
            # checked before anything is written: the NetCDF layout below is
            # only defined for 2 or 3 grid axes.
            raise ValueError('subsetToGridSeries supports 2 or 3 grid ordinates, got %d'
                             % len(ordinates))
        domain = GridSeriesDomain()
        domain.domainReference = TimePositionList(timePositions=strTimes)
        grid = Grid()
        grid.ordinates = ordinates
        domain.domainComplement = grid
        rangeSet = RangeSet()
        # TODO: placeholder descriptor values, not yet the real file/variable/size
        rangeSet.arrayDescriptor = NetCDFExtract(fileName='f',variableName='v',arraySize='as')
        subsetCSML = FeatureBuilder().makeGridSeries(domain,rangeSet)

        #### write csml document #####
        output = open(pathToSubsetCSML,'w')
        try:
            output.write(subsetCSML)
        finally:
            output.close()

        ### create and write netcdf####
        self.__writeSubsetNetCDF(pathToSubsetNetCDF, timeSubset, axes, fulldata)

        return pathToSubsetCSML, pathToSubsetNetCDF

    def __readSubsetData(self, allTimes, subset):
        """Read the data of every component for every requested time.

        allTimes -- the feature's full list of time position strings.
        subset   -- dictionary of axis subsets passed on to the reader.

        For each time in self.times the file at the matching position of each
        component's fileName list is read. The values of all reads are
        concatenated into one list, and each file name is appended to
        self.files.
        """
        fulldata = None
        components = self.parsedFeature.rangeSet.aggregatedArray.component
        for time in self.times:
            listPosition = allTimes.index(time)
            for comp in components:
                fileName = comp.fileName.split()[listPosition]
                f = CSMLAnyFile(fileName)
                try:
                    # convert while the file is still open
                    values = f.getVariableData(comp.variableName, **subset).tolist()
                finally:
                    f.DI.closeFile()  # do not leak one open file per time step
                self.files.append(fileName)
                if fulldata is None:
                    fulldata = values
                else:
                    fulldata.extend(values)
        return fulldata

    def __buildSubsetOrdinates(self, subset):
        """Return [(GridOrdinateDescription, [axis values as floats]), ...].

        One entry is produced per axis of the domain complement, in dictionary
        key order. Axes named in ``subset`` are restricted to the inclusive
        range (lower, upper); other axes keep all of their values. Each
        description's axisValues is the ', '-joined string of the kept values.
        """
        dc = self.__getDomainComplement()
        axes = []
        for position, key in enumerate(dc.keys()):
            if key in subset:
                lower, upper = subset[key][0], subset[key][1]
                selected = [val for val in dc[key] if lower <= val <= upper]
            else: # this dimension has not been subsetted
                selected = list(dc[key])
            god = GridOrdinateDescription()
            god.gridAxesSpanned = 'dim%s' % (position + 1)
            god.sequenceRule = '+x+y+z'
            god.definesAxis = key
            god.axisValues = ', '.join([str(val) for val in selected])
            axes.append((god, [float(val) for val in selected]))
        return axes

    def __writeSubsetNetCDF(self, path, timeSubset, axes, fulldata):
        """Write the time axis, the grid axes and the feature data to ``path``.

        axes must hold 2 or 3 (ordinate, values) pairs. The NetCDF file is
        closed even when writing fails.
        """
        ncfile = NetCDFFile(path,'w')
        try:
            # create the dimensions
            ncfile.createDimension('time', len(timeSubset))
            time_var = ncfile.createVariable('time', Float, ('time',))
            time_var.longname = 'time'
            # TODO: placeholder - every time is written as 5; the strings
            # NEED TO be converted 'back' to standard julian day.
            time_var[:] = [5] * len(timeSubset)

            for ordinate, vals in axes:
                ncfile.createDimension(ordinate.definesAxis, len(vals))
                item_var = ncfile.createVariable(ordinate.definesAxis, Float, (ordinate.definesAxis,))
                item_var[:] = vals
                print(ordinate.definesAxis)  # debug output kept from the original

            #this needs rethinking: the dimension order depends on the
            #(arbitrary) key order of the domain complement dictionary.
            names = [ordinate.definesAxis for ordinate, vals in axes]
            if len(names) == 3:
                dimensions = ('time', names[1], names[0], names[2])
            else:
                dimensions = ('time', names[1], names[0])
            feature_var = ncfile.createVariable(self.parsedFeature.id, Float, dimensions)
            print(shape(feature_var))  # debug output kept from the original
            print(shape(fulldata))
            feature_var[:] = fulldata
        finally:
            ncfile.close()
300
301
302
class FeatureBuilder:
    """Builds CSML documents for newly created features."""

    def makeGridSeries(self,domain,rangeSet):
        """Return the CSML text of a dataset holding one GridSeriesFeature.

        The feature is given ``domain`` and ``rangeSet``; its id ('??') and
        description ('???') and the dataset id ('TestDataSet') are
        placeholders. The XML is pretty-printed and has its inline namespaces
        removed before it is returned.
        """
        feat = GridSeriesFeature()
        feat.id = '??'
        feat.description = Description('???')
        feat.domain = domain
        feat.rangeSet = rangeSet
        # Wrap the single feature in a collection and attach it to a new,
        # otherwise empty dataset.
        ds = Dataset()
        ds.id = 'TestDataSet'
        #ds.metaDataProperty = [md]
        ds.featureCollection = FeatureCollection(members=[feat])
        #call the toXML method of the Dataset object:
        csml = ds.toXML()
        strCSML = parser_extra.PrettyPrint(csml)
        strCSML = parser_extra.removeInlineNS(strCSML)
        return strCSML
325       
Note: See TracBrowser for help on using the repository browser.