source: TI02-CSML/branches/csml-cdms2/API/ops_FileExtract.py @ 3627

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI02-CSML/branches/csml-cdms2/API/ops_FileExtract.py@3627
Revision 3627, 7.0 KB checked in by spascoe, 12 years ago (diff)

This branch contains CSML converted to use cdat_lite-5.

  • convertcdms was run on the source
  • the MA.set_print_limit call was changed to the numpy equivalent
  • The tests were changed to account for the existence of numpy scalar types.

All tests, except the two known to fail, pass on i686 ubuntu.

Line 
1# Adapted for numpy/ma/cdms2 by convertcdms.py
2''' ops_AbstractFileExtract  contains operations for AbstractFileExtracts which are inherited by any FileExtracts - eg NetCDFExtracts. Methods are attached on the fly.'''
3import csml.csmllibs.csmldataiface
4import sys
5import numpy.oldnumeric as Numeric
6import numpy.oldnumeric.ma as MA
7
8
class DataContainer(object):
    ''' A container to wrap some of the inconsistencies of Numeric/MA etc
    Should convert to Numpy in future.

    data holds the raw object handed to the constructor (None when nothing
    was passed); dataArray holds the Numeric array built from it by setup(),
    or None until setup() has run.
    '''
    #dataArray needs to be the standards data container
    def __init__(self, data=None):
        '''
        @param data:    (optional) object to wrap; when given, setup() is run immediately.
        '''
        # Bind both attributes unconditionally so that setup() can be called
        # later on a container that was created empty without hitting an
        # AttributeError on self.data.
        self.data=data
        self.dataArray=None
        if data is not None:
            self.setup()

    def setup(self):
        '''Populate self.dataArray with a Numeric array built from self.data.

        If the direct conversion raises MAError, the data is first flattened
        to a plain Python list with tolist() and converted from that.
        '''
        try:
            self.dataArray=Numeric.array(self.data)
        # NOTE(review): the old MA package exposed the error as MA.MA.MAError;
        # numpy.oldnumeric.ma appears to expose it at module level - confirm.
        # The lookup accepts either layout and, like the original, is only
        # evaluated when an exception is actually raised.
        except getattr(MA, 'MA', MA).MAError:
            self.dataArray=Numeric.array(self.data.tolist())
25         
26def testmethod(self):
27    '''test method'''
28    print 'testmethod for AbstractFileExtract feature'
29    return 'testmethod AbstractFileExtract'
30
31
def getAxisInfo(self, varName):
    '''Placeholder - not implemented. Ignores its argument and returns None.
    @param varName:    name of a variable (currently unused)
    @return:    None
    '''
    return None
34
def getData(self,  fileposition=None,**kwargs):
    '''
    get data for a particular file in the file extract.

    The name in self.variableName.CONTENT is first tried as an axis; if that
    fails it is read as a variable (subset by kwargs when any are given).

    @param fileposition:    position (optional) of file in filelist (of paths)
    @param kwargs:    a subset request dictionary
    @return:    dataArray, fillvalue, axisorder, units - array of data and ancillary info.
                When the name resolves to an axis, fillvalue, axisorder and units are all None.
    '''
    container=csml.API.csmlContainer.globalContainer
    # May be None, in which case there is no fallback location for files.
    directory=container.OUTPUTDIR

    # The cache is keyed by self.id only, so it can only be trusted when no
    # particular file was asked for; otherwise a request for one file would
    # be answered with the data cached for another.
    # NOTE(review): kwargs are not part of the key either - a cached result
    # may come from a different subset request; confirm how CACHE is reset.
    useCache=fileposition is None

    if useCache:
        try:
            dataArray, fillvalue, axisorder, units=container.CACHE[self.id]
        except Exception:
            # best effort: nothing cached (or no usable cache) - read the file
            pass
        else:
            return dataArray, fillvalue, axisorder, units

    #file position defines the position of the filename if a list of filenames exists
    #**kwargs can hold subsetting request.
    #get the right file  ##TO DO< check this is ok with FILE LIST

    # TODO - cannot subset times within an individual file - no way to determine the original time crs..

    if fileposition is not None:
        try:
            filename=self.fileList.fileNames.CONTENT.split()[fileposition]
        except Exception:
            # no file list, or position not in it: fall back to the single file name
            filename=self.fileName.CONTENT
    else:
        filename=self.fileName.CONTENT

    #open the file
    DI=csml.csmllibs.csmldataiface.DataInterface()
    DI=DI.getUnknownInterfaceType(filename)
    try:
        DI.openFile(filename)
    except Exception:
        if directory is None:
            # nowhere else to look - report the failure instead of carrying
            # on with a file that was never opened
            raise
        filename=directory +'/' + filename
        DI.openFile(filename)

    try:
        try:
            # first try the name as an axis
            DI.setAxis(self.variableName.CONTENT)
            dataArray=DI.getDataForAxis()
            fillvalue=None
            axisorder=None
            units=None
        except Exception:
            # not readable as an axis: treat it as a variable
            DI.setVariable(self.variableName.CONTENT)
            #get fill value (if there is one)
            fillvalue=DI.getVariableAttribute('_FillValue')
            if kwargs:
                data=DI.getSubsetOfDataForVar(**kwargs)
            else:
                data=DI.getDataForVar()
            dataArray=DataContainer(data).dataArray
            units=[]
            axisorder=DI.getVariableAxes()
            for axis in axisorder:
                DI.setAxis(axis)
                units.append(DI.getAxisAttribute('units'))
    finally:
        # release the file even if reading failed
        DI.closeFile()

    #if fileposition is none, add to cache for retrieval by id later:
    if useCache:
        try:
            container.CACHE[self.id]=[dataArray, fillvalue, axisorder, units]
        except Exception:
            # caching is optional; failure to store must not lose the data
            pass
    return dataArray, fillvalue, axisorder, units
110       
def getDataFromChunks(self, minIdx, maxIdx):
    '''given a list of files of unknown length and an index range spanning some or all of those files, returns the data from that index range. Only works with single dimensional data.
    e.g. used for QXF
    The files are treated as one long array laid end to end, and both indices are inclusive positions in that array.
    @param minIdx:   start index
    @param maxIdx:    end index (inclusive)
    @return:     dataarray, fillvalue (fillvalue is the '_FillValue' attribute of the last file read)
    '''
    directory=csml.API.csmlContainer.globalContainer.OUTPUTDIR

    try:
        filenames=self.fileList.fileNames.CONTENT.split()
    except Exception:
        filenames=[]
    if not filenames:
        # no (or empty) file list: fall back to the single file name
        filenames=[self.fileName.CONTENT]

    pieces=[]
    fillvalue=None
    startpoint=0    # index, in the concatenated array, of the current file's first element
    for filename in filenames:
        if startpoint > maxIdx:
            # everything from here on lies past the selection, so there is
            # no need to open and read the remaining files
            break
        DI=csml.csmllibs.csmldataiface.DataInterface()
        DI=DI.getUnknownInterfaceType(filename)
        try:
            DI.openFile(filename)
        except Exception:
            if directory is None:
                # nowhere else to look - report the failure
                raise
            filename=directory +'/' + filename
            DI.openFile(filename)
        try:
            DI.setVariable(self.variableName.CONTENT)
            datachunk=DI.getDataForVar()
            fillvalue=DI.getVariableAttribute('_FillValue')
            chunklength=len(datachunk)
            # Overlap of the inclusive range [minIdx, maxIdx] with this
            # file's range [startpoint, startpoint+chunklength), expressed
            # as a slice local to the chunk.
            sliceMin=max(minIdx, startpoint) - startpoint
            sliceMax=min(maxIdx + 1, startpoint + chunklength) - startpoint
            if sliceMin < sliceMax:
                pieces.append(datachunk[sliceMin:sliceMax])
        finally:
            # close every file, not just the last one opened
            DI.closeFile()
        startpoint=startpoint + chunklength

    # Always concatenate the collected pieces, so the result no longer
    # depends on an exact type check against the chunk's class.
    result=MA.concatenate(pieces)
    return result, fillvalue
192       
def __calendar(file,timedim):
    ''' given a file and the name of the time dimension returns the name of the calendar and units used. eg 360day, days since 2001:01:01
    @param file:   filepath
    @param timedim:     name of time dimension
    @return:    calendar name, calendar units
    '''
    # The module is imported as csml.csmllibs.csmldataiface, so the bare name
    # 'csmldataiface' is not defined here; use the fully qualified path.
    DI = csml.csmllibs.csmldataiface.DataInterface()
    DI=DI.getUnknownInterfaceType(file)
    DI.openFile(file)
    try:
        DI.setAxis(timedim)
        cal=DI.getAxisAttribute('calendar')
        units=DI.getAxisAttribute('units')
    finally:
        # do not leave the file open once the attributes have been read
        DI.closeFile()
    return cal, units
206   
Note: See TracBrowser for help on using the repository browser.