1 | ''' ops_AbstractFileExtract contains operations for AbstractFileExtracts which are inherited by any FileExtracts - eg NetCDFExtracts. Methods are attached on the fly.''' |
---|
2 | import csml.csmllibs.csmldataiface |
---|
3 | import sys |
---|
4 | import Numeric |
---|
5 | import MA |
---|
6 | |
---|
7 | |
---|
class DataContainer(object):
    '''A container to wrap some of the inconsistencies of Numeric/MA etc.

    Should convert to Numpy in future.

    Attributes:
    data      -- the object handed to the constructor (None if nothing was given)
    dataArray -- the standard data container: a Numeric array built from data,
                 or None while the container holds no data
    '''

    def __init__(self, data=None):
        '''Store data and, when some was supplied, build dataArray from it.

        @param data: optional array-like object to wrap
        '''
        # Always define both attributes, so that setup() can be called on an
        # empty container without raising AttributeError.
        self.data = data
        self.dataArray = None
        if data is not None:
            self.setup()

    def setup(self):
        '''(Re)build self.dataArray from self.data.

        Leaves dataArray as None when there is no data to convert.
        '''
        if self.data is None:
            self.dataArray = None
            return
        try:
            self.dataArray = Numeric.array(self.data)
        except MA.MA.MAError:
            # MA refused the direct conversion: go through a plain list first.
            self.dataArray = Numeric.array(self.data.tolist())
---|
24 | |
---|
25 | def testmethod(self): |
---|
26 | '''test method''' |
---|
27 | print 'testmethod for AbstractFileExtract feature' |
---|
28 | return 'testmethod AbstractFileExtract' |
---|
29 | |
---|
30 | |
---|
def getAxisInfo(self, varName):
    '''Placeholder: not implemented yet.

    @param varName: accepted but currently ignored
    @return: always None
    '''
    return None
---|
33 | |
---|
34 | def getData(self, fileposition=None,**kwargs): |
---|
35 | ''' |
---|
36 | Get the data for one file of this file extract; a result already stored in the global cache under self.id is returned as is. |
---|
37 | @param fileposition: optional index into the whitespace-separated list of paths in self.fileList.fileNames.CONTENT; if omitted, or if that lookup fails, self.fileName.CONTENT is used |
---|
38 | @param kwargs: a subset request dictionary, passed unchanged to DI.getSubsetOfDataForVar |
---|
39 | @return: dataArray, fillvalue, axisorder, units - array of data and ancillary info. |
---|
40 | ''' |
---|
41 | |
---|
42 | if csml.API.csmlContainer.globalContainer.OUTPUTDIR is not None: |
---|
43 | directory=csml.API.csmlContainer.globalContainer.OUTPUTDIR |
---|
44 | else: |
---|
45 | directory=None |
---|
46 | |
---|
47 | |
---|
48 | #return the cached result for self.id if there is one. NOTE(review): the original intent was to cache only fileposition=None requests, but neither fileposition nor kwargs is checked here - confirm |
---|
49 | try: |
---|
50 | [dataArray, fillvalue, axisorder, units]=csml.API.csmlContainer.globalContainer.CACHE[self.id] |
---|
51 | return dataArray, fillvalue, axisorder, units |
---|
52 | except: |
---|
53 | pass |
---|
54 | |
---|
55 | #file position defines the position of the filename if a list of filenames exists |
---|
56 | #**kwargs can hold subsetting request. |
---|
57 | #get the right file. TODO: check this is ok with FILE LIST |
---|
58 | |
---|
59 | # TODO - cannot subset times within an individual file - no way to determine the original time crs.. |
---|
60 | |
---|
61 | if fileposition is not None: |
---|
62 | try: |
---|
63 | file =self.fileList.fileNames.CONTENT.split()[fileposition] |
---|
64 | except: |
---|
65 | file=self.fileName.CONTENT |
---|
66 | else: |
---|
67 | file=self.fileName.CONTENT |
---|
68 | #open the file; if that fails and an OUTPUTDIR is configured, retry with the path prefixed by that directory |
---|
69 | DI = csml.csmllibs.csmldataiface.DataInterface() |
---|
70 | DI=DI.getUnknownInterfaceType(file) |
---|
71 | try: |
---|
72 | DI.openFile(file) |
---|
73 | except: |
---|
74 | if directory is not None: |
---|
75 | file=directory +'/' + file |
---|
76 | DI.openFile(file) |
---|
77 | DI.setAxis(self.variableName.CONTENT)  # NOTE(review): same call as the first statement of the try below; the nesting level of this line is not recoverable from this listing - confirm whether it is needed |
---|
78 | try: |
---|
79 | DI.setAxis(self.variableName.CONTENT) |
---|
80 | data=DI.getDataForAxis() |
---|
81 | dataArray=data |
---|
82 | fillvalue=None |
---|
83 | axisorder=None |
---|
84 | units=None |
---|
85 | except: |
---|
86 | pass  # NOTE(review): indentation is lost in this listing, so it is unclear whether the variable-reading statements that follow belong to this except branch (fallback when the name is not an axis) or always run - confirm before restructuring |
---|
87 | DI.setVariable(self.variableName.CONTENT) |
---|
88 | #get fill value (if there is one) |
---|
89 | fillvalue=DI.getVariableAttribute('_FillValue') |
---|
90 | if kwargs: |
---|
91 | data = DI.getSubsetOfDataForVar(**kwargs) |
---|
92 | else: |
---|
93 | data = DI.getDataForVar() |
---|
94 | DC=DataContainer(data) |
---|
95 | dataArray=DC.dataArray |
---|
96 | units=[] |
---|
97 | axisorder=DI.getVariableAxes() |
---|
98 | for axis in axisorder: |
---|
99 | DI.setAxis(axis) |
---|
100 | units.append(DI.getAxisAttribute('units')) |
---|
101 | DI.closeFile() |
---|
102 | |
---|
103 | #add the result to the cache for retrieval by id later. NOTE(review): stored under self.id whatever fileposition/kwargs were; failures to store are ignored |
---|
104 | try: |
---|
105 | csml.API.csmlContainer.globalContainer.CACHE[self.id]=[dataArray, fillvalue, axisorder, units] |
---|
106 | except: |
---|
107 | pass |
---|
108 | return dataArray, fillvalue, axisorder, units |
---|
109 | |
---|
def getDataFromChunks(self, minIdx, maxIdx):
    '''Return the values between two indices of a series split across files.

    The files of the extract (the whitespace-separated paths in
    self.fileList.fileNames.CONTENT, or the single self.fileName.CONTENT when
    no file list is available) are read in order and treated as one long
    array.  Only works with single dimensional data.  e.g. used for QXF.

    @param minIdx: start index (inclusive) into the combined series
    @param maxIdx: end index (inclusive) into the combined series
    @return: dataarray, fillvalue - the selected values and the '_FillValue'
        attribute of the variable as read from the last file in the list
    '''
    minIndex = minIdx
    maxIndex = maxIdx
    # OUTPUTDIR (may be None) is used as a fallback location for the files.
    directory = csml.API.csmlContainer.globalContainer.OUTPUTDIR

    try:
        filenames = self.fileList.fileNames.CONTENT.split()
    except Exception:
        # no usable file list on this extract: use the single file name
        filenames = [self.fileName.CONTENT]

    data = []
    opened = []  # every interface we tried to open, so all can be closed
    totalarraylength = 0
    try:
        for file in filenames:
            DI = csml.csmllibs.csmldataiface.DataInterface()
            DI = DI.getUnknownInterfaceType(file)
            try:
                DI.openFile(file)
            except Exception:
                # retry relative to the output directory when one is set
                if directory is not None:
                    file = directory + '/' + file
                    DI.openFile(file)
            opened.append(DI)
            DI.setVariable(self.variableName.CONTENT)
            datachunk = DI.getDataForVar()
            chunklength = len(datachunk)
            # this file covers indices startpoint .. totalarraylength-1
            startpoint = totalarraylength
            totalarraylength = startpoint + chunklength

            if minIndex >= totalarraylength:
                # selection starts in a later file
                continue

            if minIndex >= startpoint:
                # selection starts in this file
                sliceMin = minIndex - startpoint
                if maxIndex < totalarraylength:
                    # ... and ends in it as well.
                    # NOTE(review): data is rebound from the list to the
                    # slice itself; the type test after the loop relies on it.
                    sliceMax = sliceMin + (maxIndex - minIndex) + 1
                    data = datachunk[sliceMin:sliceMax]
                else:
                    # selection goes beyond this file
                    data.append(datachunk[sliceMin:chunklength])
                continue

            # selection started in an earlier file
            if maxIndex < startpoint:
                # gone past the end of the selection
                continue
            if maxIndex >= totalarraylength:
                # the whole chunk lies inside the selection
                data.append(datachunk)
            else:
                sliceMax = (maxIndex - startpoint) + 1
                data.append(datachunk[0:sliceMax])

        # as before, the fill value comes from the last file visited
        fillvalue = DI.getVariableAttribute('_FillValue')
    finally:
        # Previously only the last interface was closed (and none on error).
        for handle in opened:
            handle.closeFile()

    # Exact type test kept on purpose: a single-file selection is already a
    # masked array, anything else is a list of chunks to join.
    if type(data) is MA.MA.MaskedArray:
        result = data
    else:
        result = MA.concatenate(data)
    return result, fillvalue
---|
191 | |
---|
def __calendar(file,timedim):
    ''' given a file and the name of the time dimension returns the name of the calendar and units used. eg 360day, days since 2001:01:01
    @param file: filepath
    @param timedim: name of time dimension
    @return: calendar name, calendar units (the 'calendar' and 'units' attributes of the time axis)
    '''
    # Only the top-level name "csml" is bound by this module's
    # "import csml.csmllibs.csmldataiface", so the interface class has to be
    # reached through the full dotted path (a bare "csmldataiface" is undefined).
    DI = csml.csmllibs.csmldataiface.DataInterface()
    DI = DI.getUnknownInterfaceType(file)
    DI.openFile(file)
    try:
        DI.setAxis(timedim)
        cal = DI.getAxisAttribute('calendar')
        units = DI.getAxisAttribute('units')
    finally:
        # release the file whether or not the attributes could be read
        DI.closeFile()
    return cal, units
---|
205 | |
---|