1 | """A set of classes running checks and providing utilities to support checks""" |
---|
2 | import string, re, os, sys, traceback, ctypes |
---|
3 | |
---|
def strmm3( mm ):
    """Format a list of (attribute, found-value, correct-value) triples as a
    single semicolon-separated string, e.g. 'a="b" [correct: "c"]; ...'.

    Uses str.join with a list comprehension rather than the Python-2-only
    string.join(map(...)) form; behaviour is unchanged and the helper now
    also runs under Python 3."""
    return '; '.join( [ '%s="%s" [correct: "%s"]' % x for x in mm ] )
---|
6 | |
---|
7 | from fcc_utils import mipTableScan |
---|
8 | from xceptions import * |
---|
9 | |
---|
# Pattern for DLR ozone file names whose trailing '-YYYYMMDD-fv0100.nc' stamp
# makes the global "id" attribute differ for every file (stripped in getId).
re_dlr01 = re.compile( 'MERGED-DLR_1M-[0-9]{8}-fv0100.nc$' )
---|
11 | |
---|
class timeInt(object):
    """Check that time increments are consistent with a declared frequency
    under a given calendar, using tolerance-widened days-per-year and
    days-per-month bounds."""

    # calendar name --> index into mnmx/mmnmmx below; None marks a calendar
    # with no fixed day-count ranges (explicit bounds then required).
    vc = {'gregorian':0, 'standard':0, 'proleptic_gregorian':0, 'noleap':1, '365_day':1, 'all_leap':2, '366_day':2, '360_day':3, 'julian':0, 'none':None}
    mnmx = [ (365,366),(365,365),(366,366),(360,360) ]    # (min,max) days per year
    mmnmmx = [ (28,31),(28,31),(29,31),(30,30) ]          # (min,max) days per month

    def __init__(self,cal='proleptic_gregorian',dpymn=None, dpymx=None,dpmmn=None,dpmmx=None,tol=1.e-6):
        """Record day-count bounds (widened by *tol*) for calendar *cal*.

        For calendars not listed in self.vc -- and for 'none', which has no
        fixed ranges -- dpymn/dpymx (days per year) and dpmmn/dpmmx (days per
        month) must be supplied explicitly."""
        self.tol = tol
        # .get() routes both unknown calendars and 'none' (vc value None) to
        # the explicit-bounds branch; previously 'none' fell through to the
        # table lookup and crashed on mnmx[None].
        if cal is None or self.vc.get(cal) is None:
            assert dpymx != None and dpymn != None, 'If standard calendar is not used, dpymn and dpymx must be set'
            assert dpmmx != None and dpmmn != None, 'If standard calendar is not used, dpmmn and dpmmx must be set'
            self.dpymn = dpymn - tol
            self.dpymx = dpymx + tol
            self.dpmmn = dpmmn - tol
            self.dpmmx = dpmmx + tol
        else:
            self.dpymn = self.mnmx[self.vc[cal]][0] - tol
            self.dpymx = self.mnmx[self.vc[cal]][1] + tol
            self.dpmmn = self.mmnmmx[self.vc[cal]][0] - tol
            self.dpmmx = self.mmnmmx[self.vc[cal]][1] + tol
        # frequency label --> ISO 8601 period designator
        self.map = { 'yr':'P1Y','monClim':'P1M','mon':'P1M','day':'P1D','6hr':'P6H','3hr':'P3H'}
        self.nd = { 'x':'y' }

    def setUnit(self,u):
        """Record the time unit; only 'days', 'months' and 'years' supported,
        anything else sets self.u to None with a warning."""
        if u not in ['days','months','years']:
            print( 'Time unit %s not supported' % u )
            self.u = None
        else:
            self.u = u

    def chk(self,v,u,f):
        """Check increment *v*, in units *u*, against frequency *f*.

        Returns True/False for a supported frequency/unit combination, or a
        (0, message) tuple when no check is available."""
        if f not in self.map:
            return (0,'No frequency check available for f = %s' % f )
        if u not in ['days']:
            return (0,'No frequency check available for units = %s' % u )
        # Bug fix: self.map is a dictionary and must be indexed, not called
        # (was "self.map(f)", which raised TypeError on every call).
        x = self.map[f]
        if x == 'P1Y':
            return (v > self.dpymn) and (v < self.dpymx)
        elif x == 'P1M':
            return (v > self.dpmmn) and (v < self.dpmmx)
        elif x == 'P1D':
            return (v > 1.-self.tol) and (v < 1.+self.tol)
        elif x == 'P6H':
            return (v > 0.25-self.tol) and (v < 0.25+self.tol)
        elif x == 'P3H':
            return (v > 0.125-self.tol) and (v < 0.125+self.tol)
---|
57 | |
---|
58 | |
---|
class reportSection(object):
    """A node in a hierarchical check report: holds (id, cls, result, message)
    records plus nested sub-sections, and aggregates pass/fail counts via
    audit()."""

    def __init__(self,id,cls,parent=None, description=None):
        """Create an (initially open, empty) report section."""
        self.id = id
        self.cls = cls
        self.parent = parent
        self.description = description
        self.records = []        # (id, cls, result, message) tuples
        self.subsections = []    # child reportSection instances
        self.closed = False      # once closed, no records/sub-sections may be added
        self.npass = 0           # passing-record count (recursive, valid after audit)
        self.fail = 0            # failing-record count (recursive, valid after audit)
        self.auditDone = True    # empty section: counts of (0,0) are already valid

    def addSubSection( self, id, cls, description=None):
        """Create, register and return a child section."""
        assert not self.closed, 'Attempt to add sub-section to closed report section'
        self.subsections.append( reportSection(id, cls, parent=self, description=description ) )
        self.auditDone = False
        return self.subsections[-1]

    def addRecord( self, id, cls, res, msg ):
        """Append a single result record; *res* is truthy for a pass."""
        assert not self.closed, 'Attempt to add record to closed report section'
        self.records.append( (id, cls, res, msg) )
        self.auditDone = False

    def close(self):
        self.closed = True

    def reopen(self):
        self.closed = False

    def audit(self):
        """Recursively (re)compute npass/fail over records and sub-sections.

        Closes the section; a no-op if nothing has changed since the last
        audit."""
        if self.auditDone:
            return
        self.closed = True
        self.fail = 0
        self.npass = 0
        for ss in self.subsections:
            ss.audit()
            self.fail += ss.fail
            self.npass += ss.npass

        for r in self.records:
            if r[2]:
                self.npass += 1
            else:
                self.fail += 1
        # Bug fix: mark the audit as complete so repeated calls do not redo
        # the recursive count (the flag was previously never reset to True).
        self.auditDone = True
---|
class checkSeq(object):
    """Check that a numeric sequence is (approximately) uniformly spaced."""

    def __init__(self):
        pass

    def check(self,x):
        """Return True if consecutive differences of *x* vary by less than
        1.e-4 of the mean step.

        Side effects: records self.delt (mean step), self.dmx and self.dmn
        (largest/smallest step). Requires len(x) >= 2."""
        # List comprehension instead of map(): map() returns an iterator on
        # Python 3, which would break len()/max()/min() below; behaviour on
        # Python 2 is unchanged.
        d = [ x[i+1] - x[i] for i in range(len(x)-1) ]
        self.delt = sum(d)/len(d)
        self.dmx = max(d)
        self.dmn = min(d)
        return self.dmx - self.dmn < abs(self.delt)*1.e-4
---|
117 | |
---|
cs = checkSeq()  # module-level helper instance; presumably shared by check routines elsewhere in the file -- confirm usage
---|
119 | |
---|
class checkBase(object):
    """Base class for checks, containing a set of standard methods for managing operation of checks and logging of results"""

    def __init__(self,cls="CORDEX",reportPass=True,parent=None,monitor=None):
        """Creat class instance: set defaults, link arguments to instance, create a range of compiled regular expressions"""
        self.cls = cls
        self.project = cls
        # parent is the controlling checker object; it must supply
        # abortMessageCount, log and pcfg, and carries amapListDraft
        # (created below if absent).
        self.abortMessageCount = parent.abortMessageCount
        self.monitor = monitor
        self.re_isInt = re.compile( '[0-9]+' )
        self.errorCount = 0
        self.passCount = 0
        # First assignment is superseded immediately: the working value is
        # 1.e20 rounded through a 32-bit float, so comparisons match
        # single-precision values read back from data files.
        self.missingValue = 1.e20
        self.missingValue = ctypes.c_float(1.00000002004e+20).value
        from file_utils import ncLib
        if ncLib == 'netCDF4':
            import numpy
            self.missingValue = numpy.float32(self.missingValue)
        self.parent = parent
        self.reportPass=reportPass
        self.pcfg = parent.pcfg
        ################################
        # Project-specific control tables, copied from the configuration.
        self.requiredGlobalAttributes = self.pcfg.requiredGlobalAttributes
        self.controlledGlobalAttributes = self.pcfg.controlledGlobalAttributes
        self.globalAttributesInFn = self.pcfg.globalAttributesInFn
        self.requiredVarAttributes = self.pcfg.requiredVarAttributes
        self.drsMappings = self.pcfg.drsMappings
        #######################################
        self.checks = ()          # tuple of check methods; populated by subclass init()
        self.messageCount = 0
        self.init()               # subclass hook: sets self.id, self.checkId, self.checks
        if not hasattr( self.parent, 'amapListDraft' ):
            self.parent.amapListDraft = []

    def isInt(self,x):
        """Check that a string is a representation of an integer"""
        # NOTE(review): pattern is not anchored, so e.g. '12x' also matches;
        # confirm whether full-string matching was intended.
        return self.re_isInt.match( x ) != None

    def logMessage(self, msg, error=False ):
        """Log messages and count messages"""
        self.messageCount += 1
        # abortMessageCount < 0 disables the limit; otherwise an assertion is
        # raised once the configured number of messages has been issued
        # (used for testing, per the message text).
        assert self.abortMessageCount < 0 or self.abortMessageCount > self.messageCount, 'Raising error [TESTX01], perhaps for testing'
        if self.parent != None and self.parent.log != None:
            if error:
                self.parent.log.error( msg )
            else:
                self.parent.log.info( msg )
        else:
            print msg

        # Optionally append the message to a secondary log file;
        # self.appendLogfile is set by test() just before this method runs.
        doThis = True
        if self.appendLogfile[0] != None and doThis:
            if self.monitor != None:
                # snapshot open file descriptor count to detect handle leaks
                nofh0 = self.monitor.get_open_fds()
            # NOTE(review): self.c4i is not set anywhere in this class --
            # presumably provided by a subclass or the parent; confirm.
            xlog = self.c4i.getFileLog( self.appendLogfile[1], flf=self.appendLogfile[0] )
            if error:
                xlog.error( msg )
            else:
                xlog.info( msg )
            self.c4i.closeFileLog()
            if self.monitor != None:
                nofh9 = self.monitor.get_open_fds()
                if nofh9 > nofh0:
                    print 'Leaking file handles [1]: %s --- %s' % (nofh0, nofh9)

    def log_exception( self, msg):
        """Logging of exceptions -- putting trace information in log files"""
        if self.parent != None and self.parent.log != None:
            self.parent.log.error("Exception has occured" ,exc_info=1)
        else:
            traceback.print_exc(file=sys.stdout)

    def log_error( self, msg ):
        """Create an error log message and call logMessage; count errors;"""
        self.lastError = msg
        self.errorCount += 1
        self.logMessage( '%s.%s: FAILED:: %s' % (self.id,self.getCheckId(),msg), error=True )

    def log_pass( self ):
        """Create a pass log message and call logMessage; count passes;"""
        # NOTE(review): passCount is initialised to 0 like a counter but is
        # set to True here rather than incremented -- verify intent.
        self.passCount = True
        if self.reportPass:
            self.logMessage( '%s.%s: OK' % (self.id,self.getCheckId()) )

    def log_abort( self ):
        """Record that checks could not be completed and abort this module's checks."""
        self.completed = False
        self.logMessage( '%s.%s: ABORTED:: Errors too severe to complete further checks in this module' % (self.id,'xxx') )
        raise abortChecks

    def status(self):
        """Return a '<module id>.<check id>' status string."""
        return '%s.%s' % (self.id,self.getCheckId())

    def getCheckId(self,full=True):
        """Return the current check identifier as a string.

        self.checkId is either a plain string or an (id, name) pair; for a
        pair, full=True formats it as 'id: [name]', full=False gives just id."""
        if type( self.checkId ) == type( 'x' ):
            return self.checkId
        else:
            if full:
                return '%s: [%s]' % self.checkId
            else:
                return self.checkId[0]

    def test(self,res,msg,abort=False,part=False,appendLogfile=(None,None)):
        """Handle test results.
        :param res: [True/False] result of test;
        :param msg: Message describing the test;
        :param abort: {optional} Set True if checks should be aborted when test fails;
        :param part: {optional} Set True if this is a component of a test (logging of pass suppressed);
        :param appendLogfile: {optional} Allows results to be appended to pre-existing log file;
        """
        # stored for logMessage, which reads it when routing the message
        self.appendLogfile = appendLogfile
        if res:
            if not part:
                self.log_pass()
        else:
            self.log_error(msg)
            if abort:
                self.log_abort()
        return res

    def runChecks(self):
        """Run all the checks registered in this instance (in self.checks) and handle exceptions"""

        try:
            for c in self.checks:
                c()  # run check
            self.completed = True
        except abortChecks:
            ## error logging done before raising this exception
            return
        except:
            # any other exception is logged with a traceback and re-raised as
            # loggedException so callers know it has already been reported
            self.log_exception( 'Exception caught by runChecks' )
            raise loggedException
---|
252 | |
---|
class checkFileName(checkBase):
    """Check basic syntax of file names (i.e. checks properties of the text string, it does not attempt to access the file).
    Inherits :class:`checkBase` class. Checks are run by the :meth:`check` method."""

    def init(self):
        """Set check identifiers and register the two file name check methods."""
        self.id = 'C4.001'
        self.checkId = 'unset'
        self.isFixed = False
        self.step = 'Initialised'
        self.checks = (self.do_check_fn,self.do_check_fnextra)
        self.re_c1 = re.compile( '^[0-9]*$' )   # digits-only string (indicative date/time)
        self.fnDict = {}
        ####

    def check(self,fn):
        """Initiate checks: manage arguments and then call *runChecks* (inherited from checkBase class).
        Arguments: fn: file name: the file name to be checked."""
        self.errorCount = 0
        # NOTE(review): message mentions "checkGrids" -- looks like a
        # copy/paste of another class's assertion text.
        assert type(fn) in [type('x'),type(u'x')], '1st argument to "check" method of checkGrids shound be a string variable name (not %s)' % type(fn)
        self.fn = fn
        self.fnsep = self.pcfg.fNameSep   # project-configured file name element separator

        self.runChecks()
        # expose parsed file name components to the parent for later checks
        self.parent.fnDict = self.fnDict
        ###
    def do_check_fn(self):
        """Basic file name checks:
        (1) Check suffix;
        (1b) [for ESA-CCI files] check presence of "ESACCI" and identify file naming convention;
        (2) Split file name into components and check number of such components;
        (3) Additional specialist checks for ESA-CCI, CORDEX, CMIP-type (for the time range).
        """
        fn = self.fn
        self.errorCount = 0
        self.completed = False

        ## check basic parsing of file name
        self.checkId = ('001','parse_filename')
        self.test( fn[-3:] == '.nc', 'File name ending ".nc" expected', abort=True, part=True )
        bits = string.split( fn[:-3], self.fnsep )

        self.fnParts = bits[:]
        if self.pcfg.domainIndex != None:
            self.domain = self.fnParts[self.pcfg.domainIndex]
        else:
            self.domain = None

        # ESA-CCI names come in three conventions, identified as esaFnId:
        # 2 = 'ESA' prefix, 1 = 'ESACCI' prefix, 0 = older form in which an
        # embedded 'L<level>_<...>' element is split into two components.
        if self.pcfg.projectV.id in ['ESA-CCI']:
            self.test( 'ESACCI' in bits[:2] or 'ESA' == bits[0], 'File name not a valid ESA-CCI file name: %s' % fn, abort=True )
            if bits[0] == 'ESA':
                self.esaFnId = 2
            elif bits[0] == 'ESACCI':
                self.esaFnId = 1
            else:
                self.esaFnId = 0
                bb = string.split( bits[2], '_' )
                self.test( bits[2][0] == 'L' and len(bb) == 2, 'Cannot parse ESA-CCI file name: %s' % fn, abort=True )
                bits = bits[:2] + bb + bits[3:]
                self.fnParts = bits[:]

            self.pcfg.setEsaCciFNType(self.esaFnId)
        self.test( len(bits) in self.pcfg.fnPartsOkLen, 'File name not parsed in %s elements [%s]' % (str(self.pcfg.fnPartsOkLen),str(bits)), abort=True )

        # Collect named components into fnDict: configuration entries of the
        # form '*name' mark file name elements that carry attribute values.
        self.fnDict = {}
        if self.pcfg.projectV.id in ['ESA-CCI']:
            l0 = {0:6, 1:5, 2:5}[self.esaFnId]   # leading components carrying attributes, per convention
            for i in range(l0):
                x = self.pcfg.globalAttributesInFn[i]
                if x != None and x[0] == '*':
                    self.fnDict[x[1:]] = bits[i]
            self.fnDict['version'] = bits[-1]
            self.fnDict['gdsv'] = 'na'
            # optional trailing elements: 'additional' segment and/or GDS version
            if self.esaFnId == 0:
                if len(bits) == 9:
                    self.fnDict['additional'] = bits[-3]
                    self.fnDict['gdsv'] = bits[-2]
                elif len(bits) == 8:
                    if bits[-2][0] == 'v':
                        self.fnDict['gdsv'] = bits[-2]
                    else:
                        self.fnDict['additional'] = bits[-2]
            elif self.esaFnId in [1,2]:
                if len(bits) == 8:
                    self.fnDict['additional'] = bits[-3]

        if self.pcfg.groupIndex != None:
            self.group = self.fnParts[self.pcfg.groupIndex]
        else:
            self.group = None

        if self.pcfg.freqIndex != None:
            self.freq = self.fnParts[self.pcfg.freqIndex]
        elif self.group in ['fx','fixed']:
            self.freq = 'fx'
        else:
            self.freq = None

        ##if self.cls == 'CORDEX':
        ##self.freq = self.fnParts[7]
        ##elif self.cls == 'SPECS':
        ##self.freq = self.fnParts[1]

        self.var = self.fnParts[self.pcfg.varIndex]

        # Optional renaming of the variable via the project's fnvdict table,
        # keyed on either the bare name or '<name>:<additional segment>'.
        if self.pcfg.fnvdict != None:
            if self.pcfg.fnvdict.has_key( self.var ):
                self.var = self.pcfg.fnvdict.get( self.var )['v']
            else:
                addi = self.fnDict.get('additional','xxxx')
                thiskey = '%s:%s' % (self.var,addi)
                if self.pcfg.fnvdict.has_key( thiskey ):
                    self.var = self.pcfg.fnvdict.get( thiskey )['v']

        self.isFixed = self.freq in ['fx','fixed']
        # NOTE(review): set unconditionally True rather than to self.isFixed;
        # later attribute checks read parent.fileIsFixed -- confirm intent.
        self.parent.fileIsFixed = True
        if self.isFixed:
            self.test( len(self.fnParts) in self.pcfg.fnPartsOkFixedLen, 'Number of file name elements not acceptable for fixed data' )

        self.checkId = ('002','parse_filename_timerange')
        if not self.isFixed:

            ## test time segment
            if self.pcfg.trangeType == 'CMIP':
                # CMIP style: final element is a 'YYYY...-YYYY...' start/end pair
                bits = string.split( self.fnParts[-1], '-' )
                self.test( len(bits) == 2, 'File time segment [%s] will not parse into 2 elements' % (self.fnParts[-1] ), abort=True, part=True )

                self.test( len(bits[0]) == len(bits[1]), 'Start and end time specified in file name [%s] of unequal length' % (self.fnParts[-1] ), abort=True, part=True )
                self.test( int(bits[0]) <= int(bits[1]), 'Start and end time specified in file name [%s] in wrong order' % (self.fnParts[-1] ), abort=True, part=True )

                for b in bits:
                    self.test( self.isInt(b), 'Time segment in filename [%s] contains non integer characters' % (self.fnParts[-1] ), abort=True, part=True )
                self.log_pass()
                self.fnTimeParts = bits[:]
            elif self.pcfg.trangeType == 'ESA-CCI':
                # ESA-CCI style: a single indicative date/time, YYYY[MM[DD[HH[MM[SS]]]]]
                self.pcfg.checkTrangeLen = False
                tt = self.fnParts[self.pcfg.trangeIndex]
                if self.test( len(tt) in [4,6,8,10,12,14] and self.re_c1.match(tt) != None, 'Length of indicative date/time not consistent with YYYY[MM[DD[HH[MM[SS]]]]] specification: %s' % self.fnParts[-1], part=True ):
                    # split into [YYYY,MM,DD,HH,MM,SS], defaulting any missing
                    # month/day fields to '01' and time fields to '00'
                    ll = [tt[:4],]
                    tt = tt[4:]
                    for j in range(5):
                        if len(tt) > 0:
                            ll.append( tt[:2] )
                            tt = tt[2:]
                        elif j in [1,2]:
                            ll.append( '01' )
                        else:
                            ll.append( '00' )
                    indDateTime = map( int, ll )
                    self.test( indDateTime[1] in range(1,13), 'Invalid Month in indicative date time %s' % str(ll), part=True )
                    self.test( indDateTime[2] in range(1,32), 'Invalid Day in indicative date time %s' % str(ll), part=True )
                    self.test( indDateTime[3] in range(25), 'Invalid hour in indicative date time %s' % str(ll), part=True )
                    self.test( indDateTime[4] in range(60), 'Invalid minute in indicative date time %s' % str(ll), part=True )
                    self.test( indDateTime[5] in range(60), 'Invalid second in indicative date time %s' % str(ll), part=True )

        self.checkId = '003'

        # Check the digit count of the time range against the frequency.
        self.checkId, ok = (('004','filename_timerange_length'),True)
        if (not self.isFixed) and self.pcfg.checkTrangeLen:
            # required digit count per frequency (a list means several allowed)
            ltr = { 'mon':6, 'sem':6, 'day':8, '3hr':[10,12], '6hr':10 }
            ok &=self.test( self.freq in ltr.keys(), 'Frequency [%s] not recognised' % self.freq, part=True )
            if ok:
                if type( ltr[self.freq] ) == type(0):
                    msg = 'Length of time range parts [%s,%s] not equal to required length [%s] for frequency %s' % (self.fnTimeParts[0],self.fnTimeParts[1],ltr[self.freq],self.freq)
                    ok &= self.test( len(self.fnTimeParts[0]) == ltr[self.freq], msg, part=True )
                elif type( ltr[self.freq] ) in [type([]),type( () )]:
                    msg = 'Length of time range parts [%s,%s] not in acceptable list [%s] for frequency %s' % (self.fnTimeParts[0],self.fnTimeParts[1],str(ltr[self.freq]),self.freq)
                    ok &= self.test( len(self.fnTimeParts[0]) in ltr[self.freq], msg, part=True )

            if ok:
                self.log_pass()

    def do_check_fnextra(self):
        """Check whether file name components match constraints -- but only if those constraints are not implicitly verified through comparison with global attributes in later checks"""
        self.checkId = ('004','file_name_extra' )
        vocabs = self.pcfg.vocabs
        m = []   # accumulates (component, value, note) for each mismatch
        for a in self.pcfg.controlledFnParts:
            if self.fnDict.has_key(a):
                try:
                    if not vocabs[a].check( str(self.fnDict[a]) ):
                        m.append( (a,self.fnDict[a],vocabs[a].note) )
                except:
                    print 'failed trying to check file name component %s' % a
                    raise baseException( 'failed trying to check file name component %s' % a )

        self.test( len(m) == 0, 'File name components do not match constraints: %s' % str(m) )
---|
442 | |
---|
443 | |
---|
444 | class checkGlobalAttributes(checkBase): |
---|
445 | """Check global and variable attributes, using tables of valid values""" |
---|
446 | |
---|
    def init(self):
        """Set check identifiers and register the single attribute check method."""
        self.id = 'C4.002'
        self.checkId = 'unset'
        self.step = 'Initialised'
        self.checks = (self.do_check_ga,)
        self.fileId = None   # dataset identifier, lazily computed by getId()
---|
453 | |
---|
    def check(self,globalAts, varAts,varName,varGroup, vocabs, fnParts):
        """Store the arguments on the instance and run the attribute checks.

        :param globalAts: dictionary of the file's global attributes;
        :param varAts: per-variable attribute dictionaries, keyed by variable name;
        :param varName: name of the target variable;
        :param varGroup: variable group / MIP table of the target variable;
        :param vocabs: controlled-vocabulary checkers, keyed by attribute name;
        :param fnParts: file name components from the file name check.
        """
        self.errorCount = 0
        # NOTE(review): message mentions "checkGrids" -- looks like a
        # copy/paste of another class's assertion text.
        assert type(varName) in [type('x'),type(u'x')], '1st argument to "check" method of checkGrids shound be a string variable name (not %s)' % type(varName)
        self.var = varName
        self.globalAts = globalAts
        self.varAts = varAts
        self.varGroup = varGroup
        self.vocabs = vocabs
        self.fnParts = fnParts
        self.runChecks()
---|
464 | |
---|
    def getId(self):
        """Set self.fileId, a dataset-level identifier built from the
        naming_authority and id global attributes.

        Special cases: DLR ozone files (per-file date stamp in id is
        stripped) and uk.ac.pml files (id truncated after 'OC4v6_QAA'); when
        id is empty, title and time_coverage_duration are used instead."""
        if self.fileId == None:
            id = self.globalAts['id']
            ##
            ## hack to allow for awkward usage in DLR ozone, where id is different for each file
            ## may need to improve on this if problem exists for other files in different form
            ##
            xx = re_dlr01.findall( id )
            if len(xx) != 0:
                # strip the trailing '-YYYYMMDD-fv0100.nc' (19 characters)
                # matched by re_dlr01 so all files share one dataset id
                id = id[:-19]
            if id != '':
                self.fileId = '%s.%s' % (self.globalAts['naming_authority'],id)
                if self.globalAts['naming_authority'] == 'uk.ac.pml':
                    i0 = string.find(self.globalAts['id'],'OC4v6_QAA')
                    if i0 != -1:
                        self.fileId = '%s.%s' % (self.globalAts['naming_authority'],self.globalAts['id'][:i0+9])
            else:
                self.fileId = '%s.:%s:%s' % (self.globalAts['naming_authority'],self.globalAts['title'],self.globalAts['time_coverage_duration'])
---|
483 | |
---|
    def getDrs( self ):
        """Assemble and return a dictionary of DRS (Data Reference Syntax) elements.

        Each key of self.drsMappings is resolved according to the form of its
        mapped value:
          '@var'       -- name of the checked variable;
          '=text'      -- literal 'text';
          '@ensemble'  -- r<R>i<I>p<P> from the realization,
                          initialization_method and physics_version attributes;
          '@forecast_reference_time' -- YYYYMMDD digits from that attribute;
          '@mip_id'    -- second word of the table_id global attribute;
          '@ecv'       -- from pcfg.ecvMappings via the file name 'project' element;
          '$key' / '$key:default' -- from pcfg.extraAtts for this file (see getId);
          '*key'       -- from the checked variable's attributes;
          '#key'       -- from the parsed file name dictionary (with defaults);
          anything else -- value of the named global attribute.
        creation_date and tracking_id are passed through when present.
        Must only be called after checks have completed successfully."""
        assert self.completed, 'method getDrs should not be called if checks have not been completed successfully'
        ee = {}
        drsDefaults = { 'convention_version':'n/a'}
        if not self.globalAts.has_key('product'):
            self.globalAts['product'] = 'output'
        for k in self.drsMappings:
            if self.drsMappings[k] == '@var':
                ee[k] = self.var
            elif self.drsMappings[k][0] == '=':
                ee[k] = self.drsMappings[k][1:]
            elif self.drsMappings[k] == '@ensemble':
                ee[k] = "r%si%sp%s" % (self.globalAts["realization"],self.globalAts["initialization_method"],self.globalAts["physics_version"])
            elif self.drsMappings[k] == '@forecast_reference_time':
                # placeholder default keeps the slices below well-formed when
                # the attribute is absent
                x = self.globalAts.get("forecast_reference_time",'yyyy-mm-dd Thh:mm:ssZ' )
                ee[k] = "%s%s%s" % (x[:4],x[5:7],x[8:10])
            elif self.drsMappings[k] == '@mip_id':
                ee[k] = string.split( self.globalAts["table_id"] )[1]
            elif self.drsMappings[k] == '@ecv':
                ee[k] = self.pcfg.ecvMappings[ self.parent.fnDict['project'] ]
            elif self.drsMappings[k][0] == '$':
                # values held in per-dataset "extra attributes" tables,
                # keyed by the identifier computed in getId()
                self.pcfg.getExtraAtts()
                self.getId()
                if string.find(self.drsMappings[k],':') != -1:
                    k2,dflt = string.split( self.drsMappings[k][1:],':')
                    ee[k] = self.pcfg.extraAtts[self.fileId].get( k2, dflt )
                else:
                    ee[k] = self.pcfg.extraAtts[self.fileId][self.drsMappings[k][1:]]
            elif self.drsMappings[k][0] == '*':
                thisk = self.drsMappings[k][1:]
                ee[k] = self.varAts[self.var].get(thisk,'__none__')
            elif self.drsMappings[k][0] == '#':
                thisk = self.drsMappings[k][1:]
                if drsDefaults.has_key( thisk ):
                    ee[k] = self.parent.fnDict.get(thisk, drsDefaults[thisk] )
                else:
                    ee[k] = self.parent.fnDict[thisk]
            else:
                ee[k] = self.globalAts[ self.drsMappings[k] ]

        for k in ['creation_date','tracking_id']:
            if k in self.globalAts.keys():
                ee[k] = self.globalAts[k]

        return ee
---|
529 | |
---|
530 | def do_check_ga(self): |
---|
531 | varName = self.var |
---|
532 | globalAts = self.globalAts |
---|
533 | varAts = self.varAts |
---|
534 | varGroup = self.varGroup |
---|
535 | vocabs = self.vocabs |
---|
536 | fnParts = self.fnParts |
---|
537 | |
---|
538 | self.completed = False |
---|
539 | self.checkId = ('001','global_ncattribute_present') |
---|
540 | m = [] |
---|
541 | for k in self.requiredGlobalAttributes: |
---|
542 | if not globalAts.has_key(k): |
---|
543 | m.append(k) |
---|
544 | self.globalAts[k] = '__errorReported__' |
---|
545 | |
---|
546 | if not self.test( len(m) == 0, 'Required global attributes missing: %s' % str(m) ): |
---|
547 | gaerr = True |
---|
548 | for k in m: |
---|
549 | self.parent.amapListDraft.append( '#@;%s=%s|%s=%s' % (k,'__absent__',k,'<insert attribute value and uncomment>') ) |
---|
550 | |
---|
551 | self.checkId = ('002','variable_in_group') |
---|
552 | |
---|
553 | |
---|
554 | self.test( varAts.has_key( varName ), 'Expected variable [%s] not present' % varName, abort=True, part=True ) |
---|
555 | msg = 'Variable %s not in table %s' % (varName,varGroup) |
---|
556 | |
---|
557 | self.test( vocabs['variable'].isInTable( varName, varGroup ), msg, abort=True, part=True ) |
---|
558 | |
---|
559 | if self.pcfg.checkVarType: |
---|
560 | self.checkId = ('003','variable_type') |
---|
561 | |
---|
562 | mipType = vocabs['variable'].getAttr( varName, varGroup, 'type' ) |
---|
563 | thisType = {'real':'float32', 'integer':'int32', 'float':'float32', 'double':'float64' }.get( mipType, mipType ) |
---|
564 | self.test( mipType == None or varAts[varName]['_type'] == thisType, 'Variable [%s/%s] not of type %s [%s]' % (varName,varGroup,str(thisType),varAts[varName]['_type']) ) |
---|
565 | else: |
---|
566 | mipType = None |
---|
567 | |
---|
568 | self.checkId = ('004','variable_ncattribute_present') |
---|
569 | m = [] |
---|
570 | reqAts = self.requiredVarAttributes[:] |
---|
571 | if (not self.parent.fileIsFixed) and self.pcfg.projectV.id in ['CORDEX']: |
---|
572 | reqAts.append( 'cell_methods' ) |
---|
573 | for k in reqAts + vocabs['variable'].lists(varName, 'addRequiredAttributes'): |
---|
574 | if not varAts[varName].has_key(k): |
---|
575 | m.append(k) |
---|
576 | if not self.test( len(m) == 0, 'Required variable attributes missing: %s' % str(m) ): |
---|
577 | vaerr = True |
---|
578 | for k in m: |
---|
579 | self.parent.amapListDraft.append( '#@var=%s;%s=%s|%s=%s' % (varName,k,'__absent__',k,'<insert attribute value and uncomment>') ) |
---|
580 | ## print self.parent.amapListDraft[-1] |
---|
581 | |
---|
582 | ## need to insert a check that variable is present |
---|
583 | self.checkId = ('005','variable_ncattribute_mipvalues') |
---|
584 | ok = True |
---|
585 | hm = varAts[varName].get( 'missing_value', None ) != None |
---|
586 | hf = varAts[varName].has_key( '_FillValue' ) |
---|
587 | if hm or hf: |
---|
588 | ## both are not required for SPECS. |
---|
589 | if self.pcfg.varTables=='CMIP' and self.pcfg.projectV.id != 'SPECS': |
---|
590 | ok &= self.test( hm, 'missing_value must be present if _FillValue is [%s]' % varName ) |
---|
591 | ok &= self.test( hf, '_FillValue must be present if missing_value is [%s]' % varName ) |
---|
592 | else: |
---|
593 | ok = True |
---|
594 | if mipType == 'real': |
---|
595 | if varAts[varName].has_key( 'missing_value' ): |
---|
596 | msg = 'Variable [%s] has incorrect attribute missing_value=%s [correct: %s]' % (varName,varAts[varName]['missing_value'],self.missingValue) |
---|
597 | ### need to use ctypes here when using ncq3 to read files -- appears OK for other libraries. |
---|
598 | ok &= self.test( ctypes.c_float(varAts[varName]['missing_value']).value == ctypes.c_float(self.missingValue).value, msg, part=True ) |
---|
599 | if varAts[varName].has_key( '_FillValue' ): |
---|
600 | msg = 'Variable [%s] has incorrect attribute _FillValue=%s [correct: %s]' % (varName,varAts[varName]['_FillValue'],self.missingValue) |
---|
601 | ok &= self.test( varAts[varName]['_FillValue'] == self.missingValue, msg, part=True ) |
---|
602 | |
---|
603 | mm = [] |
---|
604 | |
---|
605 | if self.pcfg.varTables=='CMIP': |
---|
606 | contAts = ['long_name', 'standard_name', 'units'] |
---|
607 | if not self.parent.fileIsFixed: |
---|
608 | ##if varGroup not in ['fx','fixed']: |
---|
609 | contAts.append( 'cell_methods' ) |
---|
610 | else: |
---|
611 | contAts = ['standard_name'] |
---|
612 | hcm = varAts[varName].has_key( "cell_methods" ) |
---|
613 | for k in contAts + vocabs['variable'].lists(varName,'addControlledAttributes'): |
---|
614 | targ = varAts[varName].get( k, 'Attribute not present' ) |
---|
615 | val = vocabs['variable'].getAttr( varName, varGroup, k ) |
---|
616 | |
---|
617 | if k == "standard_name": |
---|
618 | if val.find( ' ' ) != -1: |
---|
619 | val = string.join( string.split(val,maxsplit=1) ) |
---|
620 | if targ.find( ' ' ) != -1: |
---|
621 | targ = string.join( string.split(targ,maxsplit=1) ) |
---|
622 | |
---|
623 | if k == "cell_methods": |
---|
624 | if val != None: |
---|
625 | parenthesies1 = [] |
---|
626 | targ0 = targ[:] |
---|
627 | while string.find( targ, '(' ) != -1: |
---|
628 | i0 = targ.index( '(' ) |
---|
629 | i1 = targ.index( ')' ) |
---|
630 | parenthesies1.append( targ[i0:i1+1] ) |
---|
631 | targ = targ[:i0-1] + targ[i1+1:] |
---|
632 | parenthesies2 = [] |
---|
633 | val0 = val[:] |
---|
634 | while string.find( val, '(' ) != -1: |
---|
635 | i0 = val.index( '(' ) |
---|
636 | i1 = val.index( ')' ) |
---|
637 | parenthesies2.append( val[i0:i1+1] ) |
---|
638 | val = val[:i0-1] + val[i1+1:] |
---|
639 | for p in parenthesies2: |
---|
640 | if p not in parenthesies1: |
---|
641 | mm.append( (k,parenthesies1,p) ) |
---|
642 | if string.find( targ, val): |
---|
643 | mm.append( (k,targ,val) ) |
---|
644 | elif targ != 'Attribute not present' and targ != val: |
---|
645 | mm.append( (k,targ,val) ) |
---|
646 | |
---|
647 | ok &= self.test( len(mm) == 0, 'Variable [%s] has incorrect attributes: %s' % (varName, strmm3(mm)), part=True ) |
---|
648 | if len( mm ) != 0: |
---|
649 | if self.parent.amapListDraft == None: |
---|
650 | self.parent.amapListDraft = [] |
---|
651 | for m in mm: |
---|
652 | if m[2] == None: |
---|
653 | self.parent.amapListDraft.append( '## @var=%s;%s=%s|@delete=%s -- not supported yet' % (varName,m[0],m[1],m[0]) ) |
---|
654 | else: |
---|
655 | self.parent.amapListDraft.append( '@var=%s;%s=%s|%s=%s' % (varName,m[0],m[1],m[0],m[2]) ) |
---|
656 | |
---|
657 | if ok: |
---|
658 | self.log_pass() |
---|
659 | |
---|
660 | if (not self.parent.fileIsFixed) and hcm: |
---|
661 | ## if (varGroup not in ['fx','fixed']) and hcm: |
---|
662 | self.isInstantaneous = string.find( varAts[varName]['cell_methods'], 'time: point' ) != -1 |
---|
663 | else: |
---|
664 | self.isInstantaneous = True |
---|
665 | |
---|
666 | self.checkId = ('006','global_ncattribute_cv' ) |
---|
667 | m = [] |
---|
668 | for a in self.controlledGlobalAttributes: |
---|
669 | if globalAts.has_key(a): |
---|
670 | try: |
---|
671 | if not vocabs[a].check( str(globalAts[a]) ): |
---|
672 | m.append( (a,globalAts[a],vocabs[a].note) ) |
---|
673 | except: |
---|
674 | print 'failed trying to check global attribute %s' % a |
---|
675 | raise baseException( 'failed trying to check global attribute %s' % a ) |
---|
676 | |
---|
677 | if not self.test( len(m) == 0, 'Global attributes do not match constraints: %s' % str(m) ): |
---|
678 | for t in m: |
---|
679 | self.parent.amapListDraft.append( '#@;%s=%s|%s=%s' % (t[0],str(t[1]),t[0],'<insert attribute value and uncomment>' + str(t[2]) ) ) |
---|
680 | |
---|
681 | self.checkId = ('007','filename_filemetadata_consistency') |
---|
682 | m = [] |
---|
683 | for i in range(len(self.globalAttributesInFn)): |
---|
684 | gaif = self.globalAttributesInFn[i] |
---|
685 | if gaif != None and gaif[0] != '*': |
---|
686 | if gaif[-1] == ':': |
---|
687 | bits = string.split(gaif,':') |
---|
688 | gaif0 = gaif |
---|
689 | gaif = bits[0] |
---|
690 | ix = int(bits[1]) |
---|
691 | else: |
---|
692 | ix = i |
---|
693 | |
---|
694 | targVal = fnParts[ix] |
---|
695 | if gaif[0] == "@": |
---|
696 | if gaif[1:] == "mip_id": |
---|
697 | bits = string.split( globalAts[ "table_id" ] ) |
---|
698 | if len( bits ) > 2 and bits[0] == "Table": |
---|
699 | thisVal = bits[1] |
---|
700 | else: |
---|
701 | thisVal = globalAts[ "table_id" ] |
---|
702 | self.test( False, 'Global attribute table_id does not conform to CMOR pattern ["Table ......"]: %s' % thisVal, part=True) |
---|
703 | elif gaif[1:] == "ensemble": |
---|
704 | thisVal = "r%si%sp%s" % (globalAts["realization"],globalAts["initialization_method"],globalAts["physics_version"]) |
---|
705 | ## following mappings are depricated -- introduced for SPECS and withdrawn --- |
---|
706 | elif gaif[1:] == "experiment_family": |
---|
707 | thisVal = globalAts["experiment_id"][:-4] |
---|
708 | elif gaif[1:] == "forecast_reference_time": |
---|
709 | x = self.globalAts.get("forecast_reference_time",'yyyy-mm-dd Thh:mm:ssZ' ) |
---|
710 | thisVal = "S%s%s%s" % (x[:4],x[5:7],x[8:10]) |
---|
711 | elif gaif[1:] == "series": |
---|
712 | thisVal = 'series%s' % globalAts["series"] |
---|
713 | else: |
---|
714 | assert False, "Not coded to deal with this configuration: globalAttributesInFn[%s]=%s" % (i,self.globalAttributesInFn[i]) |
---|
715 | |
---|
716 | else: |
---|
717 | thisVal = globalAts[gaif] |
---|
718 | |
---|
719 | if thisVal not in [targVal,'__errorReported__']: |
---|
720 | m.append( (i,self.globalAttributesInFn[i]) ) |
---|
721 | |
---|
722 | self.test( len(m) == 0,'File name segments do not match corresponding global attributes: %s' % str(m) ) |
---|
723 | |
---|
724 | self.completed = True |
---|
725 | |
---|
class checkStandardDims(checkBase):
    """Check the dimensions which are defined in the specifications"""

    def init(self):
        ## Configure this check instance: id/checkId/step are bookkeeping used by
        ## the checkBase framework; required level settings come from the project config.
        self.id = 'C4.003'
        self.checkId = 'unset'
        self.step = 'Initialised'
        self.checks = (self.do_check,)
        self.plevRequired = self.pcfg.plevRequired      # variables that must carry a 'plev' coordinate
        self.plevValues = self.pcfg.plevValues          # required plev value, keyed by variable name
        self.heightRequired = self.pcfg.heightRequired  # variables that must carry a 'height' coordinate
        self.heightValues = self.pcfg.heightValues      # nominal height values (not used in this method)
        self.heightRange = self.pcfg.heightRange        # (min,max) allowed height, keyed by variable name

    def check(self,varName,varGroup, da, va, isInsta,vocabs):
        ## Entry point: stash arguments on self, then run the checks configured in init().
        ## da: dimension attribute dictionaries; va: variable attribute dictionaries;
        ## isInsta: True for instantaneous ('time: point') variables.
        self.errorCount = 0
        assert type(varName) in [type('x'),type(u'x')], '1st argument to "check" method of checkGrids shound be a string variable name (not %s)' % type(varName)
        self.var = varName
        self.varGroup = varGroup
        self.da = da
        self.va = va
        self.isInsta = isInsta
        self.vocabs = vocabs
        self.runChecks()

    def do_check(self):
        """Run three sub-checks: time axis attributes, required pressure levels,
        and required height levels, logging a pass for each section that is clean."""
        varName = self.var
        varGroup = self.varGroup
        da = self.da
        va = self.va
        isInsta = self.isInsta

        self.errorCount = 0
        self.completed = False
        self.checkId = ('001','time_attributes')
        self.calendar = 'None'
        if not self.parent.fileIsFixed:
        ## if (varGroup not in ['fx','fixed']):
            ok = True
            ## abort=True: without a time dimension the remaining time checks are meaningless
            self.test( 'time' in da.keys(), 'Time dimension not found' , abort=True, part=True )

            if self.pcfg.varTables=='CMIP':
                if not isInsta:
                    ## non-instantaneous (time-mean etc.) variables need cell bounds on time
                    ok &= self.test( da['time'].get( 'bounds', 'xxx' ) == 'time_bnds', 'Required bounds attribute not present or not correct value', part=True )

## is time zone designator needed?
                tunits = da['time'].get( 'units', 'xxx' )
                if self.project == 'CORDEX':
                    ## CORDEX pins the reference date; accept with/without time-of-day/zone suffix
                    ok &= self.test( tunits in ["days since 1949-12-01 00:00:00Z", "days since 1949-12-01 00:00:00", "days since 1949-12-01"],
                        'Time units [%s] attribute not set correctly to "days since 1949-12-01 00:00:00Z"' % tunits, part=True )
                else:
                    ok &= self.test( tunits[:10] == "days since", 'time units [%s] attribute not set correctly to "days since ....."' % tunits, part=True )

            ok &= self.test( da['time'].has_key( 'calendar' ), 'Time: required attribute calendar missing', part=True )

            ok &= self.test( da['time']['_type'] in ["float64","double"], 'Time: data type not float64 [%s]' % da['time']['_type'], part=True )

            if ok:
                self.log_pass()
            self.calendar = da['time'].get( 'calendar', 'None' )

        self.checkId = ('002','pressure_levels')
        if varName in self.plevRequired:
            ok = True
            ## abort=True: nothing below makes sense without the plev coordinate variable
            self.test( 'plev' in va.keys(), 'plev coordinate not found %s' % str(va.keys()), abort=True, part=True )

            ok &= self.test( int( va['plev']['_data'][0] ) == self.plevValues[varName], \
                  'plev value [%s] does not match required [%s]' % (va['plev']['_data'],self.plevValues[varName] ), part=True )

            plevAtDict = {'standard_name':"air_pressure", \
                  'long_name':"pressure", \
                  'units':"Pa", \
                  'positive':"down", \
                  'axis':"Z" }

            ## the cloud-layer fractions are defined over a pressure band, so bounds are required
            if varName in ['clh','clm','cll']:
                plevAtDict['bounds']= "plev_bnds"

            for k in plevAtDict.keys():
                ok &= self.test( va['plev'].get( k, None ) == plevAtDict[k],
                     'plev attribute %s absent or wrong value (should be %s)' % (k,plevAtDict[k]), part=True )

            if varName in ['clh','clm','cll']:
                self.test( "plev_bnds" in va.keys(), 'plev_bnds variable not found %s' % str(va.keys()), abort=True, part=True )
                ## plev_bnds must repeat the plev attributes (except 'bounds' itself)
                mm = []
                for k in plevAtDict.keys():
                    if k != 'bounds' and k in va['plev_bnds'].keys():
                        if va['plev_bnds'][k] != va['plev'][k]:
                            mm.append(k)
                ok &= self.test( len(mm) == 0, 'Attributes of plev_bnds do not match those of plev: %s' % str(mm), part=True )

                ## fixed pressure bands (Pa) for high/medium/low cloud layers
                bndsVals = {'clh':[44000, 0], 'clm':[68000, 44000], 'cll':[100000, 68000] }
                res = self.test( len( va['plev_bnds']['_data'] ) == 2, 'plev_bnds array is of wrong length', part=True )
                ok &= res
                if res:
                    kk = 0
                    for i in [0,1]:
                        if int(va['plev_bnds']['_data'][i]) != bndsVals[varName][i]:
                            kk+=1
                    ok &= self.test( kk == 0, 'plev_bnds values not correct: should be %s' % str(bndsVals[varName]), part=True )

            if ok:
                self.log_pass()

        self.checkId = ('003','height_levels')
        hreq = varName in self.heightRequired
        if self.parent.experimental:
            ## experimental mode: derive the height requirement from the MIP table
            ## dimension list instead of the static configuration
            print 'utils_c4: ', varName, self.vocabs['variable'].varcons[varGroup][varName].get( '_dimension',[])
            hreq = "height2m" in self.vocabs['variable'].varcons[varGroup][varName].get( '_dimension',[])
            if hreq:
                print 'testing height, var=%s' % varName
        if hreq:
            heightAtDict = {'long_name':"height", 'standard_name':"height", 'units':"m", 'positive':"up", 'axis':"Z" }
            ok = True
            ok &= self.test( 'height' in va.keys(), 'height coordinate not found %s' % str(va.keys()), abort=True, part=True )
            ##ok &= self.test( abs( va['height']['_data'] - self.heightValues[varName]) < 0.001, \
                ##'height value [%s] does not match required [%s]' % (va['height']['_data'],self.heightValues[varName] ), part=True )

            ok1 = self.test( len( va['height']['_data'] ) == 1, 'More height values (%s) than expected (1)' % (len( va['height']['_data'])), part=True )
            if ok1:
                ## value check only makes sense for a scalar height coordinate
                r = self.heightRange[varName]
                ok1 &= self.test( r[0] <= va['height']['_data'][0] <= r[1], \
                    'height value [%s] not in specified range [%s]' % (va['height']['_data'], (self.heightRange[varName] ) ), part=True )

            ok &= ok1

            for k in heightAtDict.keys():
                val = va['height'].get( k, "none" )
                if not self.test( val == heightAtDict[k], \
                       'height attribute %s absent or wrong value (should be %s)' % (k,heightAtDict[k]), part=True ):
                    ## record a draft attribute-map entry so the user can patch the file
                    self.parent.amapListDraft.append( '@var=%s;%s=%s|%s=%s' % ('height',k,val,k,heightAtDict[k]) )
                    ok = False

            if ok:
                self.log_pass()

        self.completed = True
---|
862 | |
---|
class checkGrids(checkBase):
    """Checks on horizontal grid definitions: rotated-pole native grids and,
    for domains whose name ends in 'i', interpolated regular lat/lon grids."""

    def init(self):
        ## Configure this check instance for the checkBase framework.
        self.id = 'C4.004'
        self.checkId = 'unset'
        self.step = 'Initialised'
        self.checks = (self.do_check_rp,self.do_check_intd)

    def check(self,varName, domain, da, va):
        """Run both grid checks for variable varName on the given domain.

        da: dimension attribute dictionaries; va: variable attribute dictionaries."""
        self.errorCount = 0
        assert type(varName) in [type('x'),type(u'x')], '1st argument to "check" method of checkGrids should be a string variable name (not %s)' % type(varName)
        self.var = varName
        self.domain = domain
        self.da = da
        self.va = va

        self.runChecks()

    def do_check_rp(self):
        """Check rotated-pole grid mapping: rlat/rlon presence, attributes,
        sizes, domain boundaries and even spacing. Only runs when the variable's
        grid_mapping attribute is 'rotated_pole'."""
        varName = self.var
        domain = self.domain
        da = self.da
        va = self.va
        if va[varName].get( 'grid_mapping', None ) == "rotated_pole":
            self.checkId = ('001','grid_mapping')
            ## NOTE(review): this atDict is overwritten below before it is ever
            ## used; kept because the rotatedPoleGrids lookups would raise on an
            ## unknown domain, which preserves existing failure behaviour.
            atDict = { 'grid_mapping_name':'rotated_latitude_longitude' }
            atDict['grid_north_pole_latitude'] = self.pcfg.rotatedPoleGrids[domain]['grid_np_lat']
            if self.pcfg.rotatedPoleGrids[domain]['grid_np_lon'] != 'N/A':
                atDict['grid_north_pole_longitude'] = self.pcfg.rotatedPoleGrids[domain]['grid_np_lon']

            self.checkId = ('002','rotated_latlon_attributes')
            self.test( 'rlat' in da.keys() and 'rlon' in da.keys(), 'rlat and rlon not found (required for grid_mapping = rotated_pole )', abort=True, part=True )

            atDict = {'rlat':{'long_name':"rotated latitude", 'standard_name':"grid_latitude", 'units':"degrees", 'axis':"Y", '_type':'float64'},
                      'rlon':{'long_name':"rotated longitude", 'standard_name':"grid_longitude", 'units':"degrees", 'axis':"X", '_type':'float64'} }
            mm = []
            for k in ['rlat','rlon']:
                for k2 in atDict[k].keys():
                    if atDict[k][k2] != da[k].get(k2, None ):
                        mm.append( (k,k2) )
                        ## draft attribute-map entry so the user can patch the file
                        record = '#@ax=%s;%s=%s|%s=%s <uncomment if correct>' % (k,k2,da[k].get(k2, '__missing__'),k2,atDict[k][k2] )
                        self.parent.amapListDraft.append( record )
            self.test( len(mm) == 0, 'Required attributes of grid coordinate arrays not correct: %s' % str(mm) )

            self.checkId = ('003','rotated_latlon_domain')
            ok = True
            for k in ['rlat','rlon']:
                res = len(da[k]['_data']) == self.pcfg.rotatedPoleGrids[domain][ {'rlat':'nlat','rlon':'nlon' }[k] ]
                if not res:
                    ## BUG FIX: a and b were previously referenced here before
                    ## assignment, raising NameError instead of producing the
                    ## diagnostic (cf. the equivalent code in do_check_intd).
                    a,b = len(da[k]['_data']), self.pcfg.rotatedPoleGrids[domain][ {'rlat':'nlat','rlon':'nlon' }[k] ]
                    self.test( res, 'Size of %s dimension does not match specification (%s,%s)' % (k,a,b), part=True )
                    ok = False

            ## compare first/last coordinate values against the configured s/n/w/e bounds
            a = ( da['rlat']['_data'][0], da['rlat']['_data'][-1], da['rlon']['_data'][0], da['rlon']['_data'][-1] )
            b = map( lambda x: self.pcfg.rotatedPoleGrids[domain][x], ['s','n','w','e'] )
            mm = []
            for i in range(4):
                if abs(a[i] - b[i]) > self.pcfg.gridSpecTol:
                    mm.append( (a[i],b[i]) )

            ok &= self.test( len(mm) == 0, 'Domain boundaries for rotated pole grid do not match %s within tolerance (%s)' % (str(mm),self.pcfg.gridSpecTol), part=True )

            for k in ['rlat','rlon']:
                ## cs: module-level coordinate-spacing checker -- presumably defined
                ## elsewhere in this file (TODO confirm); dmn/dmx are min/max deltas
                ok &= self.test( cs.check( da[k]['_data'] ), '%s values not evenly spaced -- min/max delta = %s, %s' % (k,cs.dmn,cs.dmx), part=True )

            if ok:
                self.log_pass()

    def do_check_intd(self):
        """Check interpolated regular grids: lat/lon presence, attributes, sizes,
        boundary alignment on the grid resolution and even spacing. Only runs
        when the domain name ends in 'i'."""
        varName = self.var
        domain = self.domain
        da = self.da
        va = self.va
        if domain[-1] == 'i':
            self.checkId = ('004','regular_grid_attributes')
            self.test( 'lat' in da.keys() and 'lon' in da.keys(), 'lat and lon not found (required for interpolated data)', abort=True, part=True )

            atDict = {'lat':{'long_name':"latitude", 'standard_name':"latitude", 'units':"degrees_north", '_type':'float64'},
                      'lon':{'long_name':"longitude", 'standard_name':"longitude", 'units':"degrees_east", '_type':'float64'} }
            mm = []
            for k in ['lat','lon']:
                for k2 in atDict[k].keys():
                    if atDict[k][k2] != da[k].get(k2, None ):
                        mm.append( (k,k2) )
                        record = '#@ax=%s;%s=%s|%s=%s <uncomment if correct>' % (k,k2,da[k].get(k2, '__missing__'),k2,atDict[k][k2] )
                        self.parent.amapListDraft.append( record )

            self.test( len(mm) == 0, 'Required attributes of grid coordinate arrays not correct: %s' % str(mm), part=True )

            ok = True
            self.checkId = ('005','regular_grid_domain')
            for k in ['lat','lon']:
                ## interpolated grids may be larger than the minimum specification
                res = len(da[k]['_data']) >= self.pcfg.interpolatedGrids[domain][ {'lat':'nlat','lon':'nlon' }[k] ]
                if not res:
                    a,b = len(da[k]['_data']), self.pcfg.interpolatedGrids[domain][ {'lat':'nlat','lon':'nlon' }[k] ]
                    self.test( res, 'Size of %s dimension does not match specification (%s,%s)' % (k,a,b), part=True )
                    ok = False

            ## boundaries must be offset from the specification by a whole number
            ## of grid cells, outward only (x >= 0)
            a = ( da['lat']['_data'][0], da['lat']['_data'][-1], da['lon']['_data'][0], da['lon']['_data'][-1] )
            b = map( lambda x: self.pcfg.interpolatedGrids[domain][x], ['s','n','w','e'] )
            rs = self.pcfg.interpolatedGrids[domain]['res']
            c = [-rs,rs,-rs,rs]
            mm = []
            for i in range(4):
                if a[i] != b[i]:
                    x = (a[i]-b[i])/c[i]
                    if x < 0 or abs( x - int(x) ) > 0.001:
                        skipThis = False
                        if self.project == 'CORDEX':
                            ## special case: Antarctic domain longitudes wrap at 0.25/359.75
                            if domain[:3] == 'ANT':
                                if i == 2 and abs( a[i] - 0.25 ) < 0.001:
                                    skipThis = True
                                elif i == 3 and abs( a[i] - 359.75 ) < 0.001:
                                    skipThis = True
                        if not skipThis:
                            mm.append( (a[i],b[i]) )

            ok &= self.test( len(mm) == 0, 'Interpolated grid boundary error: File %s; Req. %s' % (str(a),str(b)), part=True )

            for k in ['lat','lon']:
                ok &= self.test( cs.check( da[k]['_data'] ), '%s values not evenly spaced -- min/max delta = %s, %s' % (k,cs.dmn,cs.dmx), part=True )
            if ok:
                self.log_pass()
---|
989 | |
---|
class mipVocab(object):
    """Lookup of per-variable constraints (attributes, dimensions, data types)
    read from MIP tables.

    Three ingestion modes, selected by the project configuration (pcfg):
    'CMIP' table files, a 'FLAT' single-file table, or a dummy table for testing.
    Results are exposed through self.varInfo[varName] and
    self.varcons[varGroup][varName]."""

    def __init__(self,pcfg,dummy=False):
        self.pcfg = pcfg
        if dummy:
            self.dummyMipTable()
        elif pcfg.varTables=='CMIP':
            self.ingestMipTables()
        elif pcfg.varTables=='FLAT':
            self.flatTable()

    def ingestMipTables(self):
        """Scan each configured MIP table file and populate varInfo/varcons."""
        tdir, tl, vgmap, fnpat = self.pcfg.mipVocabPars
        ms = mipTableScan()
        self.varInfo = {}
        self.varcons = {}
        for f in tl:
            vg = vgmap.get( f, f )
            if vg not in self.varcons:
                self.varcons[vg] = {}
            fn = fnpat % f
            ## FIX: close the table file explicitly -- the handle was previously
            ## left for the garbage collector
            fh = open( '%s%s' % (tdir,fn) )
            try:
                ll = fh.readlines()
            finally:
                fh.close()
            ee = ms.scan_table(ll,None,asDict=True)
            for v in ee.keys():
                ## set global default: type float
                eeee = { 'type':self.pcfg.defaults.get( 'variableDataType', 'float' ) }
                eeee['_dimension'] = ee[v][0]
                ar = []
                ac = []
                for a in ee[v][1].keys():
                    eeee[a] = ee[v][1][a]
                ##if 'positive' in eeee.keys():
                  ##ar.append( 'positive' )
                  ##ac.append( 'positive' )
                self.varInfo[v] = {'ar':ar, 'ac':ac }
                self.varcons[vg][v] = eeee

    def dummyMipTable(self):
        """Populate varInfo/varcons with 12 synthetic variables (v0..v11) per
        variable group, for testing without real MIP tables."""
        self.varInfo = {}
        self.varcons = {}
        ee = { 'standard_name':'sn%s', 'long_name':'n%s', 'units':'1' }
        tdir, tl, vgmap, fnpat = self.pcfg.mipVocabPars
        for f in tl:
            vg = vgmap.get( f, f )
            self.varcons[vg] = {}
            for i in range(12):
                v = 'v%s' % i
                eeee = {}
                eeee['standard_name'] = ee['standard_name'] % i
                eeee['long_name'] = ee['long_name'] % i
                eeee['cell_methods'] = 'time: point'
                eeee['units'] = ee['units']
                eeee['type'] = 'float'
                ar = []
                ac = []
                self.varInfo[v] = {'ar':ar, 'ac':ac }
                self.varcons[vg][v] = eeee

    def flatTable(self):
        """Read a flat table: one variable per non-comment line, formatted as
        '<dt> <var> <standard_name>' with an optional '|key=value' extra field.
        Also records a filename-token lookup in self.pcfg.fnvdict."""
        self.varInfo = {}
        self.varcons = {}
        tdir, tl, vgm, fn = self.pcfg.mipVocabPars
        vg = list(vgm.keys())[0]
        ## FIX: close the table file explicitly (previously leaked)
        fh = open( '%s%s' % (tdir,fn) )
        try:
            ll = fh.readlines()
        finally:
            fh.close()
        self.varcons[vg] = {}
        for l in ll:
            if l[0] != '#':
                bits = l.strip().split( '|' )
                if len(bits) == 2:
                    p1,p2 = bits
                else:
                    p1 = l
                    p2 = None
                ## whitespace split, at most 3 fields: the standard name may contain spaces
                dt, v, sn = p1.strip().split( None, 2 )
                if p2 != None:
                    bits = p2.strip().split( '=' )
                    eex = { bits[0]:bits[1] }
                else:
                    eex = None
                self.pcfg.fnvdict[dt] = { 'v':v, 'sn':sn, 'ex':eex }
                ar = []
                ac = []
                self.varInfo[v] = {'ar':ar, 'ac':ac }
                self.varcons[vg][v] = {'standard_name':sn, 'type':'float' }

    def lists( self, k, k2 ):
        """Return the additional required ('addRequiredAttributes') or controlled
        ('addControlledAttributes') attribute list for variable k."""
        if k2 == 'addRequiredAttributes':
            return self.varInfo[k]['ar']
        elif k2 == 'addControlledAttributes':
            return self.varInfo[k]['ac']
        else:
            raise baseException( 'mipVocab.lists called with bad list specifier %s' % k2 )

    def isInTable( self, v, vg1 ):
        """Return True if variable v appears in variable group vg1
        ('ESA' is accepted as an alias for 'ESACCI')."""
        vg = vg1
        if vg == 'ESA':
            vg = 'ESACCI'

        assert vg in self.varcons.keys(), '%s not found in self.varcons.keys() [%s]' % (vg,str(self.varcons.keys()) )
        return (v in self.varcons[vg].keys())

    def getAttr( self, v, vg1, a ):
        """Return attribute a of variable v in variable group vg1
        ('ESA' is accepted as an alias for 'ESACCI')."""
        vg = vg1
        if vg == 'ESA':
            vg = 'ESACCI'
        ## FIX: the %s placeholder was previously never interpolated (missing % vg)
        assert vg in self.varcons.keys(), '%s not found in self.varcons.keys()' % vg
        assert v in self.varcons[vg].keys(), '%s not found in self.varcons[%s].keys()' % (v,vg)

        return self.varcons[vg][v][a]
---|
1100 | |
---|
class patternControl(object):
    """Check values against a regular expression, optionally restricting a
    named (?P<val>...) group to a controlled list.

    tag: label used in diagnostics; pattern: a regex, or the named pattern
    'ISO8601 duration' when cls='ISO'; list: optional allowed values for the
    'val' group; examples: values that must match, verified at construction
    when runTest is True; badExamples: recorded but not currently tested."""

    def __init__(self,tag,pattern,list=None,cls=None,examples=None,badExamples=None,runTest=True):
        if cls != None:
            assert cls in ['ISO'], 'value of cls [%s] not recognised' % cls
            if cls == 'ISO':
                assert pattern in ['ISO8601 duration'], 'value of pattern [%s] for ISO constraint not recognised' % pattern
                if pattern == 'ISO8601 duration':
                    ## FIX: the decimal point in the fractional-seconds field is now
                    ## escaped (\.) -- previously a bare '.', which matched any character
                    thispat = '^(P([0-9]+Y){0,1}([0-9]+M){0,1}([0-9]+D){0,1}(T([0-9]+H){0,1}([0-9]+M){0,1}([0-9]+(\\.[0-9]+){0,1}S){0,1}){0,1})$|^(P[0-9]+W)$'
                    self.re_pat = re.compile( thispat )
                    self.pattern = thispat
                    self.pattern_src = pattern
        else:
            try:
                self.re_pat = re.compile( pattern )
            except re.error:
                ## narrowed from a bare except; single-argument parenthesised print
                ## behaves identically on Python 2 and 3.
                ## NOTE(review): self.re_pat is left unset on failure, so a later
                ## check() raises AttributeError -- preserved from the original.
                print("Failed to compile pattern >>%s<< (%s)" % (pattern, tag))
            self.pattern = pattern

        self.examples = examples
        self.badExamples = badExamples
        self.list = list
        self.cls = cls

        if runTest:
            if examples != None:
                for e in examples:
                    assert self.check(e), 'Internal check failed: example %s does not fit pattern %s' % (e,self.pattern)

    def check(self,val):
        """Return True when val matches the pattern (and, if a list is set, the
        'val' group is in the list). Side effect: self.note records the reason."""
        self.note = '-'
        m = self.re_pat.match( val )
        if self.list == None:
            self.note = "simple test"
            return m != None
        else:
            if m == None:
                self.note = "no match %s::%s" % (val,self.pattern)
                return False
            if "val" not in m.groupdict():
                self.note = "no 'val' in match"
                return False
            self.note = "val=%s" % m.groupdict()["val"]
            return m.groupdict()["val"] in self.list
---|
1145 | |
---|
class listControl(object):
    """Check that a value (or each element of a delimited value) belongs to a
    controlled list, with optional <...> enumeration expansion, e.g.
    'x<1,2>' expands to 'x1' and 'x2'."""

    def __init__(self,tag,list,split=False,splitVal=None,enumeration=False):
        ## tag: label for diagnostics; list: allowed values; split: treat val as a
        ## delimited collection (whitespace when splitVal is None, else splitVal);
        ## enumeration: expand <...> shorthand groups before checking.
        self.list = list
        self.tag = tag
        self.split = split
        self.splitVal = splitVal
        self.enumeration = enumeration
        ## matches 'prefix<n,m,...>' enumeration shorthand
        self.etest = re.compile( '(.*)<([0-9]+(,[0-9]+)*)>' )
        ## tokenizer that keeps <...> groups intact while splitting on space/comma
        self.essplit = re.compile(r'(?:[^\s,<]|<(?:\\.|[^>])*>)+')

    def check(self,val):
        """Return True when every (expanded) element of val is in self.list.
        Side effect: self.note records the first few allowed values."""
        self.note = '-'
        if len(self.list) < 4:
            self.note = str( self.list )
        else:
            self.note = str( self.list[:4] )
        ## split the value into elements to be checked individually
        ## (str methods replace the deprecated `string` module functions)
        if self.split:
            if self.splitVal == None:
                vs = val.split()
            elif self.enumeration:
                vs = [x.strip() for x in self.essplit.findall( val )]
            else:
                vs = [x.strip() for x in val.split( self.splitVal )]
        else:
            vs = [val,]
        if self.enumeration:
            ## expand each 'prefix<n,m>' token into 'prefixn', 'prefixm', ...
            vs2 = []
            for v in vs:
                m = self.etest.findall( v )
                if not m:
                    vs2.append( v )
                else:
                    for o in m[0][1].split( ',' ):
                        vs2.append( '%s%s' % (m[0][0],o) )
            vs = vs2[:]

        return all( [x in self.list for x in vs] )
---|
1184 | |
---|
1185 | |
---|
class checkByVar(checkBase):
    """Run some checks on groups of files with a common variable. Checks for continuity of time in group"""

    def init(self,fileNameSeparator='_'):
        ## Configure this check instance for the checkBase framework.
        self.id = 'C5.001'
        self.checkId = 'unset'
        self.step = 'Initialised'
        self.checks = (self.checkTrange,)
        self.fnsep = fileNameSeparator   # separator between file name segments

    def setLogDict( self,fLogDict ):
        ## fLogDict: maps file name -> per-file log handle, used when appending diagnostics
        self.fLogDict = fLogDict

    def impt(self,flist):
        """Scan a list of files and identify groups which a common variable and extract time ranges into a dictionary of lists, keyed on group identifiers.
        :param flist: List of file names.

        This routine has rather obscure nested logical tests used to identify the group to which a file belongs. The complexity arises from the fact that the identification of the files that should form a continuous time series from the file names alone is not a standardised feature of the file names."""
        ee = {}
        elist = []
        for f in flist:
            ## strip directory and the '.nc' suffix, then split into segments
            fn = string.split(f, '/' )[-1]
            fnParts = string.split( fn[:-3], self.fnsep )

            try:
                if self.pcfg.freqIndex != None:
                    freq = fnParts[self.pcfg.freqIndex]
                else:
                    freq = None

                group = fnParts[ self.pcfg.groupIndex ]

                if self.parent.fileIsFixed:
                    trange = None
                else:
                    ## last segment is the time range, e.g. '19500101-19501231'
                    trange = string.split( fnParts[-1], '-' )
                var = fnParts[self.pcfg.varIndex]
                ## key identifying one time series: all segments except the time range
                thisKey = string.join( fnParts[:-1], '.' )
                if group not in ee.keys():
                    ee[group] = {}
                if thisKey not in ee[group].keys():
                    ee[group][thisKey] = []
                ee[group][thisKey].append( (f,fn,group,trange) )
            except:
                print 'Cannot parse file name: %s' % (f)
                elist.append(f)
## this ee entry is not used, except in bookkeeping check below.
## parsing of file name is repeated later, and a error log entry is created at that stage -- this could be improved.
## in order to improve, need to clarify flow of program: the list here is used to provide preliminary info before log files etc are set up.
                group = '__error__'
                thisKey = fn
                if group not in ee.keys():
                    ee[group] = {}
                if thisKey not in ee[group].keys():
                    ee[group][thisKey] = []
                ee[group][thisKey].append( (f,fn,group) )

        ## bookkeeping: every input file must appear exactly once in ee
        nn = len(flist)
        n2 = 0
        for k in ee.keys():
            for k2 in ee[k].keys():
                n2 += len( ee[k][k2] )

        assert nn==n2, 'some file lost!!!!!!'
        if len(elist) == 0:
            self.info = '%s %s, %s' % (nn, maybe_plural("file", nn), str(ee.keys()))
        else:
            self.info = '%s %s, %s frequencies, severe errors in file names: %s' % (nn, maybe_plural("file", nn), len(ee.keys()), len(elist))
            for e in elist:
                self.info += '\n%s' % e
        self.ee = ee

    def check(self, recorder=None,calendar='None',norun=False):
        ## Prepare the time-range regular expression patterns for each frequency
        ## group, then (unless norun) run the configured checks.
        ## recorder: optional object whose .modify() is called on bad files.
        self.errorCount = 0
        self.recorder=recorder
        self.calendar=calendar
        ## NOTE(review): the CF calendar attribute is spelled '360_day' (underscore);
        ## this comparison uses '360-day' -- confirm the caller translates, otherwise
        ## enddec is never set to 30.
        if calendar == '360-day':
            self.enddec = 30
        else:
            self.enddec = 31
        mm = { 'enddec':self.enddec }
        ## per-frequency regex pairs (start-of-range, end-of-range); %(enddec)s is
        ## substituted with the last day of December for the calendar in use
        self.pats = {'mon':('(?P<d>[0-9]{3})101','(?P<e>[0-9]{3})012'), \
            'sem':('(?P<d>[0-9]{3})(012|101)','(?P<e>[0-9]{3})(011|010)'), \
            'day':('(?P<d>[0-9]{3}[16])0101','(?P<e>[0-9]{3}[50])12%(enddec)s' % mm), \
            'subd':('(?P<d>[0-9]{4})0101(?P<h1>[0-9]{2})(?P<mm>[30]0){0,1}$', '(?P<e>[0-9]{4})12%(enddec)s(?P<h2>[0-9]{2})([30]0){0,1}$' % mm ), \
            'subd2':('(?P<d>[0-9]{4})0101(?P<h1>[0-9]{2})', '(?P<e>[0-9]{4})010100' ) }

        if not norun:
            self.runChecks()

    def checkTrange(self):
        """Manage time range checks: loop over groups of files identified by :meth:`impt`"""
        keys = self.ee.keys()
        keys.sort()
        for k in keys:
            ## fixed fields have no time axis, so no time-range check
            if k not in ['fx','fixed']:
                keys2 = self.ee[k].keys()
                keys2.sort()
                for k2 in keys2:
                    self.checkThisTrange( self.ee[k][k2], k )

    def checkThisTrange( self, tt, group):
        """Check consistency across a list of time ranges"""

        ## 3hr and 6hr share the generic sub-daily pattern
        if group in ['3hr','6hr']:
            kg = 'subd'
        else:
            kg = group
        ps = self.pats[kg]
        rere = (re.compile( ps[0] ), re.compile( ps[1] ) )

        n = len(tt)
        self.checkId = ('001','filename_timerange_value')
        for j in range(n):
            ## self.monitor: optional sysMonitor used to detect file handle leaks
            if self.monitor != None:
                nofh0 = self.monitor.get_open_fds()
            t = tt[j]
            fn = t[1]
            isFirst = j == 0
            isLast = j == n-1
            lok = True
            for i in [0,1]:
                ## the start of the first file and the end of the last file in a
                ## series are unconstrained; all interior boundaries must match
                if not (i==0 and isFirst or i==1 and isLast):
                    x = rere[i].match( t[3][i] )
                    lok &= self.test( x != None, 'Cannot match time range %s: %s [%s/%s]' % (i,fn,j,n), part=True, appendLogfile=(self.fLogDict.get(fn,None),fn) )
            if not lok:
                if self.recorder != None:
                    self.recorder.modify( t[1], 'ERROR: time range' )
            if self.monitor != None:
                nofh9 = self.monitor.get_open_fds()
                if nofh9 > nofh0:
                    print 'Open file handles: %s --- %s [%s]' % (nofh0, nofh9, j )
---|
1318 | |
---|
1319 | ### http://stackoverflow.com/questions/2023608/check-what-files-are-open-in-python |
---|
class sysMonitor(object):
    """Track the number of file descriptors held open by this process, via lsof."""

    def __init__(self):
        ## high-water mark of open file handles observed so far
        self.fhCountMax = 0

    def get_open_fds(self):
        '''
        return the number of open file descriptors for current process
        .. warning: will only work on UNIX-like os-es.
        '''
        import subprocess
        import os

        pid = os.getpid()
        self.procs = subprocess.check_output(
            [ "lsof", '-w', '-Ff', "-p", str( pid ) ] )
        if isinstance( self.procs, bytes ):
            ## FIX: check_output returns bytes on Python 3; decode before splitting
            self.procs = self.procs.decode( 'utf-8', 'replace' )

        ## lsof -Ff emits one 'f<fd>' line per descriptor; count the numeric ones.
        ## FIX: list comprehension instead of filter() -- on Python 3 filter() is
        ## lazy and len() of it raises TypeError.
        self.ps = [ s for s in self.procs.split( '\n' )
                    if s and s[ 0 ] == 'f' and s[1: ].isdigit() ]
        self.fhCountMax = max( self.fhCountMax, len(self.ps) )
        return len( self.ps )
---|
1342 | |
---|
def maybe_plural(word, n):
    """Return the naive English plural of ``word`` (word + 's') unless n equals 1."""
    return word if n == 1 else "%ss" % word
---|