source: CCCC/tags/0.1/c4.py @ 155

Subversion URL: http://proj.badc.rl.ac.uk/svn/exarch/CCCC/tags/0.1/c4.py@1241
Revision 155, 17.6 KB checked in by mjuckes, 6 years ago (diff)

various fixes for aMap feature -- see updates.txt

Line 
1
2# Standard library imports
3import os, string, time, logging, sys, glob, pkgutil
4
5## pkgutil is used in file_utils
6# Third party imports
7
8## Local imports with 3rd party dependencies
#### netcdf --- currently only cdms2 is supported -- re-arranged to facilitate support for alternative modules
10
11import file_utils
12
13from file_utils import fileMetadata, ncLib
14
15# Local imports
16import utils_c4 as utils
17import config_c4 as config
18
19reload( utils )
20
21
22#driving_model_ensemble_member = <CMIP5Ensemble_member>
23#rcm_version_id = <RCMVersionID>                     
24
class dummy:
  """Empty class with no attributes or methods of its own.

  Instances are used in this module as plain attribute containers
  (e.g. checker.info); the class itself is also compared against ncLib.
  """
27
# Directory-path templates keyed by project name; '__def__' is the fallback
# used when a project has no entry of its own (see recorder.__init__).
# Each template is filled in with the '%' operator from a dictionary; the
# doubled '%%(version)s' survives that step as a literal '%(version)s'
# placeholder in the resulting path.
pathTmplDict = {
  'CORDEX':
    '%(project)s/%(product)s/%(domain)s/%(institute)s/%(driving_model)s/%(experiment)s/%(ensemble)s/%(model)s/%(model_version)s/%(frequency)s/%(variable)s/files/%%(version)s/',
  'SPECS':
    '%(project)s/%(product)s/%(institute)s/%(model)s/%(experiment)s_%(series)s/%(start_date)s/%(frequency)s/%(realm)s/%(variable)s/%(ensemble)s/files/%%(version)s/',
  'CMIP5':
    '%(project)s/%(product)s/%(institute)s/%(model)s/%(experiment)s/%(frequency)s/%(realm)s/%(table)s/%(ensemble)s/files/%%(version)s/%(variable)s/',
  '__def__':
    '%(project)s/%(product)s/%(institute)s/%(model)s/%(experiment)s/%(frequency)s/%(realm)s/%(variable)s/%(ensemble)s/files/%%(version)s/',
}
33
class recorder:
  """Collect one status record per checked file and write them to a file.

  Records are text lines held in self.records, keyed by the base name of
  the file they describe, until dumpAll() (or dump()) writes them out.
  """

  def __init__(self,project,fileName,type='map',dummy=False):
    """Set up an empty record collection.

    project  -- key used to select a template from pathTmplDict; names
                without an entry fall back to the '__def__' template.
    fileName -- path of the output file.
    type     -- 'map' selects a plain text file; any other value makes
                open() use a shelve database (add() only supports 'map').
    dummy    -- when true, add() does not require the data file to exist.
    """
    self.dummy = dummy
    self.file = fileName
    self.type = type
    self.pathTmpl = pathTmplDict.get(project,pathTmplDict['__def__'])
    self.records = {}

  def _baseName(self,fpath):
    """Return the part of fpath after the last '/'."""
    return fpath.split( '/' )[-1]

  def open(self):
    """Open the output: text file in append mode for 'map', else a shelf."""
    if self.type == 'map':
      self.fh = open( self.file, 'a' )
    else:
      # imported here because the module-level imports do not include shelve
      import shelve
      self.sh = shelve.open( self.file )

  def close(self):
    """Close whichever handle open() created."""
    if self.type == 'map':
      self.fh.close()
    else:
      self.sh.close()

  def add(self,fpath,drs,safe=True):
    """Store an 'OK' record for fpath.

    fpath -- path of the data file; must exist unless self.dummy is set.
    drs   -- dictionary used to fill in the path template; the optional
             keys 'creation_date' and 'tracking_id' are appended to the
             record when present.
    safe  -- accepted for interface compatibility; not used here.

    Raises AssertionError for a non-'map' recorder, a non-dictionary drs
    or a missing file.
    """
    assert self.type == 'map','Can only do map files at present'
    assert isinstance( drs, dict ), '2nd user argument to method add should be a dictionary [%s]' % type(drs)
    tpath = self.pathTmpl % drs
    if self.dummy:
      fdate = "na"
      sz = 0
    else:
      assert os.path.isfile( fpath ), 'File %s not found' % fpath
      fdate = time.ctime(os.path.getmtime(fpath))
      sz = os.stat(fpath).st_size
    record = '%s | OK | %s | modTime = %s | target = %s ' % (fpath,sz,fdate,tpath)
    for k in ['creation_date','tracking_id']:
      if k in drs:
        record += ' | %s = %s' % (k,drs[k])

    self.records[ self._baseName(fpath) ] = record

  def modify(self,fn,msg):
    """Replace the '| OK |' marker of record fn with '| msg |'.

    A record that no longer carries the OK marker is left unchanged.
    Raises AssertionError when there is no record for fn.
    """
    # the message arguments must be a tuple: a list here would make the
    # formatting itself fail and hide the assertion
    assert fn in self.records,'Attempt to modify non-existent record %s, %s' % (fn,str(list(self.records.keys())[0:10]))
    current = self.records[fn]
    if '| OK |' not in current:
      return
    self.records[fn] = current.replace( '| OK |', '| %s |' % msg )

  def dumpAll(self,safe=True):
    """Write every stored record, in order of sorted key, via dump()."""
    for k in sorted( self.records.keys() ):
      self.dump( self.records[k], safe=safe )

  def dump( self, record, safe=True ):
    """Write one record followed by a newline.

    When safe is true the output is opened before the write and closed
    afterwards (also if the write fails); otherwise the caller must have
    called open() already.
    """
    if not safe:
      self.fh.write( record + '\n' )
      return
    self.open()
    try:
      self.fh.write( record + '\n' )
    finally:
      self.close()

  def addErr(self,fpath,reason,safe=True):
    """Store an error record '<fpath> | <reason>' (safe is not used)."""
    self.records[ self._baseName(fpath) ] = '%s | %s' % (fpath, reason)
101
class checker:
  """Run the file name, global attribute, dimension and grid checks on a file.

  The individual checks are delegated to utils.checkFileName,
  utils.checkGlobalAttributes, utils.checkStandardDims and utils.checkGrids;
  this class calls them in sequence and stops at the first one that does
  not complete.
  """

  def __init__(self, pcfg, cls,reader,abortMessageCount=-1):
    """Create the check objects.

    pcfg   -- project configuration; its project, vocabs, mipVocabVgmap
              and doCheckGrids attributes are read by this class.
    cls    -- passed on unchanged to each utils check class.
    reader -- object providing loadNc(), applyMap() and the ga/va/da/fn
              attributes read in checkFile().
    abortMessageCount -- stored on self.info for the check classes.
    """
    self.info = dummy()
    self.info.pcfg = pcfg
    self.info.abortMessageCount = abortMessageCount
    self.calendar = 'None'
    self.ncReader = reader
    self.cfn = utils.checkFileName( parent=self.info,cls=cls)
    self.cga = utils.checkGlobalAttributes( parent=self.info,cls=cls)
    self.cgd = utils.checkStandardDims( parent=self.info,cls=cls)
    self.cgg = utils.checkGrids( parent=self.info,cls=cls)
    self.cls = cls
    # defined from the start so that it can be read before any file is checked
    self.completed = False

    # Define vocabs based on project
    self.vocabs = pcfg.vocabs

  def checkFile(self,fpath,log=None,attributeMappings=None):
    """Check one file and record the outcome on this object.

    fpath -- path of the file to check.
    log   -- logger stored on self.info.log for the check classes.
    attributeMappings -- optional list of mappings; when non-empty the file
             is loaded first, the mappings are applied by the reader and
             the reader's file name is the one that gets checked.

    Sets self.completed to False and returns as soon as one check does not
    complete (or the file is missing). When all checks complete,
    self.completed is True and self.drs and self.errorCount are set.
    """
    self.calendar = 'None'
    self.info.log = log
    # False until every check has completed; the early returns below rely on it
    self.completed = False

    fn = fpath.split( '/' )[-1]

    ncRed = attributeMappings is not None and attributeMappings != []
    if ncRed:
      self.ncReader.loadNc( fpath )
      self.ncReader.applyMap( attributeMappings, self.cfn.globalAttributesInFn, log=log )
      thisFn = self.ncReader.fn
    else:
      thisFn = fn

    self.cfn.check( thisFn )
    if not self.cfn.completed:
      return
    # projects whose name starts with '__' are not required to have real files
    if self.info.pcfg.project[:2] != '__' and not os.path.isfile( fpath ):
      print( 'File %s not found [2]' % fpath )
      return

    if not ncRed:
      self.ncReader.loadNc( fpath )
    self.ga = self.ncReader.ga
    self.va = self.ncReader.va
    self.da = self.ncReader.da

    if self.cfn.freq is not None:
      vGroup = self.cfn.freq
    else:
      vGroup = self.info.pcfg.mipVocabVgmap.get(self.cfn.group,self.cfn.group)
    self.cga.check( self.ga, self.va, self.cfn.var, vGroup, self.vocabs, self.cfn.fnParts )
    if not self.cga.completed:
      return

    self.cgd.check( self.cfn.var, self.cfn.freq, self.da, self.va, self.cga.isInstantaneous )
    self.calendar = self.cgd.calendar
    if not self.cgd.completed:
      return

    if self.info.pcfg.doCheckGrids:
      self.cgg.check( self.cfn.var, self.cfn.domain, self.da, self.va )
      if not self.cgg.completed:
        return

    self.completed = True
    self.drs = self.cga.getDrs()
    self.drs['project'] = self.info.pcfg.project
    # NOTE(review): when doCheckGrids is false self.cgg.check() is not called
    # for this file, so cgg.errorCount is whatever utils.checkGrids left there
    # -- confirm it is initialised (and not stale) in that case.
    self.errorCount = self.cfn.errorCount + self.cga.errorCount + self.cgd.errorCount + self.cgg.errorCount
183
class c4_init:
  """Parse the command-line options, build the file list and set up logging.

  After construction the instance provides, among others: flist (sorted
  file paths), fnl (their base names), dupDict (repeated base names),
  errs, logger/batchLogfile and attributeMappings.
  """

  def __init__(self,args=None):
    """Process args (default: sys.argv[1:]).

    Recognised options: -f FILE, -d DIR, -D DIR (recursive), -p PROJECT,
    -R RECORDFILE, --ld LOGDIR, --log single|multi|s|m, --flfmode MODE,
    --blfmode MODE, --aMap FILE, --catchAllExceptions / --cae.

    Raises AssertionError for invalid option values, unused arguments or
    an existing batch log file when the policy demands a new one.
    The list passed as args is not modified.
    """
    self.logByFile = True
    self.policyFileLogfileMode = 'w'
    self.policyBatchLogfileMode = 'np'
    self.attributeMappingFile = None
    self.recordFile = 'Rec.txt'
    self.logDir = 'logs_02'
    self.errs = []

    # Set default project to "CORDEX"
    self.project = "CORDEX"
    self.holdExceptions = False

    if args is None:
      args = sys.argv[1:]
    # work on a copy: parsing consumes the list
    flist, forceLogOrg = self._parseArgs( list(args) )

    if self.project == 'CMIP5':
      flist = [f for f in flist if '/latest/' in f]

    # an explicit --log option overrides the setting implied by -f
    if forceLogOrg is not None:
      self.logByFile = forceLogOrg != 'single'

    if self.project[:2] == '__':
      flist = self._dummyFileList()

    flist.sort()
    fnl = [f.split('/')[-1] for f in flist]
    dupl = self._findDuplicates( fnl )
    nd = len(dupl)
    self.dupDict = dict.fromkeys( dupl, 0 )
    if nd != 0:
      self.errs.append( 'Duplicate file names encountered: %s' % nd )
      self.errs.append( dupl )
    self.flist = flist
    self.fnl = fnl
    if not os.path.isdir( self.logDir ):
      os.makedirs( self.logDir )

    self._openBatchLog()
    self._loadAttributeMappings()

  def _parseArgs(self,args):
    """Consume the option list; return (file list, forced log organisation or None)."""
    # empty unless -f, -d or -D is given
    flist = []
    forceLogOrg = None
    nn = 0
    while len(args) > 0:
      arg = args.pop(0)
      if arg == '-f':
        flist = [args.pop(0),]
        self.logByFile = False
      elif arg == '--log':
        x = args.pop(0)
        assert x in ['single','multi','s','m'], 'unrecognised logging option (--log): %s' % (x)
        if x in ['multi','m']:
          forceLogOrg = 'multi'
        else:
          forceLogOrg = 'single'
      elif arg == '--flfmode':
        lfmk = args.pop(0)
        assert lfmk in ['a','n','np','w','wo'], 'Unrecognised file logfile mode (--flfmode): %s' % lfmk
        self.policyFileLogfileMode = lfmk
      elif arg == '--blfmode':
        lfmk = args.pop(0)
        assert lfmk in ['a','n','np','w','wo'], 'Unrecognised batch logfile mode (--blfmode): %s' % lfmk
        self.policyBatchLogfileMode = lfmk
      elif arg == '-d':
        fdir = args.pop(0)
        flist = glob.glob( '%s/*.nc' % fdir )
      elif arg == '-D':
        flist = []
        fdir = args.pop(0)
        for root, dirs, files in os.walk( fdir, followlinks=True ):
          for f in files:
            fpath = '%s/%s' % (root,f)
            if (os.path.isfile( fpath ) or os.path.islink( fpath )) and f[-3:] == '.nc':
              flist.append( fpath )
      elif arg == '-R':
        self.recordFile = args.pop(0)
      elif arg == '--ld':
        self.logDir = args.pop(0)
      elif arg in ['--catchAllExceptions','--cae']:
        self.holdExceptions = True
      elif arg == '--aMap':
        self.attributeMappingFile = args.pop(0)
        assert os.path.isfile( self.attributeMappingFile ), 'The token "--aMap" should be followed by the path or name of a file'
      elif arg == "-p":
        self.project = args.pop(0)
      else:
        print( 'Unused argument: %s' % arg )
        nn += 1
    assert nn==0, 'Aborting because of unused arguments'
    return flist, forceLogOrg

  def _dummyFileList(self):
    """Return the generated file names used for projects whose name starts with '__'."""
    flist = []
    ss = 'abc'
    for i in range(10):
      v = 'v%s' % i
      for a in ss:
        for b in ss:
          flist.append( '%s_day_%s_%s_1900-1909.nc' % (v,a,b) )
    return flist

  def _findDuplicates(self,fnl):
    """Return every base name in fnl that repeats an earlier entry (one item per repeat)."""
    # fnl follows the order of the full paths, so equal base names from
    # different directories need not be adjacent: track what has been seen.
    seen = set()
    dupl = []
    for fn in fnl:
      if fn in seen:
        dupl.append( fn )
      else:
        seen.add( fn )
    return dupl

  def _openBatchLog(self):
    """Create the batch logger writing to <logDir>/qcBatchLog_<UTC timestamp>.txt."""
    tstring1 = '%4.4i%2.2i%2.2i_%2.2i%2.2i%2.2i' % time.gmtime()[0:6]
    self.batchLogfile = '%s/qcBatchLog_%s.txt' % ( self.logDir,tstring1)
    self.logger = logging.getLogger('c4logger')
    if self.policyBatchLogfileMode in ['n','np']:
      assert not os.path.isfile( self.batchLogfile ), '%s exists and policy set to new file' % self.batchLogfile
    # first letter of the policy gives the mode: 'a' appends, 'w' and 'n' write a fresh file
    m = self.policyBatchLogfileMode[0]
    if m == 'n':
      m = 'w'
    self.hdlr = logging.FileHandler(self.batchLogfile,mode=m)
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    self.hdlr.setFormatter(formatter)
    self.logger.setLevel(logging.INFO)
    self.logger.addHandler(self.hdlr)

  def _loadAttributeMappings(self):
    """Read the --aMap file, if one was given, into self.attributeMappings.

    Each line that is neither blank nor starting with '#' must have the form
    'k1=v1;k2=v2|k=v' and is stored as ('am001', [[k1,v1],[k2,v2]], [k,v]).
    Also opens 'attributeMappingsLog.txt' for writing; it is left open as
    self.attributeMappingsLog.
    """
    self.attributeMappings = []
    self.attributeMappingsLog = None
    if self.attributeMappingFile is None:
      return
    with open( self.attributeMappingFile ) as fh:
      lines = fh.readlines()
    for l in lines:
      # blank lines carry no mapping; skip them rather than fail the format check
      if l.strip() == '' or l[0] == '#':
        continue
      bb = l.strip().split( '|' )
      assert len(bb) ==2, "Error in experimental module attributeMapping -- configuration line not scanned [%s]" % str(l)
      cl = [b.split( '=' ) for b in bb[0].split( ';' )]
      self.attributeMappings.append( ('am001',cl, bb[1].split('=') ) )
    self.attributeMappingsLog = open( 'attributeMappingsLog.txt', 'w' )

  def getFileLog( self, fn, flf=None ):
    """Return a logger writing to the log file for data file fn.

    fn  -- base name of the data file; its last three characters are dropped
           when building the log file name.
    flf -- optional path of an existing log file to append to; when omitted
           the name is <logDir>/<fn minus extension><tag>__qclog_<UTC date>.txt,
           where <tag> numbers files whose base name occurs more than once.

    Raises AssertionError if the log file exists and the file log policy
    is 'n' or 'np'.
    """
    if flf is None:
      tstring2 = '%4.4i%2.2i%2.2i' % time.gmtime()[0:3]
      if fn in self.dupDict:
        tag = '__%2.2i' % self.dupDict[fn]
        self.dupDict[fn] += 1
      else:
        tag = ''
      self.fileLogfile = '%s/%s%s__qclog_%s.txt' % (self.logDir,fn[:-3],tag,tstring2)
      if self.policyFileLogfileMode in ['n','np']:
        assert not os.path.isfile( self.fileLogfile ), '%s exists and policy set to new file' % self.fileLogfile
      m = self.policyFileLogfileMode[0]
      if m == 'n':
        m = 'w'
    else:
      m = 'a'
      self.fileLogfile = flf

    self.fLogger = logging.getLogger('fileLog_%s_%s' % (fn,m))
    self.fHdlr = logging.FileHandler(self.fileLogfile,mode=m)
    fileFormatter = logging.Formatter('%(message)s')
    self.fHdlr.setFormatter(fileFormatter)
    self.fLogger.addHandler(self.fHdlr)
    self.fLogger.setLevel(logging.INFO)
    return self.fLogger

  def _makeReadOnly(self,path):
    """Set the permissions of path to 444; a failure is reported on stderr, not raised."""
    try:
      os.chmod( path, 0o444 )
    except OSError:
      sys.stderr.write( 'Warning: could not make %s read-only\n' % path )

  def closeFileLog(self):
    """Detach and close the current file log; make it read-only for policies 'wo' and 'np'."""
    self.fLogger.removeHandler(self.fHdlr)
    self.fHdlr.close()
    if self.policyFileLogfileMode in ['wo','np']:
      self._makeReadOnly( self.fileLogfile )

  def closeBatchLog(self):
    """Detach and close the batch log; make it read-only for policies 'wo' and 'np'."""
    self.logger.removeHandler(self.hdlr)
    self.hdlr.close()
    if self.policyBatchLogfileMode in ['wo','np']:
      self._makeReadOnly( self.batchLogfile )
368
369
class main:
  """Check every file selected by the command-line options and record the results.

  After construction: self.cc is the checker, self.resList holds one
  (passed, errorCount) tuple per file and self.ok is true when every file
  passed.
  """

  def __init__(self,args=None,abortMessageCount=-1,printInfo=False,monitorFileHandles = False):
    """Run the whole batch.

    args -- option list handed to c4_init (None: use sys.argv[1:]).
    abortMessageCount -- passed on to checker.
    printInfo -- when true, print cbv.info and the final error count.
    monitorFileHandles -- when true, report files after which the number
                          of open file handles has grown.

    Raises RuntimeError when a non-dummy project is requested but ncLib is
    the dummy class. Exceptions raised while checking a file are logged and
    then re-raised unless --catchAllExceptions was given.
    """
    logDict = {}
    ecount = 0
    c4i = c4_init(args=args)
    isDummy  = c4i.project[:2] == '__'
    if (ncLib == dummy) and (not isDummy):
      print( '%s %s' % (ncLib, c4i.project) )
      print( 'Cannot proceed with non-dummy project without cdms' )
      # an explicit exception: a bare raise has nothing to re-raise here
      raise RuntimeError( 'Cannot proceed with non-dummy project without cdms' )
    pcfg = config.projectConfig( c4i.project )
    ncReader = fileMetadata(dummy=isDummy, attributeMappingsLog=c4i.attributeMappingsLog)
    self.cc = checker(pcfg, c4i.project, ncReader,abortMessageCount=abortMessageCount)
    rec = recorder( c4i.project, c4i.recordFile, dummy=isDummy )
    if monitorFileHandles:
      self.monitor = utils.sysMonitor()
    else:
      self.monitor = None

    cal = None
    c4i.logger.info( 'Starting batch -- number of file: %s' % (len(c4i.flist)) )
    # c4i.errs holds (message, detail) pairs; only the messages are logged
    for i in range(0,len( c4i.errs ), 2 ):
      c4i.logger.info( c4i.errs[i] )

    self.cc.info.amapListDraft = []
    cbv = utils.checkByVar( parent=self.cc.info,cls=c4i.project,monitor=self.monitor)
    cbv.impt( c4i.flist )
    if printInfo:
      print( cbv.info )

    fileLogOpen = False
    self.resList =  []
    for f in c4i.flist:
      ec = None
      nResults = len( self.resList )
      if monitorFileHandles:
        nofhStart = self.monitor.get_open_fds()
      fn = f.split('/')[-1]
      c4i.logger.info( 'Starting: %s' % fn )
      try:
  ### need to have a unique name, otherwise get mixing of logs despite close statement below.
  ### if duplicate file names are present, this will be recorded in the main log, tag appended to file level log name (not yet tested).
        if c4i.logByFile:
          fLogger = c4i.getFileLog( fn )
          logDict[fn] = c4i.fileLogfile
          c4i.logger.info( 'Log file: %s' % c4i.fileLogfile )
          fileLogOpen = True
        else:
          fLogger = c4i.logger

        fLogger.info( 'Starting file %s' % fn )
        self.cc.checkFile( f, log=fLogger,attributeMappings=c4i.attributeMappings )

        if self.cc.completed:
          # a calendar differing from the previous completed file counts as an error
          if cal not in (None, 'None') and self.cc.cgd.varGroup != "fx":
            if cal != self.cc.calendar:
              cal_change_err_msg = 'Error: change in calendar attribute %s --> %s' % (cal, self.cc.calendar)
              c4i.logger.info(cal_change_err_msg)
              fLogger.info(cal_change_err_msg)
              self.cc.errorCount += 1

          cal = self.cc.calendar
          ec = self.cc.errorCount
        # ec stays None when the checks did not complete, so the file does not pass
        self.resList.append( (ec == 0,ec) )

        if c4i.logByFile:
          if self.cc.completed:
            fLogger.info( 'Done -- error count %s' % self.cc.errorCount )
          else:
            fLogger.info( 'Done -- checks not completed' )
          c4i.closeFileLog( )
          fileLogOpen = False

        if self.cc.completed:
          c4i.logger.info( 'Done -- error count %s' % self.cc.errorCount )
          ecount += self.cc.errorCount
          if self.cc.errorCount == 0:
            rec.add( f, self.cc.drs )
          else:
            rec.addErr( f, 'ERRORS FOUND | errorCount = %s' % self.cc.errorCount )
        else:
          ecount += 20
          c4i.logger.info( 'Done -- testing aborted because of severity of errors' )
          rec.addErr( f, 'ERRORS FOUND AND CHECKS ABORTED' )
      except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt and
        # SystemExit are never held back by --catchAllExceptions
        c4i.logger.error("Exception has occured" ,exc_info=1)
        if fileLogOpen:
          fLogger.error("xxxxxx: FAILED:: Exception has occured" ,exc_info=1)
          c4i.closeFileLog( )
          fileLogOpen = False
        rec.addErr( f, 'ERROR: Exception' )
        # make sure the failure shows up in self.ok, whether or not a result
        # had already been stored for this file
        if len( self.resList ) > nResults:
          self.resList[-1] = (False,ec)
        else:
          self.resList.append( (False,ec) )
        if not c4i.holdExceptions:
          raise
      if monitorFileHandles:
        nofhEnd = self.monitor.get_open_fds()
        if nofhEnd > nofhStart:
          print( 'Open file handles: %s --- %s' % (nofhStart, nofhEnd) )

    self.cc.info.log = c4i.logger

    if c4i.project not in ['SPECS','CCMI','CMIP5']:
      cbv.c4i = c4i
      cbv.setLogDict( logDict )
      cbv.check( recorder=rec, calendar=self.cc.calendar)
      try:
        ecount += cbv.errorCount
      except (AttributeError, TypeError):
        # no usable error count from the by-variable checks
        ecount = None
    ncReader.close()
    self._writeAmapDraft( self.cc.info.amapListDraft )
    rec.dumpAll()
    if printInfo:
      print( 'Error count %s' % ecount )
    c4i.closeBatchLog()
    self.ok = all( res[0] for res in self.resList )

  def _writeAmapDraft(self,ll):
    """Write the sorted, de-duplicated entries of a non-empty list ll to 'amapDraft.txt'."""
    if not isinstance( ll, list ) or len( ll ) == 0:
      return
    ll.sort()
    with open( 'amapDraft.txt', 'w' ) as oo:
      oo.write( ll[0] + '\n' )
      # the list is sorted, so equal entries are adjacent
      for prev, cur in zip( ll, ll[1:] ):
        if cur != prev:
          oo.write( cur + '\n' )
# Script entry point: run the batch with the options found in sys.argv
# (args=None makes c4_init read them) and print the summary information.
if __name__ == '__main__':
  main( args=None, printInfo=True )
500
501
502##else:
503  ##f1 = '/data/u10/cordex/AFR-44/SMHI/ECMWF-ERAINT/evaluation/SMHI-RCA4/v1/day/clh/clh_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_SMHI-RCA4_v1_day_19810101-19851231.nc'
504  ##f2 = '/data/u10/cordex/AFR-44/SMHI/ECMWF-ERAINT/evaluation/SMHI-RCA4/v1/sem/tas/tas_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_SMHI-RCA4_v1_sem_200012-201011.nc'
505  ##f3 = '/data/u10/cordex/AFR-44i/SMHI/ECMWF-ERAINT/evaluation/SMHI-RCA4/v1/mon/tas/tas_AFR-44i_ECMWF-ERAINT_evaluation_r1i1p1_SMHI-RCA4_v1_mon_199101-200012.nc'
506  ##cc.checkFile( f3 )
Note: See TracBrowser for help on using the repository browser.