source: CCCC/trunk/ceda_cc/c4.py @ 325

Subversion URL: http://proj.badc.rl.ac.uk/svn/exarch/CCCC/trunk/ceda_cc/c4.py@325
Revision 325, 15.6 KB checked in by mjuckes, 6 years ago (diff)

corrected frequency in ESA-CCI configuration file

Line 
1"""ceda_cc
2##########
3USAGE: see README.txt in distribution directory.
4"""
5import sys
6from ccinit import c4_init
7
testmain = False
## Early dispatch of the stand-alone command line options (--sum, -v, --unitTest).
## They are handled here, ahead of the library imports that follow, and every
## branch terminates the process, so those imports are never reached for them.
if __name__ == '__main__' and not testmain:
  if len(sys.argv) <= 1:
    ## no arguments at all: show the usage text held in the module docstring
    print(__doc__)
    raise SystemExit(0)

  if sys.argv[1] == '--sum':
    ## callout to summary.py
    import summary
    summary.main()
    raise SystemExit(0)

  if sys.argv[1] == '-v':
    from versionConfig import version, versionComment
    print('ceda-cc version %s [%s]' % (version, versionComment))
    raise SystemExit(0)

  if sys.argv[1] == '--unitTest':
    ## the test suites run as a side effect of being imported
    print("Starting test suite 1")
    import unitTestsS1
    print("Starting test suite 2")
    import unitTestsS2
    print("Tests completed")
    raise SystemExit(0)
31
32# Standard library imports
33import os, string, time, glob, pkgutil
34import shutil
35## pkgutil is used in file_utils
36# Third party imports
37
38## Local imports with 3rd party dependencies
39#### netcdf --- currently only support for cdms2 -- re-arranged to facilitate support for alternative modules
40
41import file_utils
42
43from file_utils import fileMetadata, ncLib
44
45# Local imports
46import utils_c4 as utils
47import config_c4 as config
48
49reload( utils )
50
51from xceptions import baseException
52
53from fcc_utils2 import tupsort
54
55
56#driving_model_ensemble_member = <CMIP5Ensemble_member>
57#rcm_version_id = <RCMVersionID>                     
58
class dummy(object):
  """Plain attribute holder; further attributes are attached by whoever creates it."""

  def __init__(self):
     ## both attributes start out unset
     self.parent = None
     self.experimental = None
63
## Directory path templates, keyed by project name; '__def__' is the fall-back entry.
## The doubled percent in '%%(version)s' survives the first round of %-formatting
## as a literal '%(version)s' placeholder.
pathTmplDict = {
    'CORDEX': (
        '%(project)s/%(product)s/%(domain)s/%(institute)s/%(driving_model)s/'
        '%(experiment)s/%(ensemble)s/%(model)s/%(model_version)s/'
        '%(frequency)s/%(variable)s/files/%%(version)s/'
    ),
    'SPECS': (
        '%(project)s/%(product)s/%(institute)s/%(model)s/%(experiment)s/'
        '%(start_date)s/%(frequency)s/%(realm)s/%(table)s/%(variable)s/'
        '%(ensemble)s/files/%%(version)s/'
    ),
    'CMIP5': (
        '%(project)s/%(product)s/%(institute)s/%(model)s/%(experiment)s/'
        '%(frequency)s/%(realm)s/%(table)s/%(ensemble)s/'
        'files/%%(version)s/%(variable)s/'
    ),
    'CCMI': (
        '%(project)s/%(product)s/%(institute)s/%(model)s/%(experiment)s/'
        '%(frequency)s/%(realm)s/%(table)s/%(ensemble)s/'
        'files/%%(version)s/%(variable)s/'
    ),
    'ESA-CCI': '%(level)s/%(platform)s/%(sensor)s/%(variable)s/',
    '__def__': (
        '%(project)s/%(product)s/%(institute)s/%(model)s/%(experiment)s/'
        '%(frequency)s/%(realm)s/%(variable)s/%(ensemble)s/'
        'files/%%(version)s/'
    ),
}
71
72## Core DRS: list of vocab names
73## Path template: -- current version puts upper case in "project"
74## Dataset template: 
75
class recorder(object):
  """Collect one text record per checked file and append them to a record file.

  Records are held in self.records, keyed by the file's base name (the part of
  the path after the last '/'), so a later file with the same base name
  replaces the earlier record.  Only type 'map' (a plain text file) is
  supported by add(); any other type makes open() use a shelve database.
  """

  def __init__(self,project,fileName,type='map',dummy=False):
    """Set up an empty recorder.

    project  -- key into pathTmplDict; unknown projects use the '__def__' template.
    fileName -- path of the record file written by dump()/dumpAll().
    type     -- 'map' for a text file, anything else for a shelve database.
    dummy    -- if True, add() does not look at the file system.
    """
    self.dummy = dummy
    self.file = fileName
    self.type = type
    self.pathTmpl = pathTmplDict.get(project,pathTmplDict['__def__'])
    self.records = {}
    ## (file name, tracking_id) pairs gathered by add(), used by checktids()
    self.tidtupl = []

  def open(self):
    """Open the record file: text append mode for 'map', a shelve database otherwise."""
    if self.type == 'map':
      self.fh = open( self.file, 'a' )
    else:
      ## imported here because the module does not import shelve at the top
      import shelve
      self.sh = shelve.open( self.file )

  def close(self):
    """Close whichever handle open() created."""
    if self.type == 'map':
      self.fh.close()
    else:
      self.sh.close()

  def add(self,fpath,drs,safe=True):
    """Store an 'OK' record for fpath, with the target path built from drs.

    fpath -- path of the checked file; must exist unless the recorder is a dummy.
    drs   -- dictionary supplying the fields of the project's path template;
             'creation_date' and 'tracking_id' are appended to the record if present.
    safe  -- accepted for symmetry with dump(); not used here.
    Raises AssertionError for a non-'map' recorder, a non-dictionary drs or a
    missing file.
    """
    assert self.type == 'map','Can only do map files at present'
    assert isinstance( drs, dict ), '2nd user argument to method add should be a dictionary [%s]' % type(drs)
    tpath = self.pathTmpl % drs
    if self.dummy:
      fdate = "na"
      sz = 0
    else:
      assert os.path.isfile( fpath ), 'File %s not found' % fpath
      fdate = time.ctime(os.path.getmtime(fpath))
      sz = os.stat(fpath).st_size
    record = '%s | OK | %s | modTime = %s | target = %s ' % (fpath,sz,fdate,tpath)
    fn = fpath.split( '/' )[-1]
    for k in ['creation_date','tracking_id']:
      if k in drs:
        record += ' | %s = %s' % (k,drs[k])
        if k == 'tracking_id':
          self.tidtupl.append( (fn,drs[k]) )

    self.records[fn] = record

  def modify(self,fn,msg):
    """Replace the '| OK |' status of record fn with '| msg |'.

    A record that no longer carries the OK status is left untouched, so the
    first problem reported for a file is the one that is kept.
    Raises AssertionError if fn has no record.
    """
    ## the message arguments must be a tuple: a list would raise TypeError
    ## instead of reporting the assertion failure
    assert fn in self.records,'Attempt to modify non-existent record %s, %s' % (fn,str(list(self.records.keys())[0:10]))
    if '| OK |' not in self.records[fn]:
      ## already flagged with errors
      return
    self.records[fn] = self.records[fn].replace( '| OK |', '| %s |' % msg )

  def checktids(self):
    """Flag every file whose tracking_id is shared with another recorded file.

    Prints a line per duplicate pair and a summary, then marks the records of
    the affected files with 'ERROR: duplicate tid'.  Nothing is done (and
    nothing printed) when exactly one tracking id has been recorded.
    """
    if len( self.tidtupl ) == 1:
      return
    ## sort by tracking id so that duplicates become neighbours
    self.tidtupl.sort( key=lambda item: item[1] )
    nd = 0
    fnl = []
    for k in range(len(self.tidtupl)-1):
      thisFn, thisTid = self.tidtupl[k]
      nextFn, nextTid = self.tidtupl[k+1]
      if thisTid == nextTid:
        print('Duplicate tracking_id: %s, %s:: %s' % (thisFn,nextFn,thisTid))
        nd += 1
        ## in a run of three or more the middle files are only listed once
        if len(fnl) == 0 or fnl[-1] != thisFn:
          fnl.append( thisFn )
        fnl.append( nextFn )
    if nd == 0:
      print('No duplicate tracking ids found in %s files' % len(self.tidtupl))
    else:
      print('%s duplicate tracking ids' % nd)
      for f in fnl:
        self.modify( f, 'ERROR: duplicate tid' )

  def dumpAll(self,safe=True):
    """Write every stored record to the record file, in sorted file-name order."""
    for k in sorted( self.records ):
      self.dump( self.records[k], safe=safe )

  def dump( self, record, safe=True ):
    """Append one record line to the record file.

    With safe=True the file is opened and closed around the write; with
    safe=False the caller must already have called open().
    """
    if safe:
      self.open()
    try:
      self.fh.write( record + '\n' )
    finally:
      ## do not leave the handle open if the write fails
      if safe:
        self.close()

  def addErr(self,fpath,reason,safe=True):
    """Store a failure record 'fpath | reason' for the file; safe is not used."""
    record = '%s | %s' % (fpath, reason)
    fn = fpath.split( '/' )[-1]
    self.records[fn] = record
167
class checker(object):
  """Apply the file name, global attribute, dimension and (optionally) grid checks to a file.

  After checkFile() the outcome is available as self.completed; when the
  checks ran to the end, self.drs and self.errorCount are set as well.
  """

  def __init__(self, pcfg, cls,reader,abortMessageCount=-1,experimental=False):
    """Create the individual check objects.

    pcfg   -- project configuration object (supplies vocabs and is read by checkFile).
    cls    -- value passed as "cls" to each of the utils check classes.
    reader -- object used to load the netcdf file (loadNc, applyMap, ga, va, da).
    """
    ## state object passed to every check object as its "parent"
    shared = dummy()
    shared.pcfg = pcfg
    shared.fileIsFixed = None
    shared.abortMessageCount = abortMessageCount
    shared.experimental = experimental
    self.info = shared

    self.calendar = 'None'
    self.ncReader = reader
    self.cfn = utils.checkFileName( parent=shared,cls=cls)
    self.cga = utils.checkGlobalAttributes( parent=shared,cls=cls)
    self.cgd = utils.checkStandardDims( parent=shared,cls=cls)
    self.cgg = utils.checkGrids( parent=shared,cls=cls)
    self.cls = cls

    ## vocabularies are defined by the project configuration
    self.vocabs = pcfg.vocabs

  def checkFile(self,fpath,log=None,attributeMappings=[]):
    """Run the sequence of checks on fpath, stopping at the first one that does not complete.

    fpath             -- path of the file to check.
    log               -- logger made available to the check objects as info.log.
    attributeMappings -- if not empty, the file is loaded first and the mappings
                         applied before the file name check, which then uses the
                         name reported by the reader.
    Returns None; results are left in self.completed, self.calendar, self.drs
    and self.errorCount.
    """
    self.calendar = 'None'
    self.info.log = log

    baseName = fpath.split( '/' )[-1]

    loadedForMapping = attributeMappings != []
    if loadedForMapping:
      self.ncReader.loadNc( fpath )
      self.ncReader.applyMap( attributeMappings, self.cfn.globalAttributesInFn, log=log )
      nameToCheck = self.ncReader.fn
    else:
      nameToCheck = baseName

    ## 1. file name
    self.cfn.check( nameToCheck )
    if not self.cfn.completed:
      self.completed = False
      return

    ## the existence test is skipped for projects whose id starts with '__'
    if self.info.pcfg.projectV.id[:2] != '__' and not os.path.isfile( fpath ):
      print('File %s not found [2]' % fpath)
      self.completed = False
      return

    if not loadedForMapping:
      self.ncReader.loadNc( fpath )
    self.ga = self.ncReader.ga
    self.va = self.ncReader.va
    self.da = self.ncReader.da

    ## 2. global attributes: use the frequency from the file name if there is one,
    ##    otherwise the file name group, translated through mipVocabVgmap
    varGroup = self.cfn.freq if self.cfn.freq != None else \
               self.info.pcfg.mipVocabVgmap.get(self.cfn.group,self.cfn.group)
    self.cga.check( self.ga, self.va, self.cfn.var, varGroup, self.vocabs, self.cfn.fnParts )
    if not self.cga.completed:
      self.completed = False
      return

    ## 3. standard dimensions; the calendar is recorded even if the check aborts
    self.cgd.check( self.cfn.var, self.cfn.freq, self.da, self.va, self.cga.isInstantaneous, self.vocabs )
    self.calendar = self.cgd.calendar
    if not self.cgd.completed:
      self.completed = False
      return

    ## 4. grids, only when the project configuration asks for it
    if self.info.pcfg.doCheckGrids:
      self.cgg.check( self.cfn.var, self.cfn.domain, self.da, self.va )
      if not self.cgg.completed:
        self.completed = False
        return

    self.completed = True
    self.drs = self.cga.getDrs()
    self.drs['project'] = self.info.pcfg.projectV.id
    self.errorCount = sum( chk.errorCount for chk in (self.cfn, self.cga, self.cgd, self.cgg) )
251
class main(object):
  """Batch driver: check every file listed by c4_init and write the summaries.

  All of the work is done in the constructor.  Afterwards the instance holds:
    cc      -- the checker used for the batch,
    ncLib   -- the netcdf library object imported from file_utils,
    monitor -- a utils.sysMonitor, or None,
    resList -- one (passed, errorCount) tuple per file whose processing did
               not raise an exception,
    ok      -- True if every entry of resList passed.
  """

  def __init__(self,args=None,abortMessageCount=-1,printInfo=False,monitorFileHandles = False,cmdl=None):
    """Run the batch.

    args               -- passed to c4_init to establish project, file list and logging.
    abortMessageCount  -- passed on to the checker.
    printInfo          -- if True, print cbv.info and the final error count.
    monitorFileHandles -- if True, report files which leave extra file handles open.
    cmdl               -- command line string to be logged, if given.
    Raises baseException if no netcdf library is available for a non-dummy project.
    """
    logDict = {}
    ecount = 0
    c4i = c4_init(args=args)
    c4i.logger.info( 'Starting batch -- number of file: %s' % (len(c4i.flist)) )
    c4i.logger.info( 'Source: %s' % c4i.source )
    if cmdl is not None:
      c4i.logger.info( 'Command: %s' % cmdl )

    ## project names starting with '__' are dummy projects
    isDummy = c4i.project[:2] == '__'
    if ncLib is None and not isDummy:
      raise baseException( 'Cannot proceed with non-dummy [%s] project without a netcdf API' % (c4i.project) )
    pcfg = config.projectConfig( c4i.project )
    assert pcfg.projectV.v == -1, 'Cannot handle anything other than latest version at present'
    ncReader = fileMetadata(dummy=isDummy, attributeMappingsLog=c4i.attributeMappingsLog,forceLib=c4i.forceNetcdfLib)
    c4i.logger.info( 'Python netcdf: %s' % ncReader.ncLib )
    self.cc = checker(pcfg, c4i.project, ncReader,abortMessageCount=abortMessageCount, experimental=c4i.experimental)
    rec = recorder( c4i.project, c4i.recordFile, dummy=isDummy )
    self.ncLib = ncLib

    # This list will record the drs dictionaries of all checked files for export to JSON
    drs_list = []

    if monitorFileHandles:
      self.monitor = utils.sysMonitor()
    else:
      self.monitor = None

    cal = None
    ## NOTE(review): only every second entry of c4i.errs is logged -- presumably
    ## the list holds pairs; confirm against c4_init.
    for i in range(0,len( c4i.errs ), 2 ):
      c4i.logger.info( c4i.errs[i] )

    self.cc.info.amapListDraft = []
    cbv = utils.checkByVar( parent=self.cc.info,cls=c4i.project,monitor=self.monitor)
    if c4i.project not in ['ESA-CCI']:
      cbv.impt( c4i.flist )
      if printInfo:
        print(cbv.info)

    fileLogOpen = False
    self.resList =  []
    ## a progress line is printed after every "stdoutsum" files
    stdoutsum = 2000
    npass = 0
    kf = 0
    for f in c4i.flist:
      kf += 1
      rv = False
      ec = None
      if monitorFileHandles:
        nofhStart = self.monitor.get_open_fds()
      fn = f.split('/')[-1]
      c4i.logger.info( 'Starting: %s' % fn )
      try:
  ### need to have a unique name, otherwise get mixing of logs despite close statement below.
  ### if duplicate file names are present, this will be recorded in the main log, tag appended to file level log name (not yet tested).
        if c4i.logByFile:
          fLogger = c4i.getFileLog( fn )
          logDict[fn] = c4i.fileLogfile
          c4i.logger.info( 'Log file: %s' % c4i.fileLogfile )
          fileLogOpen = True
        else:
          fLogger = c4i.logger

        fLogger.info( 'Starting file %s' % fn )
        self.cc.checkFile( f, log=fLogger,attributeMappings=c4i.attributeMappings )

        if self.cc.completed:
          ## a calendar differing from that of the previously completed file
          ## counts as an error, except for the "fx" variable group
          if cal not in (None, 'None') and self.cc.cgd.varGroup != "fx":
            if cal != self.cc.calendar:
              cal_change_err_msg = 'Error: change in calendar attribute %s --> %s' % (cal, self.cc.calendar)
              c4i.logger.info(cal_change_err_msg)
              fLogger.info(cal_change_err_msg)
              self.cc.errorCount += 1

          cal = self.cc.calendar
          ec = self.cc.errorCount
        ## ec stays None when the checks were aborted, so rv is then False
        rv =  ec == 0
        if rv:
          npass += 1
        self.resList.append( (rv,ec) )

        if c4i.logByFile:
          if self.cc.completed:
            fLogger.info( 'Done -- error count %s' % self.cc.errorCount )
          else:
            fLogger.info( 'Done -- checks not completed' )
          c4i.closeFileLog( )
          fileLogOpen = False

        if self.cc.completed:
          c4i.logger.info( 'Done -- error count %s' % self.cc.errorCount )
          ecount += self.cc.errorCount
          if self.cc.errorCount == 0:
            rec.add( f, self.cc.drs )
            drs_list.append({'path': f, 'drs': self.cc.drs})
          else:
            rec.addErr( f, 'ERRORS FOUND | errorCount = %s' % self.cc.errorCount )
        else:
          ## aborted checks are given a nominal weight of 20 errors
          ecount += 20
          c4i.logger.info( 'Done -- testing aborted because of severity of errors' )
          rec.addErr( f, 'ERRORS FOUND AND CHECKS ABORTED' )
      except Exception:
        ## "Exception" rather than a bare except, so that KeyboardInterrupt and
        ## SystemExit still stop the batch when holdExceptions is set.
        ## NOTE(review): a file that ends up here adds nothing to self.resList,
        ## so it does not affect self.ok -- confirm that this is intended.
        c4i.logger.error("Exception has occured" ,exc_info=1)
        if fileLogOpen:
          fLogger.error("C4.100.001: [exception]: FAILED:: Exception has occured" ,exc_info=1)
          c4i.closeFileLog( )
          fileLogOpen = False
        rec.addErr( f, 'ERROR: Exception' )
        if not c4i.holdExceptions:
          raise
      if stdoutsum > 0 and kf%stdoutsum == 0:
        print('%s files checked; %s passed this round' % (kf,npass))
      if monitorFileHandles:
        nofhEnd = self.monitor.get_open_fds()
        if nofhEnd > nofhStart:
          print('Open file handles: %s --- %s' % (nofhStart, nofhEnd))

    self.cc.info.log = c4i.logger

    if c4i.project not in ['SPECS','CCMI','CMIP5','ESA-CCI']:
      cbv.c4i = c4i
      cbv.setLogDict( logDict )
      cbv.check( recorder=rec, calendar=self.cc.calendar)
      try:
        ecount += cbv.errorCount
      except Exception:
        ## no usable error count from the by-variable checks
        ecount = None
    ncReader.close()

    ## write the draft attribute mappings, sorted and without repeats
    ll = self.cc.info.amapListDraft
    if isinstance( ll, list ) and len( ll ) > 0:
      ll.sort()
      with open( 'amapDraft.txt', 'w' ) as oo:
        oo.write( ll[0] + '\n' )
        for i in range( 1,len(ll) ):
          if ll[i] != ll[i-1]:
            oo.write( ll[i] + '\n' )

    if c4i.project in ['SPECS','CCMI','CMIP5']:
      rec.checktids()
    rec.dumpAll()

    #!TODO: the recorder class could export JSON if it recorded the full drs dictionaries.
    #       This lightweight solution re-uses the filename from the rec class and dumps
    #       JSON in a separate function.
    json_file = os.path.splitext(rec.file)[0] + '.json'
    dump_drs_list(drs_list, json_file)

    if printInfo:
      print('Error count %s' % ecount)
    c4i.closeBatchLog()
    self.ok = all( res[0] for res in self.resList )
408
409
def dump_drs_list(drs_list, filename):
    """Append the entries of drs_list to filename, one JSON document per line.

    drs_list -- iterable of JSON-serialisable objects.
    filename -- path of the output file; it is created if it does not exist
                and existing content is kept.
    Raises whatever json.dumps raises for an entry that cannot be serialised;
    entries before it have already been written.
    """
    import json
    ## "with" closes the file even if an entry cannot be serialised
    with open(filename, 'a+') as fh:
        for drs in drs_list:
            fh.write(json.dumps(drs))
            fh.write('\n')
417
418
def main_entry():
  """
   Wrapper around main() for use with setuptools.

   Dispatches on the first command line argument: '--sum' runs summary.main(),
   '-v' prints the version, '--unitTest' imports the two unit test suites, and
   anything else starts a batch with the full command line.  With no arguments
   the module usage text is printed and the function returns.
  """
  ## without this guard sys.argv[1] raises IndexError when the installed
  ## command is run with no arguments
  if len(sys.argv) < 2:
    print(__doc__)
    return

  option = sys.argv[1]
  if option == '--sum':
    import summary
    summary.main()
  elif option == '-v':
    from versionConfig import version, versionComment
    print('ceda-cc version %s [%s]' % (version,versionComment))
  elif option == '--unitTest':
    ## the test suites run as a side effect of being imported
    print("Starting test suite 1")
    import unitTestsS1
    print("Starting test suite 2")
    import unitTestsS2
    print("Tests completed")
  else:
    cmdl = ' '.join( sys.argv )
    main(printInfo=True, cmdl=cmdl)

if __name__ == '__main__':
  main_entry()
442
443
444##else:
445  ##f1 = '/data/u10/cordex/AFR-44/SMHI/ECMWF-ERAINT/evaluation/SMHI-RCA4/v1/day/clh/clh_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_SMHI-RCA4_v1_day_19810101-19851231.nc'
446  ##f2 = '/data/u10/cordex/AFR-44/SMHI/ECMWF-ERAINT/evaluation/SMHI-RCA4/v1/sem/tas/tas_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_SMHI-RCA4_v1_sem_200012-201011.nc'
447  ##f3 = '/data/u10/cordex/AFR-44i/SMHI/ECMWF-ERAINT/evaluation/SMHI-RCA4/v1/mon/tas/tas_AFR-44i_ECMWF-ERAINT_evaluation_r1i1p1_SMHI-RCA4_v1_mon_199101-200012.nc'
448  ##cc.checkFile( f3 )
Note: See TracBrowser for help on using the repository browser.