Changeset 326


Ignore:
Timestamp:
19/05/15 12:55:38 (5 years ago)
Author:
mjuckes
Message:

drastically reduced content of c4.py -- moved code to c4_run.py

Location:
CCCC/trunk/ceda_cc
Files:
1 added
4 edited

Legend:

Unmodified
Added
Removed
  • CCCC/trunk/ceda_cc/c4.py

    r325 r326  
    44""" 
    55import sys 
    6 from ccinit import c4_init 
    7  
    8 testmain=False 
    9 ## callout to summary.py: if this option is selected, imports of libraries are not needed. 
    10 if not testmain: 
    11   if __name__ == '__main__': 
    12    if len(sys.argv) > 1: 
    13      if sys.argv[1] == '--sum': 
    14         import summary 
    15         summary.main() 
    16         raise SystemExit(0) 
    17      elif sys.argv[1] == '-v': 
    18         from versionConfig import version, versionComment 
    19         print 'ceda-cc version %s [%s]' % (version,versionComment) 
    20         raise SystemExit(0) 
    21      elif sys.argv[1] == '--unitTest': 
    22         print "Starting test suite 1" 
    23         import unitTestsS1 
    24         print "Starting test suite 2" 
    25         import unitTestsS2 
    26         print "Tests completed" 
    27         raise SystemExit(0) 
    28    else: 
    29      print __doc__ 
    30      raise SystemExit(0) 
    31  
    32 # Standard library imports 
    33 import os, string, time, glob, pkgutil 
    34 import shutil 
    35 ## pkgutil is used in file_utils 
    36 # Third party imports 
    37  
    38 ## Local imports with 3rd party dependencies 
    39 #### netcdf --- currently only cdms2 is supported -- re-arranged to facilitate support for alternative modules 
    40  
    41 import file_utils 
    42  
    43 from file_utils import fileMetadata, ncLib 
    44  
    45 # Local imports 
    46 import utils_c4 as utils 
    47 import config_c4 as config 
    48  
    49 reload( utils ) 
    50  
    51 from xceptions import baseException 
    52  
    53 from fcc_utils2 import tupsort 
    54  
    55  
    56 #driving_model_ensemble_member = <CMIP5Ensemble_member> 
    57 #rcm_version_id = <RCMVersionID>                      
    58  
    59 class dummy(object): 
    60   def __init__(self): 
    61      self.experimental = None 
    62      self.parent = None 
    63  
    64 pathTmplDict = { 'CORDEX':'%(project)s/%(product)s/%(domain)s/%(institute)s/%(driving_model)s/%(experiment)s/%(ensemble)s/%(model)s/%(model_version)s/%(frequency)s/%(variable)s/files/%%(version)s/',   \ 
    65                  'SPECS':'%(project)s/%(product)s/%(institute)s/%(model)s/%(experiment)s/%(start_date)s/%(frequency)s/%(realm)s/%(table)s/%(variable)s/%(ensemble)s/files/%%(version)s/', \ 
    66                  'CMIP5':'%(project)s/%(product)s/%(institute)s/%(model)s/%(experiment)s/%(frequency)s/%(realm)s/%(table)s/%(ensemble)s/files/%%(version)s/%(variable)s/', \ 
    67                  'CCMI':'%(project)s/%(product)s/%(institute)s/%(model)s/%(experiment)s/%(frequency)s/%(realm)s/%(table)s/%(ensemble)s/files/%%(version)s/%(variable)s/', \ 
    68                  'ESA-CCI':'%(level)s/%(platform)s/%(sensor)s/%(variable)s/', \ 
    69                  '__def__':'%(project)s/%(product)s/%(institute)s/%(model)s/%(experiment)s/%(frequency)s/%(realm)s/%(variable)s/%(ensemble)s/files/%%(version)s/', \ 
    70                } 
    71  
    72 ## Core DRS: list of vocab names 
    73 ## Path template: -- current version puts upper case in "project" 
    74 ## Dataset template:   
    75  
    76 class recorder(object): 
    77  
    78   def __init__(self,project,fileName,type='map',dummy=False): 
    79     self.dummy = dummy 
    80     self.file = fileName 
    81     self.type = type 
    82     self.pathTmpl = '%(project)s/%(product)s/%(domain)s/%(institute)s/%(driving_model)s/%(experiment)s/%(ensemble)s/%(model)s/%(model_version)s/%(frequency)s/%(variable)s/files/%%(version)s/' 
    83     self.pathTmpl = pathTmplDict.get(project,pathTmplDict['__def__']) 
    84     self.records = {} 
    85     self.tidtupl = [] 
    86  
    87   def open(self): 
    88     if self.type == 'map': 
    89       self.fh = open( self.file, 'a' ) 
    90     else: 
    91       self.sh = shelve.open( self.file ) 
    92  
    93   def close(self): 
    94     if self.type == 'map': 
    95       self.fh.close() 
    96     else: 
    97       self.sh.close() 
    98  
    99   def add(self,fpath,drs,safe=True): 
    100     assert self.type == 'map','Can only do map files at present' 
    101     assert type(drs) == type( {} ), '2nd user argument to method add should be a dictionary [%s]' % type(drs) 
    102     tpath = self.pathTmpl % drs 
    103     if not self.dummy: 
    104       assert os.path.isfile( fpath ), 'File %s not found' % fpath 
    105       fdate = time.ctime(os.path.getmtime(fpath)) 
    106       sz = os.stat(fpath).st_size 
    107     else: 
    108       fdate = "na" 
    109       sz = 0 
    110     record = '%s | OK | %s | modTime = %s | target = %s ' % (fpath,sz,fdate,tpath) 
    111     fn = string.split( fpath, '/' )[-1] 
    112     for k in ['creation_date','tracking_id']: 
    113       if k in drs.keys(): 
    114         record += ' | %s = %s' % (k,drs[k]) 
    115         if k == 'tracking_id': 
    116           self.tidtupl.append( (fn,drs[k]) ) 
    117  
    118     self.records[fn] = record 
    119    
    120   def modify(self,fn,msg): 
    121     assert fn in self.records.keys(),'Attempt to modify non-existent record %s, %s' % [fn,str(self.records.keys()[0:10])] 
    122     if string.find( self.records[fn], '| OK |') == -1: 
    123       ##print 'File %s already flagged with errors' % fn 
    124       return 
    125     s = string.replace( self.records[fn], '| OK |', '| %s |' % msg ) 
    126     ##print '--> ',s 
    127     self.records[fn] = s 
    128  
    129   def checktids(self): 
    130 ## sort by tracking id 
    131     if len( self.tidtupl ) == 1: 
    132       return 
    133     self.tidtupl.sort( cmp=tupsort(k=1).cmp ) 
    134     nd = 0 
    135     fnl = [] 
    136     for k in range(len(self.tidtupl)-1): 
    137       if self.tidtupl[k][1] == self.tidtupl[k+1][1]: 
    138         print 'Duplicate tracking_id: %s, %s:: %s' % (self.tidtupl[k][0],self.tidtupl[k+1][0],self.tidtupl[k][1]) 
    139         nd += 1 
    140         if len(fnl) == 0 or fnl[-1] != self.tidtupl[k][0]: 
    141           fnl.append( self.tidtupl[k][0]) 
    142         fnl.append( self.tidtupl[k+1][0]) 
    143     if nd == 0: 
    144       print 'No duplicate tracking ids found in %s files' % len(self.tidtupl) 
    145     else: 
    146       print '%s duplicate tracking ids' % nd 
    147       for f in fnl: 
    148         self.modify( f, 'ERROR: duplicate tid' ) 
    149  
    150   def dumpAll(self,safe=True): 
    151     keys = self.records.keys() 
    152     keys.sort() 
    153     for k in keys: 
    154       self.dump( self.records[k], safe=safe ) 
    155  
    156   def dump( self, record, safe=True ): 
    157     if safe: 
    158       self.open() 
    159     self.fh.write( record + '\n' ) 
    160     if safe: 
    161       self.close() 
    162  
    163   def addErr(self,fpath,reason,safe=True): 
    164     record = '%s | %s' % (fpath, reason) 
    165     fn = string.split( fpath, '/' )[-1] 
    166     self.records[fn] = record 
    167  
    168 class checker(object): 
    169   def __init__(self, pcfg, cls,reader,abortMessageCount=-1,experimental=False): 
    170     self.info = dummy() 
    171     self.info.pcfg = pcfg 
    172     self.info.fileIsFixed = None 
    173     self.info.abortMessageCount = abortMessageCount 
    174     self.info.experimental = experimental 
    175     self.calendar = 'None' 
    176     self.ncReader = reader 
    177     self.cfn = utils.checkFileName( parent=self.info,cls=cls) 
    178     self.cga = utils.checkGlobalAttributes( parent=self.info,cls=cls) 
    179     self.cgd = utils.checkStandardDims( parent=self.info,cls=cls) 
    180     self.cgg = utils.checkGrids( parent=self.info,cls=cls) 
    181     self.cls = cls 
    182  
    183     # Define vocabs based on project 
    184     ##self.vocabs = getVocabs(pcgf) 
    185     self.vocabs = pcfg.vocabs 
    186  
    187   def checkFile(self,fpath,log=None,attributeMappings=[]): 
    188     self.calendar = 'None' 
    189     self.info.log = log 
    190  
    191     fn = string.split( fpath, '/' )[-1] 
    192  
    193     if attributeMappings != []: 
    194       self.ncReader.loadNc( fpath ) 
    195       self.ncReader.applyMap( attributeMappings, self.cfn.globalAttributesInFn, log=log ) 
    196       ncRed = True 
    197       thisFn = self.ncReader.fn 
    198     else: 
    199       ncRed = False 
    200       thisFn = fn 
    201  
    202     self.cfn.check( thisFn ) 
    203     if not self.cfn.completed: 
    204       self.completed = False 
    205       return 
    206     if not self.info.pcfg.projectV.id[:2] == '__': 
    207       if not os.path.isfile( fpath ): 
    208         print 'File %s not found [2]' % fpath 
    209         self.completed = False 
    210         return 
    211  
    212     if not ncRed: 
    213       ##print fpath 
    214       self.ncReader.loadNc( fpath ) 
    215     self.ga = self.ncReader.ga 
    216     self.va = self.ncReader.va 
    217     self.da = self.ncReader.da 
    218  
    219     if self.cfn.freq != None: 
    220       vGroup = self.cfn.freq 
    221     else: 
    222       vGroup = self.info.pcfg.mipVocabVgmap.get(self.cfn.group,self.cfn.group) 
    223     self.cga.check( self.ga, self.va, self.cfn.var, vGroup, self.vocabs, self.cfn.fnParts ) 
    224     if not self.cga.completed: 
    225       self.completed = False 
    226       return 
    227  
    228     ##self.cgd.plevRequired = config.plevRequired 
    229     ##self.cgd.plevValues = config.plevValues 
    230     ##self.cgd.heightRequired = config.heightRequired 
    231     ##self.cgd.heightValues = config.heightValues 
    232     ##self.cgd.heightRange = config.heightRange 
    233     self.cgd.check( self.cfn.var, self.cfn.freq, self.da, self.va, self.cga.isInstantaneous, self.vocabs ) 
    234     self.calendar = self.cgd.calendar 
    235     if not self.cgd.completed: 
    236       self.completed = False 
    237       return 
    238  
    239     if self.info.pcfg.doCheckGrids: 
    240       ##self.cgg.rotatedPoleGrids = config.rotatedPoleGrids 
    241       ##self.cgg.interpolatedGrids = config.interpolatedGrids 
    242       self.cgg.check( self.cfn.var, self.cfn.domain, self.da, self.va ) 
    243      
    244       if not self.cgg.completed: 
    245         self.completed = False 
    246         return 
    247     self.completed = True 
    248     self.drs = self.cga.getDrs() 
    249     self.drs['project'] = self.info.pcfg.projectV.id 
    250     self.errorCount = self.cfn.errorCount + self.cga.errorCount + self.cgd.errorCount + self.cgg.errorCount 
    251  
    252 class main(object): 
    253  
    254   def __init__(self,args=None,abortMessageCount=-1,printInfo=False,monitorFileHandles = False,cmdl=None): 
    255     logDict = {} 
    256     ecount = 0 
    257     c4i = c4_init(args=args) 
    258     c4i.logger.info( 'Starting batch -- number of file: %s' % (len(c4i.flist)) ) 
    259     c4i.logger.info( 'Source: %s' % c4i.source ) 
    260     if cmdl != None: 
    261       c4i.logger.info( 'Command: %s' % cmdl ) 
    262        
    263     isDummy  = c4i.project[:2] == '__' 
    264     if (ncLib == None) and (not isDummy): 
    265        raise baseException( 'Cannot proceed with non-dummy [%s] project without a netcdf API' % (c4i.project) ) 
    266     pcfg = config.projectConfig( c4i.project ) 
    267     assert pcfg.projectV.v == -1, 'Cannot handle anything other than latest version at present' 
    268     ncReader = fileMetadata(dummy=isDummy, attributeMappingsLog=c4i.attributeMappingsLog,forceLib=c4i.forceNetcdfLib) 
    269     c4i.logger.info( 'Python netcdf: %s' % ncReader.ncLib ) 
    270     self.cc = checker(pcfg, c4i.project, ncReader,abortMessageCount=abortMessageCount, experimental=c4i.experimental) 
    271     rec = recorder( c4i.project, c4i.recordFile, dummy=isDummy ) 
    272     self.ncLib = ncLib 
    273  
    274     # This list will record the drs dictionaries of all checked files for export to JSON 
    275     drs_list = [] 
    276  
    277     if monitorFileHandles: 
    278       self.monitor = utils.sysMonitor() 
    279     else: 
    280       self.monitor = None 
    281  
    282     cal = None 
    283     if len( c4i.errs ) > 0: 
    284       for i in range(0,len( c4i.errs ), 2 ): 
    285         c4i.logger.info( c4i.errs[i] ) 
    286    
    287     self.cc.info.amapListDraft = [] 
    288     cbv = utils.checkByVar( parent=self.cc.info,cls=c4i.project,monitor=self.monitor) 
    289     if c4i.project not in ['ESA-CCI']: 
    290       cbv.impt( c4i.flist ) 
    291       if printInfo: 
    292         print cbv.info 
    293  
    294     fileLogOpen = False 
    295     self.resList =  [] 
    296     stdoutsum = 2000 
    297     npass = 0 
    298     kf = 0 
    299     for f in c4i.flist: 
    300       kf += 1 
    301       rv = False 
    302       ec = None 
    303       if monitorFileHandles: 
    304         nofhStart = self.monitor.get_open_fds() 
    305       fn = string.split(f,'/')[-1] 
    306       c4i.logger.info( 'Starting: %s' % fn ) 
    307       try: 
    308   ### Each file-level log needs a unique name; otherwise log output gets mixed despite the close statement below. 
    309   ### If duplicate file names are present, this is recorded in the main log and a tag is appended to the file-level log name (not yet tested). 
    310         if c4i.logByFile: 
    311           fLogger = c4i.getFileLog( fn ) 
    312           logDict[fn] = c4i.fileLogfile 
    313           c4i.logger.info( 'Log file: %s' % c4i.fileLogfile ) 
    314           fileLogOpen = True 
    315         else: 
    316           fLogger = c4i.logger 
    317    
    318         fLogger.info( 'Starting file %s' % fn ) 
    319 ## default appending to myapp.log; mode='w' forces a new file (deleting old contents). 
    320         self.cc.checkFile( f, log=fLogger,attributeMappings=c4i.attributeMappings ) 
    321  
    322         if self.cc.completed: 
    323           if cal not in (None, 'None') and self.cc.cgd.varGroup != "fx": 
    324             if cal != self.cc.calendar: 
    325               cal_change_err_msg = 'Error: change in calendar attribute %s --> %s' % (cal, self.cc.calendar) 
    326               c4i.logger.info(cal_change_err_msg) 
    327               fLogger.info(cal_change_err_msg) 
    328               self.cc.errorCount += 1 
    329  
    330           cal = self.cc.calendar 
    331           ec = self.cc.errorCount 
    332         rv =  ec == 0 
    333         if rv: 
    334           npass += 1 
    335         self.resList.append( (rv,ec) ) 
    336  
    337         if c4i.logByFile: 
    338           if self.cc.completed: 
    339             fLogger.info( 'Done -- error count %s' % self.cc.errorCount ) 
    340           else: 
    341             fLogger.info( 'Done -- checks not completed' ) 
    342           c4i.closeFileLog( ) 
    343           fileLogOpen = False 
    344  
    345         if self.cc.completed: 
    346           c4i.logger.info( 'Done -- error count %s' % self.cc.errorCount )  
    347           ecount += self.cc.errorCount 
    348           if self.cc.errorCount == 0: 
    349             rec.add( f, self.cc.drs ) 
    350             drs_list.append({'path': f, 'drs': self.cc.drs}) 
    351           else: 
    352             rec.addErr( f, 'ERRORS FOUND | errorCount = %s' % self.cc.errorCount ) 
    353         else: 
    354           ecount += 20 
    355           c4i.logger.info( 'Done -- testing aborted because of severity of errors' ) 
    356           rec.addErr( f, 'ERRORS FOUND AND CHECKS ABORTED' ) 
    357       except: 
    358         c4i.logger.error("Exception has occured" ,exc_info=1) 
    359         if fileLogOpen: 
    360           fLogger.error("C4.100.001: [exception]: FAILED:: Exception has occured" ,exc_info=1) 
    361           c4i.closeFileLog( ) 
    362           fileLogOpen = False 
    363         rec.addErr( f, 'ERROR: Exception' ) 
    364         if not c4i.holdExceptions: 
    365           raise 
    366       if stdoutsum > 0 and kf%stdoutsum == 0: 
    367          print '%s files checked; %s passed this round' % (kf,npass) 
    368       if monitorFileHandles: 
    369         nofhEnd = self.monitor.get_open_fds() 
    370         if nofhEnd > nofhStart: 
    371            print 'Open file handles: %s --- %s' % (nofhStart, nofhEnd) 
    372    
    373     self.cc.info.log = c4i.logger 
    374      
    375     if c4i.project not in ['SPECS','CCMI','CMIP5','ESA-CCI']: 
    376        cbv.c4i = c4i 
    377        cbv.setLogDict( logDict ) 
    378        cbv.check( recorder=rec, calendar=self.cc.calendar) 
    379        try: 
    380          ecount += cbv.errorCount 
    381        except: 
    382          ecount = None 
    383     ncReader.close() 
    384     if type( self.cc.info.amapListDraft ) == type( [] ) and len(  self.cc.info.amapListDraft ) > 0: 
    385       ll =  self.cc.info.amapListDraft 
    386       ll.sort() 
    387       oo = open( 'amapDraft.txt', 'w' ) 
    388       oo.write( ll[0] + '\n' ) 
    389       for i in range( 1,len(ll) ): 
    390         if ll[i] != ll[i-1]: 
    391           oo.write( ll[i] + '\n' ) 
    392       oo.close() 
    393     if c4i.project in ['SPECS','CCMI','CMIP5']: 
    394       rec.checktids() 
    395     rec.dumpAll() 
    396  
    397     #!TODO: the recorder class could export JSON if it recorded the full drs dictionaries. 
    398     #       This lightweight solution re-uses the filename from the rec class and dumps 
    399     #       JSON in a separate function. 
    400     json_file = os.path.splitext(rec.file)[0] + '.json' 
    401     dump_drs_list(drs_list, json_file) 
    402  
    403     if printInfo: 
    404       print 'Error count %s' % ecount 
    405     ##c4i.hdlr.close() 
    406     c4i.closeBatchLog() 
    407     self.ok = all( map( lambda x: x[0], self.resList ) ) 
    408  
    409  
    410 def dump_drs_list(drs_list, filename): 
    411     import json 
    412     fh = open(filename, 'a+') 
    413     for drs in drs_list: 
    414                 fh.write(json.dumps(drs)) 
    415                 fh.write('\n') 
    416     fh.close() 
    417  
    4186 
    4197def main_entry(): 
     
    43523      print "Tests completed" 
    43624  else: 
     25     from c4_run import main 
    43726     cmdl = string.join( sys.argv ) 
    43827     main(printInfo=True, cmdl=cmdl) 
     
    44029if __name__ == '__main__': 
    44130  main_entry() 
    442  
    443  
    444 ##else: 
    445   ##f1 = '/data/u10/cordex/AFR-44/SMHI/ECMWF-ERAINT/evaluation/SMHI-RCA4/v1/day/clh/clh_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_SMHI-RCA4_v1_day_19810101-19851231.nc' 
    446   ##f2 = '/data/u10/cordex/AFR-44/SMHI/ECMWF-ERAINT/evaluation/SMHI-RCA4/v1/sem/tas/tas_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_SMHI-RCA4_v1_sem_200012-201011.nc' 
    447   ##f3 = '/data/u10/cordex/AFR-44i/SMHI/ECMWF-ERAINT/evaluation/SMHI-RCA4/v1/mon/tas/tas_AFR-44i_ECMWF-ERAINT_evaluation_r1i1p1_SMHI-RCA4_v1_mon_199101-200012.nc' 
    448   ##cc.checkFile( f3 ) 
  • CCCC/trunk/ceda_cc/config_c4.py

    r313 r326  
     1"""config_c4 
     2########## 
     3USAGE: 
     4import config_c4 
     5 
     6This module sets some basic variables, including some vocabulary lists. 
     7""" 
    18import string 
    29import utils_c4 as utils 
     
    113120 
    114121class readVocab(object): 
     122  """readVocab: 
     123  A general class to read in vocabulary lists ("code lists" in ISO 19115 terminology) from a variety of structured text files. 
     124  """ 
    115125 
    116126  def __init__(self,dir): 
     
    178188 
    179189class projectConfig(object): 
     190  """projectConfig: 
     191  Set project specific configuration options. 
     192   
     193  USAGE 
     194  ===== 
     195  pcfg = projectConfig( <project id>[, version=..] ) 
     196 
     197  Creates a "pcfg" object which contains attributes used in the code, including vocabulary lists. 
     198  """ 
    180199 
    181200  def __init__(self, project, version=-1): 
  • CCCC/trunk/ceda_cc/extractMipInfo.py

    r324 r326  
    1313 
    1414heightRequired = ['tas','tasmax','tasmin','huss','sfcWind','sfcWindmax','wsgsmax','uas','vas'] 
    15 cmip5_ignore = ['pfull','phalf','depth','depth_c','eta','nsigma','vertices_latitude','vertices_longitude','ztop','ptop','p0','z1','z2','href','k_c','a','a_bnds','ap','ap_bnds','b','b_bnds','sigma','sigma_bnds','zlev','zlev_bnds','zfull','zhalf'] 
     15cmip5_ignore = ['depth','depth_c','eta','nsigma','vertices_latitude','vertices_longitude','ztop','ptop','p0','z1','z2','href','k_c','a','a_bnds','ap','ap_bnds','b','b_bnds','sigma','sigma_bnds','zlev','zlev_bnds'] 
    1616cmip5AxesAtts = ['axis', 'bounds_values', 'climatology', 'coords_attrib', 'formula', 'index_only', 'long_name', 'must_call_cmor_grid', 'must_have_bounds', 'out_name', 'positive', 'requested', 'requested_bounds', 'standard_name', 'stored_direction', 'tolerance', 'type', 'units', 'valid_max', 'valid_min', 'value', 'z_bounds_factors', 'z_factors'] 
    1717 
  • CCCC/trunk/ceda_cc/unitTestsS2.py

    r304 r326  
    22import utils_c4 
    33import config_c4 as config 
    4 from c4 import fileMetadata, dummy, main 
     4from c4_run import fileMetadata, dummy, main 
    55from xceptions import * 
    66 
Note: See TracChangeset for help on using the changeset viewer.