Changeset 262 for CCCC


Ignore:
Timestamp:
18/12/14 16:07:42 (5 years ago)
Author:
mjuckes
Message:

Initial configuration for ESA CCI: file name parsing

Location:
CCCC/trunk/ceda_cc
Files:
1 added
5 edited

Legend:

Unmodified
Added
Removed
  • CCCC/trunk/ceda_cc/config_c4.py

    r253 r262  
    173173               'project_id':utils.listControl( 'project_id', ['CCMI'] ) } 
    174174 
     175  elif pcfg.projectV.id == 'ESA-CCI': 
     176    lrdr = readVocab( 'esacci_vocabs/') 
     177    vocabs = { 'variable':utils.mipVocab(pcfg,dummy=True), \ 
     178               'level':utils.listControl( 'level', lrdr.getSimpleList( 'procLevel01.txt', bit=0 ) ) \ 
     179             } 
    175180  elif pcfg.projectV.id == '__dummy': 
    176181    vocabs = { 'variable':utils.mipVocab(pcfg,dummy=True) } 
     
    192197 
    193198  def __init__(self, project, version=-1): 
    194     knownProjects = ['CMIP5','CCMI','CORDEX','SPECS','__dummy'] 
     199    knownProjects = ['CMIP5','CCMI','CORDEX','SPECS','ESA-CCI', '__dummy'] 
    195200    assert project in knownProjects, 'Project %s not in knownProjects %s' % (project, str(knownProjects)) 
    196201 
    197202    self.project = project 
     203    self.fNameSep = '_' 
    198204    self.projectV = NT_project(project,version) 
    199205    self.gridSpecTol = 0.01 
     206## default encoding of time range in file names: YYYY[MM[DD[HH]]]-YYYY[MM[DD[HH]]] 
     207    self.trangeType = 'CMIP' 
     208    self.controlledFnParts = [] 
    200209    if project == 'CORDEX': 
    201210      self.requiredGlobalAttributes = [ 'institute_id', 'contact', 'rcm_version_id', 'product', 'CORDEX_domain', 'creation_date', \ 
     
    254263                        'project':'project_id'} 
    255264 
     265    elif project == 'ESA-CCI': 
     266      self.fNameSep = '-' 
     267      self.requiredGlobalAttributes = map( lambda x: 'ga%s' % x, range(10) ) 
     268      self.controlledGlobalAttributes = [ ] 
     269      self.controlledFnParts = ['level'] 
     270      self.requiredVarAttributes = ['long_name', 'standard_name', 'units'] 
     271      self.drsMappings = {'variable':'@var'} 
     272      self.globalAttributesInFn = [None,] 
    256273    elif project == '__dummy': 
    257274      self.requiredGlobalAttributes = map( lambda x: 'ga%s' % x, range(10) ) 
     
    308325      ##self.domainIndex = None 
    309326      ##self.freqIndex = None 
     327    elif self.projectV.id == 'ESA-CCI': 
     328      self.fnParts = NT_fnParts( len=[7,8,9], fxLen=[0,],  unfLen=[7,8,9,], checkTLen=False, ixDomain=None, ixFreq=1 ) 
     329      self.trangeType = 'ESA-CCI' 
    310330    elif self.projectV.id == '__dummy': 
    311331      self.fnParts = NT_fnParts( len=[4,5], fxLen=[4,],  unfLen=[5,], checkTLen=False, ixDomain=None, ixFreq=1 ) 
    312       ##self.fnPartsOkLen = [4,5] 
    313       ##self.fnPartsOkFixedLen = [4,] 
    314       ##self.fnPartsOkUnfixedLen = [5,] 
    315       ##self.checkTrangeLen = False 
    316       ##self.domainIndex = None 
    317       ##self.freqIndex = 1 
     332 
    318333    self.fnPartsOkLen = self.fnParts.len 
    319334    self.fnPartsOkFixedLen = self.fnParts.fxLen 
     
    359374    elif self.project in ['CMIP5','CCMI','SPECS','__dummy']: 
    360375      self.groupIndex = 1 
     376    elif self.project in ['ESA-CCI']: 
     377      self.fnoptions = {'groupIndex':[3,1], 'trangeIndex':[0,-2] } 
     378      self.fnoptions['inFn'] = [[None,'*activity','*level','*project','*var','*product','*additional','*gdsv','*version'], 
     379                                ['*activity','*project','*level','*var','*additional',None,'*version']] 
     380##<Indicative Date>[<Indicative Time>]-ESACCI-<Processing Level>_<CCI Project>-<Data Type>-<Product String>[-<Additional Segregator>][-v<GDS version>]-fv<File version>.nc 
     381##ESACCI-<CCI Project>-<Processing Level>-<Data Type>-<Product String>[-<Additional Segregator>]-<Indicative Date>[<Indicative Time>]-fv<File version>.nc 
     382 
     383    self.trangeIndex = -1 
    361384 
    362385    self.vocabs = getVocabs(self) 
     
    367390          if "height2m" in self.vocabs['variable'].varcons[k][k2].get( '_dimension',[]): 
    368391            print 'config_c4: %s , %s: %s' % (k,k2,str(self.vocabs['variable'].varcons[k][k2]['_dimension'] ) ) 
    369        
    370392 
    371393    ##assert self.project != 'CCMI', 'Not completely set up for CCMI yet' 
     394 
     395  def setEsaCciFNType(self,id): 
     396      self.groupIndex =  self.fnoptions['groupIndex'][id] 
     397      self.trangeIndex = self.fnoptions['trangeIndex'][id] 
     398      self.globalAttributesInFn = self.fnoptions['inFn'][id] 
    372399 
    373400 
  • CCCC/trunk/ceda_cc/extractMipInfo.py

    r256 r262  
    11 
    2 import collections, glob, string, re, json 
     2import collections, glob, string, re, json, sys 
    33from fcc_utils2 import mipTableScan, snlist, tupsort 
    44from config_c4 import CC_CONFIG_DIR 
     
    1919snl, snla = snc.gen_sn_list( ) 
    2020NT_mip = collections.namedtuple( 'mip',['label','dir','pattern'] ) 
     21NT_var = collections.namedtuple( 'var',['name','sn','snStat','realm','units','longName','comment','mip'] ) 
    2122NT_canvari = collections.namedtuple( 'canonicalVariation',['conditions','text', 'ref'] ) 
    2223vlist = [ 
     
    365366    self.type2Atts = ['positive','comment', 'long_name', 'modeling_realm', 'out_name', 'standard_name', 'type', 'units'] 
    366367    self.type3Atts = ['positive','long_name','modeling_realm', 'out_name', 'standard_name', 'type', 'units'] 
    367     self.type4Atts = ['positive','modeling_realm', 'out_name', 'standard_name', 'type', 'units'] 
     368    self.type4Atts = ['positive','modeling_realm', 'standard_name', 'type', 'units'] 
    368369    self.m = m 
    369370    vars = m.vars 
     
    476477       out_name: %(out_name)s; type: %(type)s <br/> 
    477478""" 
    478     fixedType5TmplB = "<li>%s [%s]: %s, %s [%s]: %s</li>\n" 
     479    fixedType5TmplB = "<li>%s [%s]: %s, %s [%s]: %s, %s:: %s, %s</li>\n" 
    479480         
    480481    if typecode == 1: 
     
    524525      thistype,h2,al,tmplA,tmplB = { 3:(self.type3,"Variables with varying comment",['long_name','comment','cell_methods'], fixedType3TmplA, fixedType3TmplB), 
    525526                      4:(self.type4,"Variables with varying long_name",['long_name','cell_methods'],fixedType3TmplA, fixedType4TmplB), 
    526                       5:(self.type5,"Remaining variables",['standard_name','long_name','cell_methods','realm'],fixedType5TmplA, fixedType5TmplB) }[typecode] 
     527                      5:(self.type5,"Remaining variables",['standard_name','long_name','out_name', 'modeling_realm','positive','type','units'],fixedType5TmplA, fixedType5TmplB) }[typecode] 
     528 ##['positive','modeling_realm', 'out_name', 'standard_name', 'type', 'units'] 
    527529      thistype.sort() 
    528530      oo.write( '<h2>%s</h2>\n' % h2 ) 
     
    546548cordex_mip = NT_mip( 'cordex', 'cordex_vocabs/mip/', 'CORDEX_*') 
    547549specs_mip = NT_mip( 'specs', 'specs_vocabs/mip/', 'SPECS_*') 
    548 mips = ( cordex_mip, NT_mip( 'ccmi', 'ccmi_vocabs/mip/', 'CCMI1_*'), NT_mip( 'cmip5','cmip5_vocabs/mip/', 'CMIP5_*' ), ) 
    549550mips = ( cordex_mip, ) 
    550551mips = ( specs_mip, ) 
    551552mips = ( NT_mip( 'ccmi', 'ccmi_vocabs/mip/', 'CCMI1_*'),  ) 
    552553mips = ( NT_mip( 'cmip5','cmip5_vocabs/mip/', 'CMIP5_*' ), ) 
     554mips = ( cordex_mip, NT_mip( 'ccmi', 'ccmi_vocabs/mip/', 'CCMI1_*'), NT_mip( 'cmip5','cmip5_vocabs/mip/', 'CMIP5_*' ), specs_mip) 
    553555m = mipCo( mips )   
    554556h = helper() 
     
    574576 
    575577cmip5AxesAtts = ['axis', 'bounds_values', 'climatology', 'coords_attrib', 'formula', 'index_only', 'long_name', 'must_call_cmor_grid', 'must_have_bounds', 'out_name', 'positive', 'requested', 'requested_bounds', 'standard_name', 'stored_direction', 'tolerance', 'type', 'units', 'valid_max', 'valid_min', 'value', 'z_bounds_factors', 'z_factors'] 
     578 
     579def getTupList(m): 
     580  vl = [] 
     581  keys = m.vdict.keys() 
     582  keys.sort() 
     583  for k in keys: 
     584    for t in m.vdict[k]: 
     585##NT_var = collections.namedtuple( 'var',['name','sn','snStat','realm','units','longName','comment','mip'] ) 
     586      sn, r, units, ln, c = map( lambda x: m.td[t][k][1].get(x,None), ['standard_name','modeling_realm','units','long_name','comment'] )  
     587      mipid = string.split(t,'_')[0] 
     588      if c == '': 
     589        c = None 
     590      v = NT_var( k, sn, 'exists', r, units, ln, c,mipid ) 
     591      vl.append(v) 
     592  return vl 
     593 
     594tl = getTupList(m) 
     595tl1 = uniquify(tl) 
     596tl2 = [tl1[0],] 
     597for t in tl1[1:]: 
     598  if t[:7] == tl2[-1][:7]: 
     599    pass 
     600  elif t[:3] == tl2[-1][:3] and t[4:6] == tl2[-1][4:6]: 
     601    if (t.mip == 'CMIP5' and tl2[-1].mip == 'CCMI1') or (t.mip == 'CCMI1' and tl2[-1].mip == 'CMIP5'): 
     602      tl2[-1] = t 
     603    else: 
     604      print 'What to do??' 
     605      print tl2[-1] 
     606      print t 
     607  else: 
     608    tl2.append(t) 
     609print len(tl),len(tl1) 
     610for t in tl1[:20]: 
     611  print t 
    576612 
    577613v = runcheck1( m, ald, isAxes=False ) 
  • CCCC/trunk/ceda_cc/unitTestsS1.py

    r253 r262  
    2525pcmip5 = dummy() 
    2626pccmi = dummy() 
    27 for x in (p,ps,pcmip5,pccmi): 
     27pcci = dummy() 
     28for x in (p,ps,pcmip5,pccmi,pcci): 
    2829  x.log = log 
    2930  x.abortMessageCount = -1 
     
    3233pcmip5.pcfg = config.projectConfig( "CMIP5" ) 
    3334pccmi.pcfg = config.projectConfig( "CCMI" ) 
     35pcci.pcfg = config.projectConfig( "ESA-CCI" ) 
    3436 
    3537 
     
    108110else: 
    109111  print 'Failed [%s] %s: valid SPECS file name' % (module,fn) 
     112 
     113fn = "20120101015548-ESACCI-L3U-GHRSST-SSTskin-AATSR-LT-v02.0-fv01.1.nc" 
     114c = utils_c4.checkFileName(parent=pcci) 
     115c.check(fn) 
     116if c.errorCount == 0: 
     117  print 'OK: [%s] %s: valid ESA-CCI file name' % (module,fn) 
     118else: 
     119  print 'Failed [%s] %s: valid ESA-CCI file name' % (module,fn) 
     120 
     121fn = "20120101015548-ESACCI-L3U_GHRSST-SSTskin-AATSR-LT-v02.0-fv01.1.nc" 
     122c = utils_c4.checkFileName(parent=pcci) 
     123c.check(fn) 
     124if c.errorCount == 0: 
     125  print 'Failed: [%s] %s: Passed invalid ESA-CCI file name' % (module,fn) 
     126else: 
     127  print 'OK [%s] %s: Detected invalid ESA-CCI file name' % (module,fn) 
    110128 
    111129 
  • CCCC/trunk/ceda_cc/utils_c4.py

    r253 r262  
    196196    self.checkId = 'unset' 
    197197    self.step = 'Initialised' 
    198     self.checks = (self.do_check_fn,) 
     198    self.checks = (self.do_check_fn,self.do_check_fnextra) 
     199    self.re_c1 = re.compile( '^[0-9]*$' ) 
    199200#### 
    200201 
     
    203204    assert type(fn) in [type('x'),type(u'x')], '1st argument to "check" method of checkGrids shound be a string variable name (not %s)' % type(fn) 
    204205    self.fn = fn 
     206    self.fnsep = self.pcfg.fNameSep 
    205207 
    206208    self.runChecks() 
     
    214216    self.checkId = ('001','parse_filename') 
    215217    self.test( fn[-3:] == '.nc', 'File name ending ".nc" expected', abort=True, part=True ) 
    216     bits = string.split( fn[:-3], '_' ) 
     218    bits = string.split( fn[:-3], self.fnsep ) 
    217219    self.fnParts = bits[:] 
    218220 
     
    224226    self.test( len(bits) in self.pcfg.fnPartsOkLen, 'File name not parsed in %s elements [%s]' % (str(self.pcfg.fnPartsOkLen),str(bits)), abort=True ) 
    225227 
     228    if self.pcfg.projectV.id in ['ESA-CCI']: 
     229      self.test( 'ESACCI' in bits[:2], 'File name not a valid ESA-CCI file name: %s' % fn, abort=True ) 
     230      if bits[0] == 'ESACCI': 
     231        self.esaFnId = 1 
     232      else: 
     233        self.esaFnId = 0 
     234      self.pcfg.setEsaCciFNType(self.esaFnId) 
     235 
     236    self.fnDict = {} 
     237    if self.pcfg.projectV.id in ['ESA-CCI']: 
     238      l0 = {0:6, 1:5}[self.esaFnId]   
     239      for i in range(l0): 
     240        x = self.pcfg.globalAttributesInFn[i] 
     241        if x != None and x[0] == '*': 
     242          self.fnDict[x[1:]] = bits[i] 
     243      self.fnDict['version'] = bits[-1] 
     244      if self.esaFnId == 0: 
     245        if len(bits) == 9: 
     246          self.fnDict['additional'] = bits[-3] 
     247          self.fnDict['gdsv'] = bits[-2] 
     248        elif len(bits) == 8: 
     249          if bits[-2][0] == 'v': 
     250            self.fnDict['gdsv'] = bits[-2] 
     251          else: 
     252            self.fnDict['additional'] = bits[-2] 
     253      elif self.esaFnId == 1: 
     254        if len(bits) == 8: 
     255          self.fnDict['additional'] = bits[-3] 
     256         
    226257    if self.pcfg.groupIndex != None: 
    227258      self.group = self.fnParts[self.pcfg.groupIndex] 
     
    251282 
    252283## test time segment 
    253       bits = string.split( self.fnParts[-1], '-' ) 
    254       self.test( len(bits) == 2, 'File time segment [%s] will not parse into 2 elements' % (self.fnParts[-1] ), abort=True, part=True ) 
    255  
    256       self.test(  len(bits[0]) == len(bits[1]), 'Start and end time specified in file name [%s] of unequal length' % (self.fnParts[-1] ), abort=True, part=True  ) 
    257  
    258       for b in bits: 
    259         self.test( self.isInt(b), 'Time segment in filename [%s] contains non integer characters' % (self.fnParts[-1] ),  abort=True, part=True  ) 
    260       self.log_pass() 
    261       self.fnTimeParts = bits[:] 
     284      if self.pcfg.trangeType == 'CMIP': 
     285        bits = string.split( self.fnParts[-1], '-' ) 
     286        self.test( len(bits) == 2, 'File time segment [%s] will not parse into 2 elements' % (self.fnParts[-1] ), abort=True, part=True ) 
     287 
     288        self.test(  len(bits[0]) == len(bits[1]), 'Start and end time specified in file name [%s] of unequal length' % (self.fnParts[-1] ), abort=True, part=True  ) 
     289 
     290        for b in bits: 
     291          self.test( self.isInt(b), 'Time segment in filename [%s] contains non integer characters' % (self.fnParts[-1] ),  abort=True, part=True  ) 
     292        self.log_pass() 
     293        self.fnTimeParts = bits[:] 
     294      elif self.pcfg.trangeType == 'ESA-CCI': 
     295        self.pcfg.checkTrangeLen = False 
     296        tt = self.fnParts[self.pcfg.trangeIndex]  
     297        if self.test( len(tt) in [4,6,8,10,12,14] and self.re_c1.match(tt) != None, 'Length of indicative date/time not consistent with YYYY[MM[DD[HH[MM[SS]]]]] specification: %s' % self.fnParts[-1], part=True  ): 
     298          ll = [tt[:4],] 
     299          tt = tt[4:] 
     300          for j in range(5): 
     301            if len(tt) > 0: 
     302              ll.append( tt[:2] ) 
     303              tt = tt[2:] 
     304            else: 
     305              ll.append( '00' ) 
     306          indDateTime = map( int, ll ) 
     307          self.test( indDateTime[1] in range(1,13), 'Invalid Month in indicative date time %s' % str(ll), part=True ) 
     308          self.test( indDateTime[2] in range(1,32), 'Invalid Day in indicative date time %s' % str(ll), part=True ) 
     309          self.test( indDateTime[3] in range(25), 'Invalid hour in indicative date time %s' % str(ll), part=True ) 
     310          self.test( indDateTime[4] in range(60), 'Invalid minute in indicative date time %s' % str(ll), part=True ) 
     311          self.test( indDateTime[5] in range(60), 'Invalid second in indicative date time %s' % str(ll), part=True ) 
    262312 
    263313    self.checkId = '003' 
     
    277327      if ok: 
    278328        self.log_pass() 
    279     self.completed = True 
     329 
     330  def do_check_fnextra(self): 
     331    self.checkId = ('004','file_name_extra' ) 
     332    vocabs = self.pcfg.vocabs 
     333    m = [] 
     334    for a in self.pcfg.controlledFnParts: 
     335      if self.fnDict.has_key(a): 
     336        try: 
     337          if not vocabs[a].check( str(self.fnDict[a]) ): 
     338            m.append( (a,self.fnDict[a],vocabs[a].note) ) 
     339        except: 
     340          print 'failed trying to check file name component %s' % a 
     341          raise 
     342          ##raise baseException1( 'failed trying to check file name component %s' % a ) 
     343 
     344    self.test( len(m)  == 0, 'File name components do not match constraints: %s' % str(m) ) 
     345 
    280346 
    281347class checkGlobalAttributes(checkBase): 
     
    459525    m = [] 
    460526    for i in range(len(self.globalAttributesInFn)): 
    461        if self.globalAttributesInFn[i] != None: 
     527       if self.globalAttributesInFn[i] != None and self.globalAttributesInFn[i][0] != '*': 
    462528         targVal = fnParts[i] 
    463529         if self.globalAttributesInFn[i][0] == "@": 
     
    887953class checkByVar(checkBase): 
    888954 
    889   def init(self): 
     955  def init(self,fileNameSeparator='_'): 
    890956    self.id = 'C5.001' 
    891957    self.checkId = 'unset' 
    892958    self.step = 'Initialised' 
    893959    self.checks = (self.checkTrange,) 
     960    self.fnsep = fileNameSeparator 
    894961 
    895962  def setLogDict( self,fLogDict ): 
     
    901968    for f in flist: 
    902969      fn = string.split(f, '/' )[-1] 
    903       fnParts = string.split( fn[:-3], '_' ) 
     970      fnParts = string.split( fn[:-3], self.fnsep ) 
    904971       
    905972      try: 
  • CCCC/trunk/ceda_cc/xceptions.py

    r180 r262  
    55class loggedException(Exception): 
    66  pass 
     7class baseException1(Exception): 
     8  pass 
     9 
    710class baseException(Exception): 
    811 
     
    1316        return unicode(self).encode('utf-8') 
    1417 
     18  def __repr__(self): 
     19    return self.msg 
     20 
    1521  def __unicode__(self): 
    1622        return self.msg % tuple([force_unicode(p, errors='replace') 
Note: See TracChangeset for help on using the changeset viewer.