# source: CCCC/trunk/ceda_cc/config_c4.py @ 232
#
# Subversion URL: http://proj.badc.rl.ac.uk/svn/exarch/CCCC/trunk/ceda_cc/config_c4.py@232
# Revision 232, 18.6 KB checked in by mjuckes, 6 years ago (diff)
#
# Commit message: bug in CCMI mip table file name pattern
#
# (Columns in the original repository-browser listing: Rev, Line)
import os
import os.path as op
import shutil
import string

import utils_c4 as utils

##############################################################################
# Configure config-file paths
#
# All configuration directories, e.g. cmip5_vocabs, are looked for in a single
# parent directory.  This is the "config" directory within the package unless
# the environment variable CC_CONFIG_DIR is set.

# Directory containing this module.
HERE = op.dirname(__file__)
# Configuration directory shipped alongside this module.
CC_CONFIG_DEFAULT_DIR = op.join(HERE, 'config')
# Directory actually used for look-ups: the environment variable wins when set.
CC_CONFIG_DIR = os.environ.get('CC_CONFIG_DIR', CC_CONFIG_DEFAULT_DIR)

##############################################################################

# CMIP5 experiment names.  Also the basis of the CORDEX experiment list below.
validCmip5Experiments = [
    '1pctCO2', 'abrupt4xCO2', 'amip', 'amip4K', 'amip4xCO2', 'amipFuture',
    'aqua4K', 'aqua4xCO2', 'aquaControl',
    'decadal1959', 'decadal1960', 'decadal1961', 'decadal1962', 'decadal1963', 'decadal1964',
    'decadal1965', 'decadal1966', 'decadal1967', 'decadal1968', 'decadal1969', 'decadal1970',
    'decadal1971', 'decadal1972', 'decadal1973', 'decadal1974', 'decadal1975', 'decadal1976',
    'decadal1977', 'decadal1978', 'decadal1979', 'decadal1980', 'decadal1981', 'decadal1982',
    'decadal1983', 'decadal1984', 'decadal1985', 'decadal1986', 'decadal1987', 'decadal1988',
    'decadal1989', 'decadal1990', 'decadal1991', 'decadal1992', 'decadal1993', 'decadal1994',
    'decadal1995', 'decadal1996', 'decadal1997', 'decadal1998', 'decadal1999', 'decadal2000',
    'decadal2001', 'decadal2002', 'decadal2003', 'decadal2004', 'decadal2005', 'decadal2006',
    'decadal2007', 'decadal2008', 'decadal2009', 'decadal2010', 'decadal2011', 'decadal2012',
    'esmControl', 'esmFdbk1', 'esmFdbk2', 'esmFixClim1', 'esmFixClim2', 'esmHistorical', 'esmrcp85',
    'historical', 'historicalExt', 'historicalGHG', 'historicalMisc', 'historicalNat',
    'lgm', 'midHolocene',
    'noVolc1960', 'noVolc1965', 'noVolc1970', 'noVolc1975', 'noVolc1980',
    'noVolc1985', 'noVolc1990', 'noVolc1995', 'noVolc2000', 'noVolc2005',
    'past1000', 'piControl', 'rcp26', 'rcp45', 'rcp60', 'rcp85',
    'sst2020', 'sst2030', 'sst2090', 'sst2090rcp45',
    'sstClim', 'sstClim4xCO2', 'sstClimAerosol', 'sstClimSulfate', 'volcIn2010']

# CORDEX accepts every CMIP5 experiment name plus 'evaluation'.
validCordexExperiment = validCmip5Experiments + ['evaluation']


# Frequency identifiers accepted for each project.
validCmip5Frequencies = ['fx','yr','monClim','mon','day','6hr','3hr','subhr']
validCordexFrequencies = ['fx','sem','mon','day','6hr','3hr']
validSpecsFrequencies = ['fx','mon','day','6hr']
validCcmiFrequencies = ['fx','yr','mon','day','hr','subhr']
# SPECS experiment families: one entry per line of specs_vocabs/exptFamily.txt
# with surrounding whitespace stripped (blank lines yield empty strings).
# The file is closed as soon as it has been read, and the result is a real
# list whether or not map() would return one.
with open( op.join(CC_CONFIG_DIR, 'specs_vocabs/exptFamily.txt' ) ) as fh:
  validSpecsExptFamilies = [ x.strip() for x in fh.readlines() ]

# CORDEX domain identifiers: the '-44' domains, the same names with an 'i'
# suffix, and the '-11' domain.
# NOTE(review): the 'i' suffix presumably marks the interpolated grids -- confirm.
validCordexDomainsL = [ 'SAM-44', 'CAM-44', 'NAM-44', 'EUR-44', 'AFR-44', 'WAS-44', 'EAS-44', 'CAS-44', 'AUS-44', 'ANT-44', 'ARC-44', 'MED-44']
# Built as a list (not via map) so that it can be concatenated below.
validCordexDomainsLi = [ dom + 'i' for dom in validCordexDomainsL ]
validCordexDomainsH = ['EUR-11']
validCordexDomains = validCordexDomainsL + validCordexDomainsLi + validCordexDomainsH

# Variable names grouped by the kind of vertical coordinate information associated with them.
plevRequired = ['clh', 'clm', 'cll', 'ua850', 'va850', 'ta850', 'hus850', 'ua500', 'va500', 'ta500', 'zg500', 'ua200', 'va200', 'ta200', 'zg200']
plevBndsRequired = ['clh', 'clm', 'cll']
heightRequired = ['tas','tasmax','tasmin','huss','sfcWind','sfcWindmax','wsgsmax','uas','vas']


# GCM names: first whitespace-separated token of every line of
# GCMModelName.txt that is not blank and does not start with '#'.
with open( op.join(CC_CONFIG_DIR, 'cordex_vocabs/GCMModelName.txt' ) ) as fh:
  ii = fh.readlines()
validGcmNames = []
for line in ii:
  bits = line.split()
  if not line.startswith('#') and bits:
    validGcmNames.append( bits[0] )

# RCM and institute names: first and second token, respectively, of every
# line of RCMModelName.txt that is not blank and does not start with '#'.
# The two lists are kept in step (same index == same line of the file).
with open( op.join(CC_CONFIG_DIR, 'cordex_vocabs/RCMModelName.txt' ) ) as fh:
  ii = fh.readlines()
validRcmNames = []
validInstNames = []
for line in ii:
  bits = line.split()
  if not line.startswith('#') and bits:
    validRcmNames.append( bits[0] )
    validInstNames.append( bits[1] )

# Pressure value associated with each variable that carries one.  The cloud
# variables have fixed values; the others are derived from the level encoded
# in the variable name (e.g. 'ua850' -> 850*100).
# NOTE(review): the factor 100 presumably converts hPa to Pa -- confirm.
plevValues = {'clh':22000, 'clm':56000, 'cll':84000}
for level in [200,500,850]:
  for stem in ['zg','ua', 'va', 'ta', 'hus']:
    plevValues['%s%s' % (stem,level)] = level*100

# Height value for each near-surface variable (2 for the temperature and
# humidity variables, 10 for the rest) and the (min,max) range given for it.
heightRequired = ['tas', 'tasmax', 'tasmin', 'huss', 'sfcWind', 'sfcWindmax', 'wsgsmax', 'uas', 'vas']
heightValues = {}
heightRange = {}
for v in heightRequired:
  heightValues[v] = 2 if v in ['tas', 'tasmax', 'tasmin', 'huss'] else 10
  heightRange[v] = (1.5,10.)

# Grid definitions are read from fixed line ranges of cordex_domains.csv.
with open( op.join(CC_CONFIG_DIR, 'cordex_vocabs/cordex_domains.csv' ) ) as fh:
  ii = fh.readlines()

# Rotated-pole grids: lines 3-16 of the file, one grid per line, stored under
# the value of the 'tag' column.  'nlon'/'nlat' are integers; the other
# numeric columns are floats unless the cell holds the literal 'N/A'.
keys = ['name','tag','res','grid_np_lon','grid_np_lat','nlon','nlat','w','e','s','n']
rotatedPoleGrids = {}
for line in ii[2:16]:
  bits = line.strip().split(',')
  ee = {}
  for idx, k in enumerate(keys):
    if k in ['nlon','nlat']:
      ee[k] = int(bits[idx])
    elif k in ['grid_np_lon','grid_np_lat','w','e','s','n','res'] and bits[idx] != 'N/A':
      ee[k] = float(bits[idx])
    else:
      ee[k] = bits[idx]
  # Store once per row, after all columns have been converted.
  rotatedPoleGrids[bits[1]] = ee

##Area,Name, deg,Nlon,Nlat,West8,East8,South8,North8,
# Interpolated grids: lines 19-33 of the file, same layout without the pole
# columns; no 'N/A' handling is applied here.
keys = ['name','tag','res','nlon','nlat','w','e','s','n']
interpolatedGrids = {}
for line in ii[18:33]:
  bits = line.strip().split(',')
  ee = {}
  for idx, k in enumerate(keys):
    if k in ['nlon','nlat']:
      ee[k] = int(bits[idx])
    elif k in ['w','e','s','n','res']:
      ee[k] = float(bits[idx])
    else:
      ee[k] = bits[idx]
  interpolatedGrids[bits[1]] = ee

class readVocab(object):
  """Reads simple line-based vocabulary files from a sub-directory of CC_CONFIG_DIR."""

  def __init__(self,dir):
    """Record the sub-directory (e.g. 'cmip5_vocabs/') that files are read from."""
    self.dir = dir

  def getSimpleList(self,file,bit=None):
    """Return a list with one entry for each line of `file` not starting with '#'.

    file: file name within the vocabulary directory.
    bit:  if None (default) each entry is the whole line with surrounding
          whitespace stripped; otherwise each entry is the whitespace-separated
          token at index `bit` (negative indices allowed).

    Raises IndexError if `bit` is given and a line has too few tokens (this
    includes blank lines); errors from open() are propagated.
    """
    oo = []
    # Context manager guarantees the file is closed, also on error.
    with open('%s/%s/%s' % (CC_CONFIG_DIR, self.dir,file) ) as ii:
      for l in ii:
        if l[0] != '#':
          ll = l.strip()
          if bit is None:
            oo.append(ll)
          else:
            oo.append(ll.split()[bit])
    return oo

# Institution identifiers for SPECS.  In the code visible here the list is
# only referenced from commented-out 'institution' vocabulary entries.
# NOTE(review): 'ECWMF' may be a misspelling of 'ECMWF' -- confirm against the
# SPECS vocabulary before changing it.
validSpecsInstitutions = ['IC3', 'MPI-M', 'KNMI', 'UOXF', 'CNRM-CERFACS', 'ENEA', 'MOHC', 'SMHI', 'IPSL', 'UREAD', 'ECWMF']

def getVocabs(pcfg):
  """Returns a dictionary of vocabulary details for the project provided.

  pcfg: project configuration object.  pcfg.project selects the vocabulary
        set and pcfg itself is handed to utils.mipVocab.

  The result maps 'variable' and global attribute names to the control
  objects built by utils.mipVocab, utils.listControl and utils.patternControl.
  Projects other than 'SPECS', 'CMIP5', 'CCMI' and '__dummy' get the CORDEX
  vocabularies.
  """
  # Same realm list for SPECS, CMIP5 and CCMI.
  realms = ['atmos', 'ocean', 'land', 'landIce', 'seaIce', 'aerosol', 'atmosChem', 'ocnBgchem']

  if pcfg.project == 'SPECS':
               ##'experiment_id':utils.patternControl( 'experiment_id', "(?P<val>.*)[0-9]{4}", list=validSpecsExptFamilies ),
    # NOTE(review): the tag 'associated_experment' below is spelled differently
    # from its key; left unchanged because the use of the tag is not visible here.
    vocabs = { 'variable':utils.mipVocab(pcfg),
               'Conventions':utils.listControl( 'Conventions', ['CF-1.6'] ),
               'frequency':utils.listControl( 'frequency', validSpecsFrequencies ),
               'experiment_id':utils.listControl( 'experiment_id', validSpecsExptFamilies ),
               'initialization_method':utils.patternControl( 'initialization_method', "[0-9]+" ),
               'physics_version':utils.patternControl( 'physics_version', "[0-9]+" ),
               'realization':utils.patternControl( 'realization', "[0-9]+" ),
               'startdate':utils.patternControl( 'startdate', "S[0-9]{8}" ),
               # Raw string: same pattern text, without relying on an unrecognised '\*' escape.
               'associated_experiment':utils.patternControl( 'associated_experment', r"(?P<val>(N/A|(decadal|seasonal): r\*i[0-9]{1,4}p[0-9]{1,4}))" ),
               'project_id':utils.listControl( 'project_id', ['SPECS', 'NMME-SPECS'] ),
               ## 'institution':utils.listControl( 'institution', validSpecsInstitutions ),
               'modeling_realm':utils.listControl( 'realm', realms, split=True ),
             }
  elif pcfg.project == 'CMIP5':
               ##'experiment_id':utils.patternControl( 'experiment_id', "(?P<val>.*)[0-9]{4}", list=validSpecsExptFamilies ),
    lrdr = readVocab( 'cmip5_vocabs/')
    vocabs = { 'variable':utils.mipVocab(pcfg),
               'Conventions':utils.listControl( 'Conventions', ['CF-1.4','CF-1.5'] ),
               'experiment_id':utils.listControl( 'experiment_id', lrdr.getSimpleList( 'experiments.txt' ) ),
               'frequency':utils.listControl( 'frequency', validCmip5Frequencies ),
               'initialization_method':utils.patternControl( 'initialization_method', "[0-9]+" ),
               'physics_version':utils.patternControl( 'physics_version', "[0-9]+" ),
               'realization':utils.patternControl( 'realization', "[0-9]+" ),
               'project_id':utils.listControl( 'project_id', ['CMIP5'] ),
               ## 'institution':utils.listControl( 'institution', validSpecsInstitutions ),
               'modeling_realm':utils.listControl( 'realm', realms, split=True ),
             }
  elif pcfg.project == 'CCMI':

    lrdr = readVocab( 'ccmi_vocabs/')
    vocabs = { 'variable':utils.mipVocab(pcfg),
               'frequency':utils.listControl( 'frequency', validCcmiFrequencies ),
               'experiment_id':utils.listControl( 'experiment_id', lrdr.getSimpleList( 'ccmi_experiments.txt', bit=-1 ) ),
## do not preserve or check relation between model and institution.
               'institution':utils.listControl( 'institution', lrdr.getSimpleList( 'models_insts.txt', bit=1 ) ),
               'model_id':utils.listControl( 'model_id', lrdr.getSimpleList( 'models_insts.txt', bit=0 ) ),
               'modeling_realm':utils.listControl( 'realm', realms ),
               # The key must be exactly 'project_id': nothing may sit between
               # the previous entry and this string literal.
               'project_id':utils.listControl( 'project_id', ['CCMI'] ) }
  elif pcfg.project == '__dummy':
    vocabs = { 'variable':utils.mipVocab(pcfg,dummy=True) }
  else:
    # Default: CORDEX.
    vocabs = { 'variable':utils.mipVocab(pcfg),
           'driving_experiment_name':utils.listControl( 'driving_experiment_name', validCordexExperiment ),
           'project_id':utils.listControl( 'project_id', ['CORDEX'] ),
           'CORDEX_domain':utils.listControl( 'CORDEX_domain',  validCordexDomains ),
           'driving_model_id':utils.listControl( 'driving_model_id',  validGcmNames ),
           'driving_model_ensemble_member':utils.patternControl( 'driving_model_ensemble_member',  'r[0-9]+i[0-9]+p[0-9]+' ),
           'rcm_version_id':utils.patternControl( 'rcm_version_id',  '[a-zA-Z0-9-]+' ),
           'model_id':utils.listControl( 'model_id',  validRcmNames ),
           'institute_id':utils.listControl( 'institute_id',  validInstNames ),
           'frequency':utils.listControl( 'frequency', validCordexFrequencies ) }

  return vocabs

class projectConfig(object):
  """Project-specific settings for the checks named in the section comments below.

  The constructor takes one of 'CMIP5', 'CCMI', 'CORDEX', 'SPECS' or
  '__dummy' and fills in the attributes read by checkStandardDims,
  checkGrids, checkFileName, checkByVar and the MIP vocabulary reader,
  finishing with self.vocabs = getVocabs(self).
  """

  def __init__(self, project):
    """Set up all configuration attributes for `project`.

    Raises AssertionError if `project` is not one of the known projects.
    """
    knownProjects = ['CMIP5','CCMI','CORDEX','SPECS','__dummy']
    assert project in knownProjects, 'Project %s not in knownProjects %s' % (project, str(knownProjects))

    self.project = project
    self.gridSpecTol = 0.01

    self._setAttributeConfig()
    self._setDimAndGridConfig()
    self._setFileNameConfig()
    self._setMipVocabConfig()
    self._setGroupIndex()

    # Must come last: getVocabs is handed this fully configured object.
    self.vocabs = getVocabs(self)

    ##assert self.project != 'CCMI', 'Not completely set up for CCMI yet'

  def _setAttributeConfig(self):
    """Set required/controlled global attributes, file-name attributes and DRS mappings."""
    project = self.project
    # Identical for every known project.
    self.requiredVarAttributes = ['long_name', 'standard_name', 'units']

    if project == 'CORDEX':
      self.requiredGlobalAttributes = [ 'institute_id', 'contact', 'rcm_version_id', 'product', 'CORDEX_domain', 'creation_date',
             'frequency', 'model_id', 'driving_model_id', 'driving_experiment', 'driving_model_ensemble_member', 'experiment_id']
      self.controlledGlobalAttributes = ['frequency', 'driving_experiment_name', 'project_id', 'CORDEX_domain', 'driving_model_id', 'model_id', 'institute_id','driving_model_ensemble_member','rcm_version_id']
      self.globalAttributesInFn = [None,'CORDEX_domain','driving_model_id','experiment_id','driving_model_ensemble_member','model_id','rcm_version_id']
      self.drsMappings = {'variable':'@var','institute':'institute_id', 'product':'product', 'experiment':'experiment_id',
                        'ensemble':'driving_model_ensemble_member', 'model':'model_id', 'driving_model':'driving_model_id',
                        'frequency':'frequency',
                        'project':'project_id', 'domain':'CORDEX_domain', 'model_version':'rcm_version_id' }

    elif project == 'SPECS':
      lrdr = readVocab( 'specs_vocabs/')
      self.requiredGlobalAttributes = lrdr.getSimpleList( 'globalAts.txt' )
      self.exptFamilies = lrdr.getSimpleList( 'exptFamily.txt', bit=0 )
      self.controlledGlobalAttributes = [ 'project_id','experiment_id', 'frequency','Conventions','modeling_realm',
                       'initialization_method','physics_version','realization','associated_experiment']
      self.globalAttributesInFn = [None,'@mip_id','model_id','experiment_id','startdate','@ensemble']
#sic_Oimon_EC-Earth2_seaIceBestInit_S19910501_r1i1p1_199501-199502.nc
## mip_id derived from global attribute Table_id (CMOR convention); ensemble derived from rip attributes.
      self.drsMappings = {'variable':'@var', 'institute':'institute_id', 'product':'product', 'experiment':'experiment_id',
                        'ensemble':'@ensemble', 'model':'model_id', 'realm':'modeling_realm',
                        'frequency':'frequency', 'start_date':'@forecast_reference_time',
                        'table':'@mip_id',
                        'project':'project_id'}

    elif project == 'CMIP5':
      # Hard-coded list.  NOTE(review): an earlier, disabled variant read it
      # from cmip5_vocabs/globalAts.txt instead -- confirm which is intended.
      self.requiredGlobalAttributes = [ 'contact', 'product', 'creation_date', 'tracking_id',
              'experiment_id']
      self.controlledGlobalAttributes = [ 'project_id','experiment_id', 'frequency','Conventions','modeling_realm',
                       'initialization_method','physics_version','realization']
      self.globalAttributesInFn = [None,'@mip_id','model_id','experiment_id','@ensemble']
#sic_Oimon_EC-Earth2_seaIceBestInit_S19910501_series1_r1i1p1_199501-199502.nc
## mip_id derived from global attribute Table_id (CMOR convention); experiment family derived from experiment_id, ensemble derived from rip attributes.
## key: DRS element name, value: global attribute name or tag for mapping from file information ("@....").
      self.drsMappings = {'variable':'@var', 'institute':'institute_id', 'product':'product', 'experiment':'experiment_id',
                        'ensemble':'@ensemble', 'model':'model_id', 'realm':'modeling_realm',
                        'frequency':'frequency',  'table':'@mip_id',
                        'project':'project_id'}

    elif project == 'CCMI':
      lrdr = readVocab( 'ccmi_vocabs/')
      # The list comes from ccmi_vocabs/globalAts.txt only.
      self.requiredGlobalAttributes = lrdr.getSimpleList( 'globalAts.txt' )
      self.controlledGlobalAttributes = [ 'experiment_id', 'project', 'frequency' ]
      self.globalAttributesInFn = [None,'@mip_id','model_id','experiment_id','@ensemble']
      self.drsMappings = {'variable':'@var'}

    elif project == '__dummy':
      # 'ga0' .. 'ga9', built as a real list.
      self.requiredGlobalAttributes = [ 'ga%s' % x for x in range(10) ]
      self.controlledGlobalAttributes = [ ]
      self.globalAttributesInFn = [None,'ga2', 'ga3', 'ga4' ]
      self.drsMappings = {'variable':'@var'}

  def _setDimAndGridConfig(self):
    """Expose the module-level level/height tables and grid definitions on the instance."""
####### used in checkStandardDims
    self.plevRequired = plevRequired
    self.plevValues = plevValues
    self.heightRequired = heightRequired
    self.heightValues = heightValues
    self.heightRange = heightRange

####### used in checkGrids
    self.rotatedPoleGrids = rotatedPoleGrids
    self.interpolatedGrids = interpolatedGrids
    # Grid checks are enabled for CORDEX only.
    self.doCheckGrids = self.project in ['CORDEX',]

  def _setFileNameConfig(self):
    """Set the file-name layout parameters: accepted part counts and index positions."""
####### used in checkFileName (freqIndex also used in checkByVar)
    if self.project == 'CORDEX':
      self.fnPartsOkLen = [8,9]
      self.fnPartsOkFixedLen = [8,]
      self.fnPartsOkUnfixedLen = [9,]
      self.checkTrangeLen = True
      self.domainIndex = 1
      self.freqIndex = 7
    elif self.project == 'CMIP5':
## cRoot_Lmon_CESM1-WACCM_rcp85_r3i1p1_200601-205512.nc
      self.fnPartsOkLen = [5,6]
      self.fnPartsOkFixedLen = [5,]
      self.fnPartsOkUnfixedLen = [6,]
      self.checkTrangeLen = False
      self.domainIndex = None
      self.freqIndex = None
    elif self.project == 'SPECS':
      self.fnPartsOkLen = [6,7]
      self.fnPartsOkFixedLen = [6,]
      self.fnPartsOkUnfixedLen = [7,]
      self.checkTrangeLen = False
      self.domainIndex = None
      self.freqIndex = 1
    elif self.project == 'CCMI':
      self.fnPartsOkLen = [5,6]
      self.fnPartsOkFixedLen = [5,]
      self.fnPartsOkUnfixedLen = [6,]
      self.checkTrangeLen = False
      self.domainIndex = None
      self.freqIndex = None
    elif self.project == '__dummy':
      self.fnPartsOkLen = [4,5]
      self.fnPartsOkFixedLen = [4,]
      self.fnPartsOkUnfixedLen = [5,]
      self.checkTrangeLen = False
      self.domainIndex = None
      self.freqIndex = 1

  def _setMipVocabConfig(self):
    """Set defaults and the MIP table location, table list, name map and file-name pattern."""
    self.defaults = { 'variableDataType':'float' }
######## used in mipVocabs
    if self.project == 'CORDEX':
      self.mipVocabDir = op.join(CC_CONFIG_DIR, 'cordex_vocabs/mip/')
      self.mipVocabTl = ['fx','sem','mon','day','6h','3h']
      self.mipVocabVgmap = {'6h':'6hr','3h':'3hr'}
      self.mipVocabFnpat = 'CORDEX_%s'
    elif self.project == 'CMIP5':
      self.mipVocabDir = op.join(CC_CONFIG_DIR, 'cmip5_vocabs/mip/')
      self.mipVocabTl = ['fx','Oyr','Oclim','Omon','Amon','Lmon','LImon','OImon','cfMon','aero','cfDay','day','cfOff','cfSites','6hrLev','6hrPlev','3hr','cf3hr']
      self.mipVocabVgmap = {}
      self.mipVocabFnpat = 'CMIP5_%s'
      # CMIP5 has no default variable data type.
      self.defaults['variableDataType'] = None
    elif self.project == 'SPECS':
      self.mipVocabDir = op.join(CC_CONFIG_DIR, 'specs_vocabs/mip/')
      self.mipVocabTl = ['fx','Omon','Amon','Lmon','OImon','day','6hr']
      self.mipVocabVgmap = {}
      self.mipVocabFnpat = 'SPECS_%s'
    elif self.project == 'CCMI':
      self.mipVocabDir = op.join(CC_CONFIG_DIR, 'ccmi_vocabs/mip/')
      self.mipVocabTl = ['fixed','annual','monthly','daily','hourly']
      self.mipVocabVgmap = {'fixed':'fx','annual':'yr','monthly':'mon','daily':'day','hourly':'hr'}
      self.mipVocabFnpat = 'CCMI1_%s.txt'
    else:
      # Remaining project ('__dummy'): no table files on disk.
      self.mipVocabDir = None
      self.mipVocabTl = ['day', 't2']
      self.mipVocabVgmap = {}
      self.mipVocabFnpat = None
    self.mipVocabPars = [self.mipVocabDir, self.mipVocabTl, self.mipVocabVgmap, self.mipVocabFnpat]

  def _setGroupIndex(self):
    """Set groupIndex: 7 for CORDEX, 1 for the other known projects."""
######## used in checkByVar
    if self.project == 'CORDEX':
      self.groupIndex = 7
    elif self.project in ['CMIP5','CCMI','SPECS','__dummy']:
      self.groupIndex = 1


def copy_config(dest_dir):
  """
  Copy the current default configuration directory into a separate directory.

  The directory <ceda_cc-package-dir>/config is copied to `dest_dir`.
  This is useful when ceda-cc is installed as a Python package and the user may
  not know where the config directory is stored.

  Note that the packaged default (CC_CONFIG_DEFAULT_DIR) is always the source,
  even when the CC_CONFIG_DIR environment variable points elsewhere.

  :param dest_dir: should be a path to a directory which does not yet exist.
      The configuration directory will be copied to this path.
  :raises: whatever shutil.copytree raises, e.g. when `dest_dir` already exists.

  """
  shutil.copytree(CC_CONFIG_DEFAULT_DIR, dest_dir)
# Note: See TracBrowser for help on using the repository browser.