source: CMIP6dreqbuild/trunk/srcMisc/sx2.py @ 355

Subversion URL: http://proj.badc.rl.ac.uk/svn/exarch/CMIP6dreqbuild/trunk/srcMisc/sx2.py@355
Revision 355, 23.8 KB checked in by mjuckes, 7 years ago (diff)

adding python scripts

Line 
1
2##
3## still missing map from new groups to old table variables.
4## need a key lookup by variable in each table.
5##
6
7import collections, string, hashlib, uuid, os, sys
8import dreqSX as sx
# Directory that receives every shelve database written by this script.
odir = 'sh20150708'
if not os.path.isdir( odir ):
  os.mkdir( odir )

# Variable-group labels accepted when classifying request groups further down
# (membership tests only).
cmip5vgr = ['Omon','fx','Oyr','Oclim','Omon_3d','Omon_oth','Amon','Lmon','LImon','OImon','aero_3d','aero_oth','cfDay_2d','cfDay_3d','cfMon_3dstd','cfMon_3dmod','cfMon_2dmod','cfMon_sim','day_oth','day_ss','cfOff','cfSites','6hrLev','6hrPlev','3hr','cf3hr_grid','cf3hr_sim']
ccmivgrdict = {'fixed':'fx','annual':'yr','monthly':'mon','daily':'day','hourly':'hr'}
specsvgr =  ['fx','mon','day','6hr']

# ee_nv1:     uid (last column of an sx.eenv record) -> keys of sx.eenv
# ee_var_nv1: stripped variable name (column 5)      -> keys of sx.eenv
# The remaining indexes are filled by later sections of the script.
ee_nv1 = collections.defaultdict( list )
ee_var_nv1 = collections.defaultdict( list )
ee_tabs = collections.defaultdict( list )
ee_grps = collections.defaultdict( list )
ee_usedVars = collections.defaultdict( list )

# Spelling corrections applied to table labels before they are looked up.
kmap = { 'cfday':'cfDay', 'Limon':'LImon', 'CMIP5_Llmon':'CMIP5_LImon', 'NEW':'new', 'New':'new'}

for k in sx.eenv.keys():
  rec = sx.eenv[k]
  # str.strip() replaces the string.strip() function, which no longer exists
  # in Python 3; the unused 'mip' local has been dropped.
  var = rec[5].strip()
  uid = rec[-1]
  ee_nv1[uid].append( k )
  ee_var_nv1[var].append( k )
  # every record is expected to carry its own key in the last column
  assert uid == k, 'mismatch in uid in record %s' % k
32
def tabref1( ti ):
  """Return the table reference to use for the table label ``ti``.

  The label 'new BUT' is redirected to 'CMIP5_Lmon'; every other label
  (including 'new') is returned unchanged.
  """
  return 'CMIP5_Lmon' if ti == 'new BUT' else ti
40
# Index the sx.ee0 records by (normalised) table label and by group label.
for k in sx.ee0.keys():
  r = sx.ee0[k]
  tab = r[2]
  grp = r[0]
  thistab = tabref1(tab)
  ee_tabs[thistab].append(k)
  ee_grps[grp].append(k)

# Classify every table label found above:
#   kfn  -- 'new' or blank
#   kf   -- a key of sx.ee_miptables_old
#   kfgp -- the label of a group (key of ee_grps)
#   kfxx -- a key of sx.ee_miptables_old once prefixed with 'CMIP5_'
#   knf  -- none of the above
# kmapmap records the resolved name for every label except those in knf.
kf = []
kfn = []
kfxx = []
kfgp = []
knf = []
kmapmap = {}
for k in ee_tabs.keys():
  # only the first word of a multi-word label identifies the table
  if ' ' in k:
    k2 = k.split()[0]
  else:
    k2 = k

  k2 = kmap.get( k2,k2)

  if k2 in ['new','']:
    kfn.append(k)
    kmapmap[k] = 'new'
  elif k2 in sx.ee_miptables_old:
    kf.append(k)
    kmapmap[k] = k2
  elif k2 in ee_grps:
    kfgp.append(k)
    kmapmap[k] = k2
  elif ('CMIP5_%s' % k2) in sx.ee_miptables_old:
    kfxx.append(k)
    kmapmap[k] = 'CMIP5_%s' % k2
  else:
    knf.append(k)

# Report unresolved labels together with the sorted, distinct values found in
# the second-to-last column of the records that use them.
thisd = {}
print( 'Missing groups' )
for k in knf:
  thise = collections.defaultdict( int )
  for k2 in ee_tabs[k]:
    thise[sx.ee0[k2][-2]] += 1
  keys = sorted( thise.keys() )
  thisd[k] = keys[:]
  print( '%s :::  %s' % (k, keys) )
91   
92### look for new variables in variable groups.
class d1(object):
  """Dictionary-like accumulator in which each key maps to a list of values.

  Assigning ``obj[k] = v`` appends ``v`` to the list held for ``k``.
  Reading a key that has never been assigned raises KeyError while
  ``onItemNotFound`` is 'error' (the default) and returns None otherwise.
  The underlying defaultdict is available as ``self.d``.
  """

  def __init__(self):
    self.d = collections.defaultdict( list )
    self.onItemNotFound = 'error'

  def keys(self):
    """Return the assigned keys as a list."""
    # list() keeps the Python 2 return type on Python 3, so callers may
    # index or sort the result.
    return list( self.d.keys() )

  def __setitem__(self,k,v):
    self.d[k].append(v)

  def __itemNotFound__(self,k):
    """Handle a failed lookup: raise KeyError or return None."""
    if self.onItemNotFound == 'error':
      raise  KeyError( 'Key %s not found in dictionary keys' % k)
    return None

  def __getitem__(self,k):
    # Test membership explicitly: indexing the defaultdict directly would
    # silently create an empty entry.  ('in' replaces dict.has_key, which
    # does not exist on Python 3.)
    if k in self.d:
      return self.d[k]
    return self.__itemNotFound__(k)
113
## look up table, giving uuid from old mip tables for each mip, var combination.
mvlookup = collections.defaultdict( d1 )

ee_unusedVars = []
nok = 0
nn = 0
ee0_null = collections.defaultdict( list )
ee0_xref = collections.defaultdict( list )
ee0_xref_old = collections.defaultdict( list )
for t in sx.ee_miptables_old.keys():
  for u in sx.ee_miptables_old[t]:
    v1 = sx.ee_mip[u][15]
    v2 = sx.ee_mip[u][21]
    # column 21 takes precedence over column 15 whenever it is not blank
    if v2 != '':
      vid = v2
    else:
      vid = v1
    mvlookup[t].d[vid].append( u )

## ee0_xref[u] receives exactly one tuple (code, ids) per sx.ee0 record:
##   ( 0, keys of sx.eenv)          -- variable name found in ee_var_nv1
##   ( 1, [uid from old mip table]) -- found through mvlookup for the record's table
##   (-1, [None])                   -- not resolved
for u in sx.ee0.keys():
  rec = sx.ee0[u]
  var = rec[1].strip()
  tab = kmapmap.get( tabref1(rec[2]), None )

  # the variable may be registered under its own name or with a '**' suffix
  hit = None
  for cand in (var, var + "**"):
    if cand in ee_var_nv1:
      hit = cand
      break

  if hit is not None:
    nok += 1
    ee_usedVars[var].append( u )
    ee0_xref[u].append( (0, ee_var_nv1[hit]) )
    continue

  ok = False
  if tab in mvlookup:
    if var in mvlookup[tab].d:
      ul = mvlookup[tab].d[var]
      if len(ul) > 1:
        print( 'ERROR.001.011: variable %s has multiple entries %s' % (var,tab) )
      # only the first match is kept
      ee0_xref[u].append( (1, [ul[0],]) )
      ok = True
    else:
      print( 'ERROR.001.010: variable %s not found in %s' % (var,tab) )
  else:
    print( 'ERROR.001.012: [%s] table not found: %s' % (var,tab) )

  if not ok:
    print( '----- %s %s' % (tab, rec) )
    ee0_xref[u].append( (-1, [None,]) )

    # classify why the record could not be resolved
    if rec[1][:1] == '#' or rec[0][:1] == '#':
      ee0_null[u].append( '# comment' )
    elif ( len(rec[0]) > 5 and rec[0][:4] == 'PMIP' ) and ( rec[1] in ['','*'] ):
      ee0_null[u].append( 'PMIP bulk copy' )
    elif len(rec[1]) == 0 and len(rec[2]) == 0:
      ee0_null[u].append( 'comment' )
    else:
      nn += 1
      ee_unusedVars.append( (u,var) )

print( '%s %s' % (nn, nok) )
nok = 0
nn = 0
thise1 = []
uclear = {}
### run through CMIP5 variables which are modified, re-used etc in new CMIP6 groups.
### 43 not found .... as above .....
### uclear maps a record key to its entry in sx.ee_ovars_old; records with no
### entry are collected in thise1 as (key, table, variable).
for k in kf + kfxx:
  k2 = kmapmap[k]
  for u in ee_tabs[k]:
    rec = sx.ee0[u]
    var = rec[1].strip()
    if var not in sx.ee_miptables_vars_old[k2]:
      print( 'ERROR.0001.002: variables in sx.ee0 [new groups] not found:  %s %s %s' % (k, var, rec) )
      nn += 1
    else:
      ee_usedVars[var].append( u )

    # Candidate keys, tried in order: "table.var", "table.var**" and, when the
    # table label has a second space-separated word, that word with its first
    # and last characters removed.
    candidates = ['%s.%s' % (k2,var), '%s.%s**' % (k2,var)]
    bits = k.split( ' ' )
    if len(bits) > 1:
      candidates.append( '%s.%s' % (k2, bits[1].strip()[1:-1]) )

    for okey in candidates:
      if okey in sx.ee_ovars_old:
        uclear[u] = sx.ee_ovars_old[okey]
        break
    else:
      thise1.append( (u,k2,var) )
212
# Maps keyed by the 'mv.*' identifier held in column 7 of sx.eeold:
#   ee_mv0 -- identifier -> key of sx.eeold
#   ee_mv1 -- identifier -> target identifier (column 8, no leading '*')
#   ee_mv2 -- identifier -> target identifier (column 8 had a leading '*')
#   ee_mv3 -- identifier -> '*omit' or 'Depricated' (spelling as in the data)
ee_mv0 = {}
ee_mv1 = {}
ee_mv2 = {}
ee_mv3 = {}
for k in sx.eeold.keys():
  rec = sx.eeold[k]
  k1 = rec[7]
  assert k1[:3] == 'mv.', 'Key not in correct syntax: %s' % k1
  ee_mv0[k1] = k
  k2 = rec[8]
  # blank and 'null' targets are ignored altogether
  if k2 in ["", 'null']:
    continue
  if k2 in ["*omit", 'Depricated']:
    ee_mv3[ k1 ] = k2
    continue
  starflag = k2[0] == '*'
  if starflag:
    k2 = k2[1:]
  assert k2[:3] == 'mv.', 'target key not in correct syntax: %s' % k2
  if starflag:
    ee_mv2[k1] = k2
  else:
    ee_mv1[k1] = k2

for k in ee_mv1.keys():
  assert k in ee_mv0, 'key cross-ref not found: %s' % k

# old variables that are neither omitted/deprecated nor redirected elsewhere
ll_ov = [ ee_mv0[k] for k in ee_mv0.keys() if not (k in ee_mv3 or k in ee_mv1) ]
print( 'Number of old variables: %s [%s]' % (len(ll_ov), len(ee_mv0.keys()) ) )
print( len(sx.eenv.keys()) )
print( "Number of new variables: %s" % ( len(sx.eenv.keys()) ) )
247##
248## extract variable name, units, standard name, long name, comment
# Column positions of (v, u, s, l, c) -- see the comment above -- in the new
# (sx.eenv) and old (sx.eeold) variable records.
nt_vrecix = collections.namedtuple( 'vrecix', ['v','u','s','l','c'] )
vrecix_new = nt_vrecix._make( (5,9,6,10,11) )
vrecix_old = nt_vrecix._make( (1,5,3,2,4) )

# Count MIP-table records with no back reference (nnf) and with more than one
# (nnm).  The md5 digest formerly computed for each record was never used and
# has been removed.
nnf = 0
nnm = 0
for k in sx.ee_mip.keys():
  if k not in sx.ee_xref_bck:
    print( '******* %s' % (sx.ee_mip[k],) )
    nnf += 1
  elif len(sx.ee_xref_bck[k]) > 1:
    nnm += 1
    print( 'ERROR.0001.001: #################################' )
    print( sx.ee_mip[k] )
    for k2 in sx.ee_xref_bck[k]:
      print( sx.eeold[k2] )

print( '#### Number of missing back references:  %s' % (nnf,) )
269   
270
class dopenxx(object):
  """Output file that is written under a temporary name.

  The file is created in ``self.odir`` as ``xx_<uuid>`` and may be renamed
  when it is closed.  ``odir`` is not set by this class: it has to be
  provided as a class attribute by a subclass (see ``dopen.ClassFact``).
  """
  def __init__(self):
    self.fn = 'xx_%s' % str( uuid.uuid1() )
    self.oo = open( os.path.join( self.odir, self.fn ), 'w' )

  def write(self,x):
    self.oo.write(x)

  def close(self,rename=None):
    """Close the file and, if ``rename`` is given, rename it within odir."""
    self.oo.close()
    if rename is not None:
      os.rename( os.path.join( self.odir, self.fn ), os.path.join( self.odir, rename ) )

class dopen(object):
  """Collection of lazily opened output files in one directory.

  ``self.a[key]`` opens a new temporary file in ``odir`` the first time
  ``key`` is used and returns the same file object afterwards.
  """
  def __init__(self,odir='./'):
    if not os.path.isdir(odir):
      os.mkdir(odir)
    self.odir = odir
    self.a = collections.defaultdict( self.ClassFact() )

  def close(self,k,rename=None):
    """Close the file opened under key ``k``; do nothing if none was opened."""
    # 'in' does not trigger the defaultdict factory, so no file is created
    # here (and, unlike has_key, it works on Python 3).
    if k in self.a:
      self.a[k].close(rename=rename)
      del self.a[k]

  def ClassFact(self):
    """Return a dopenxx subclass bound to this instance's output directory."""
    class dopenyy(dopenxx):
      """Inherits all methods from dopenxx; only ``odir`` is added."""

    dopenyy.odir = self.odir
    return dopenyy
print( 'INFO 001 ################## %s %s' % (nnf, nnm) )

import shelve
##sh = shelve.open( 'shelve_vars' )
# Write one row per variable: every new variable (sx.eenv) followed by the
# retained old variables (ll_ov).  try/finally makes sure the shelf is closed
# even if a record is malformed.
sh = shelve.open( '%s/cmipVars' % odir, 'n' )
try:
  sh['__info__'] = { 'label':'cmipVars', 'title':'CMIP variables', 'prov':'sx.eenv and sx.eeold' }
  sh['__cols__'] = ['label','title','sn','units','description','procnote','procComment','prov']

  # ee collects the distinct units strings (value: last key seen with it)
  ee = {}
  for k in sx.eenv.keys():
    this = nt_vrecix._make( [ sx.eenv[k][x] for x in vrecix_new ] )
    sh[k] = [ this.v, this.l, this.s, this.u, this.c, '', '','CMIP6 endorsement' ]
    ee[this.u] = k
    if this.u == 'atmos':
      print( 'ERROR: bad units %s %s' % (k, sx.eenv[k]) )
  print( '################################### var units #################' )
  print( list( ee.keys() ) )
  print( '##########################################################' )

  nk = 0
  for k in ll_ov:
    this = nt_vrecix._make( [ sx.eeold[k][x] for x in vrecix_old ] )
    # column 6 of the old record is stored in the 'prov' column
    sh[k] = [ this.v, this.l, this.s, this.u, this.c, '', '',sx.eeold[k][6] ]
    nk += 1

  print( '%s %s %s' % ( len( sx.eenv.keys() ) + len(ll_ov), len( sh.keys() ), len( sx.eenv.keys() ) ) )
finally:
  sh.close()
328 
329###
330###  shelve contains 1682 variables, of which 561 are new.
331###
332   
333### have lost somewhere the var/cmor var distinction. E.g. hfibthermds/hfibthermds2d  --- 2 CMOR variable names with a single output variable name.
334### But this will not be in the physical variable -- so should come in the output variable record.
335
class c1(object):
  """Holder for a counter table: ``self.d`` maps any key to an int, starting at 0."""
  def __init__(self):
    counts = collections.defaultdict( int )
    self.d = counts
class c2(object):
  """Holder for a grouping table: ``self.d`` maps any key to a list, initially empty."""
  def __init__(self):
    groups = collections.defaultdict( list )
    self.d = groups
# Tallies over sx.ee0, keyed by column 5 of each record:
#   ee  -- number of records
#   ff  -- number of records per value of column 10
#   blk -- record keys per value of column 10
ee = collections.defaultdict( int )
ff = collections.defaultdict( c1 )
#count different shapes:
blk = collections.defaultdict( c2 )
for k in sx.ee0.keys():
  rec = sx.ee0[k]
  ee[rec[5]] += 1
  ff[rec[5]].d[rec[10]] += 1
  blk[rec[5]].d[rec[10]].append(k)
# sorted() replaces keys()/sort(), which fails on Python 3 dictionary views
keys = sorted( ee.keys() )
print( keys )
#######################################
# The same tallies for sx.ee_mip; filled by the dimension-counting loop below.
eeo = collections.defaultdict( int )
ffo = collections.defaultdict( c1 )
356#count different shapes:
# Dimension names grouped by kind; knowndims is their concatenation in the
# order horizontal, vertical, categorical, time.
hdims = ['latitude', 'longitude']
vdims = [
  'alevel', 'alevhalf', 'olevel', 'rho', 'alt40', 'location', 'plev',
  'alev1', 'plev3', 'plev7', 'plev8', 'plevs', 'sdepth',
]

catdims = [
  'site', 'basin', 'oline', 'dbze', 'scatratio', 'sza5', 'tau', 'vegtype',
  'vgidx',
]
tdims = ['time', 'time1', 'time2']

knowndims = hdims + vdims + catdims + tdims

# Notes attached to each shape label (see the commentary that follows).
nsmap = {
  '': 'MISC...',
  u'XYZKT': 'lon.lat.ATMOS.other.time',
  u'XKT': 'PMIP....',
  u'4-element vector': '..transsi.time',
  u'KZT': 'RFMIP...',
  u'2D vector field': 'lon.lat..sithick.time',
  u'XYZ': 'lon.lat.olevel..time',
  u'XYZT': 'lon.lat.ATMOS..time',
  u'2D': 'SIMIP ... (xYT?)',
  u'scalar': '?...',
  u'XY': 'FAFMIP, ISMIP6....',
  u'YZT': 'MISC....',
  u'XYKT': 'lon.lat..other.time',
  u'XYT': 'lon.lat...time',
  u'ZST1': '..alevel.site.time',
  u'?': 'PMIP',
  u'BasinYT': '.lat..basin.time',
}
366## XYZ: this occurs only in FAFMIP
367## XYZT:  {'C4MIP': 80, 'PMIP': 10, 'RFMIP': 3, 'CFMIP': 19, 'LS3MIP': 8, 'GeoMIP': 1, 'HighResMIP': 82}
368##   need to follow up on what is vertical.  # of levels is captured in most cases.
369##   PMIP: email sent -- 29th June : some ocean, some atmos
370##   LS3MIP: soil or snow layers,
371## RFMIP: all levels.
372## XYZKT: CFMIP  -- K has various forms.
373## XKT: PMIP  -- salt transport -- should be lat-basin-time?
374## blank:  {'LUMIP': 40, 'PMIP': 11, 'RFMIP': 4, 'HighResMIP': 11, 'FAFMIP': 3, 'DynVar': 10}
#-----  RFMIP: scalar .... email sent -- these are OfflineRad variables.
376#-----  LUMIP: XYT
377#-----  PMIP: occurs in bulk copy requests .....
378#-----  HighResMip: occurs in comment lines
379#-----  FAFMIP: occurs in comment lines
380#-----  DynVar: probably XYZT -- need to check whether on model or pressure levels.
381## YZT: 'DAMIP': 4, 'VolMIP': 2, 'HighResMIP': 10, 'SolarMIP': 4
382# ----  DAMIP: zonal mean, 17 or 23 levels (pressure) ..
383# ----  VolMIP: zonal mean, all levels
384# ----  HighResMIP: zonal mean, 36 levels
385# ----  SolarMIP: zonal mean, 17 or 23 levels.  (withdrawn and merged with DAMIP ....).
386## scalar: 'ISMIP6': 8, 'SIMIP': 6
387##  --> to KT, with K set to icesheet/hemisphere
388## ----- SIMIP: time (monthly)
389## '?': PMIP, for variables specified in C4MIP.
390##
391
# ed[name].d[position] counts how often dimension 'name' occurs at each
# position of the '|'-separated dimension string held in the last column of
# sx.ee_mip; the final dimension of every record is counted under 'f'.
ed = collections.defaultdict( c1 )
for k in sx.ee_mip.keys():
  rec = sx.ee_mip[k]
  dl = rec[-1]
  # 'scalar' sometimes arrives split into single characters
  if dl == 's|c|a|l|a|r':
    dl = 'scalar'
  dd = dl.split( '|' )
  # enumerate() yields the true position of each entry; dd.index(d) returned
  # the first occurrence, which is wrong for a repeated dimension name.
  for pos, d in enumerate( dd[:-1] ):
    ed[d].d[pos] += 1
  ed[dd[-1]].d['f'] += 1
  # these tallies are keyed by the raw (unnormalised) dimension string
  eeo[rec[-1]] += 1
  ffo[rec[-1]].d[rec[-2]] += 1
keys = sorted( eeo.keys() )

# dimension names that are not listed in knowndims
nkn = sorted( k for k in ed.keys() if k not in knowndims )
412
# Input shelves produced by an earlier step; they stay open because later
# sections read from them.
shlnks = shelve.open( '%s/requestLinks_tmp' % odir, 'r' )
shrvg = shelve.open( '%s/requestVarGroup_tmp' % odir, 'r' )
eervg = {}
##
## construct look-up by label
##
for k in shrvg.keys():
  # keys starting with '_' (e.g. '__info__') are metadata, not records
  if not k.startswith( '_' ):
    eervg[shrvg[k][2]] = k

sh = shelve.open( 'dreq_consol_tables_reviewed_b_v20150708' )
try:
  # list() replaces keys()[:], which fails on Python 3 views
  revTabIds = list( sh.keys() )
finally:
  sh.close()
426
## parse 1
## ee0_p1: sx.ee0 records whose variable column is neither blank nor a '#'
##         comment, extended by two columns (cross-reference code, id).
## ee0_gp: group label (column 0) -> d1 of column 10 -> record keys.
ee0_p1 = {}
ee0_gp = collections.defaultdict( d1 )
for k in sx.ee0.keys():
  r = list(sx.ee0[k][:] )
  if r[1] != '' and r[1][0] != '#':
    if k in ee0_xref:
      r1 = ee0_xref[k]
      assert len(r1) == 1, 'FATAL:001.0001: unexpected duplicate cross ref, %s   %s %s' % (k,str(r1),str(r) )
      assert len(r1[0][1]) == 1, 'FATAL:001.0002: unexpected duplicate cross ref, %s   %s %s' % (k,str(r1),str(r) )
      r += [r1[0][0],r1[0][1][0]]
    else:
      r += [-1,'__none__']
    ee0_p1[k] = r[:]
    # d1.__setitem__ appends, so every record key is retained
    ee0_gp[r[0]][r[10]] = k
    if r[0] == '':
      print( '################' )
      print( r )
print( '###################   groups referenced from new groups sheets' )
print( list( ee0_gp.keys() ) )
k1 = sorted( ee0_gp.keys() )
# kmapk1: group label -> ('<column 10>.<group>', 'newGp' if that id is a
# label in eervg, otherwise 'unresolved')
kmapk1 = {}
for k in k1:
  assert len(ee0_gp[k].keys()) in  [0,1], 'Multiple references to %s' % k
  for k2 in ee0_gp[k].keys():
    gpid = '%s.%s' % (k2,k)
    if gpid in eervg:
      kmapk1[k] = (gpid,'newGp')
    else:
      kmapk1[k] = (gpid,'unresolved')

gpids = [ kmapk1[k][0] for k in kmapk1.keys() ]
kmapk2 = {}

# Renamed group labels (label in the request sheet -> label used for the
# group).  An earlier version of this list, which also mapped 'LEday' to
# 'L_day', was assigned and then immediately overwritten; only the effective
# list is kept.
mmmm = [('C_OceanT1', 'C_Ocean_T1'), ('C_OceanT2', 'C_Ocean_T2'), ('3hr_extreme','3hr_extr'), ('Amon_extreme','Amon_ext'), \
        ('Amon_convection','Amon_conv'), ('L3hr','L_3hr'), ('DYNVAR_day','DYVR_daily') ]
kmap000 = {}
for t in mmmm:
  kmap000[t[0]] = t[1]
## check links from variable output specs to group.
## need to identify what is meant by group labels in "Request Scoping" -- now in eervg.keys()
##
## kmapk2 maps every label '<a>.<b>' in eervg to (class, reference), where
## class is one of 'CCMI', 'SPECS', 'new', 'CMIP5Rev', 'CMIP5' or 'nomap'.
gpid_set = set( gpids )
for k0 in sorted( eervg.keys() ):
  a,b = k0.split( '.' )
  if b in kmap000:
    k = '%s.%s' % (a,kmap000[b])
  else:
    k = k0

  target = None
  if b[:5] == 'CCMI1':
    b1,b2 = b.split( '_' )
    # NOTE(review): the raw suffix is stored, not ccmivgrdict[b2] -- confirm
    # that this is intended.
    if b2 in ccmivgrdict:
      target = ('CCMI',b2)
  elif b[:5] == 'SPECS':
    b1,b2 = b.split( '_' )
    if b2 in specsvgr:
      target = ('SPECS',b2)
    elif b2 == 'Amon':
      target = ('SPECS','mon')

  if target is None:
    if k in gpid_set:
      target = ('new',k)
    elif a in ['LUMIP','LS3MIP']:
      # these labels are also looked up under the 'C4MIP.' prefix
      tk = 'C4MIP.%s' % kmap000.get(b,b)
      if tk in gpid_set:
        target = ('new',tk)
    elif a in ['VolMIP','DAMIP']:
      # these labels are also looked up under the 'DynVar.' prefix
      tk = 'DynVar.%s' % kmap000.get(b,b)
      if tk in gpid_set:
        target = ('new',tk)

  if target is None:
    if k in revTabIds:
      target = ('CMIP5Rev',k)
    elif b in cmip5vgr:
      target = ('CMIP5',b)
    else:
      target = ('nomap',k)

  kmapk2[k0] = target

print( '###############################################################' )
print( '###############################################################' )
523##
524## following list currently contains separate entries for group references from different MIPs --- want to have a single one
525## here, and multiple references to it ...
526##
rvgref2 = collections.defaultdict( list )
lookup1 = collections.defaultdict( d1 )
lookup2 = collections.defaultdict( d1 )
import sx2_extra as sx2e
sh = shelve.open( '%s/requestVarGroup' % odir, 'n' )
try:
  sh['__info__'] = { 'label':'requestVarGroup', 'title':'Identify variable groups' }
  sh['__cols__'] = [ 'uuid', 'mip', 'label','title','ref','refNote']
####
#### reconfigure to uuid, mip, label (no "."), title, class, refNote
#### for "ttt==new", use sx2e.ngmap( kkk ) to give labels, keep kkk as refNote, ttt --> class
###
### lookup1: constructed from requestVarGroup list
### lookup1[class].d[reference] -> keys of shrvg that resolve to that pair
  for k in shrvg.keys():
    if k[0] != '_':
      id0 = shrvg[k][2]
      ttt,kkk = kmapk2[id0]
      lookup1[ttt].d[kkk].append(k)
      if ttt == 'new':
        rvgref2[kkk].append( shrvg[k][0] )

  # One output row per (class, reference) pair, taken from the first request
  # record that refers to it.
  for cls in lookup1.keys():
    for refNote in lookup1[cls].d.keys():
      gid = lookup1[cls].d[refNote][0]
      mip = shrvg[gid][1]
      if cls == 'new':
        lab = sx2e.ngmap[refNote]
      elif cls == 'CMIP5':
        lab = refNote.replace( '_', '-' )
      elif cls == 'CMIP5Rev':
        lab = refNote.replace( '.', '-' )
      elif cls == 'nomap':
        lab = refNote.replace( '.', '-' ).replace( '_', '-' )
      else:
        lab = '%s-%s' % (mip,refNote)
      if sx2e.r1.match( lab ) is None:
        print( 'NON_COMPLIANT label %s' % (lab,) )
        print( '%s %s %s' % (cls, refNote, shrvg[gid]) )
        # raised explicitly: 'assert False' would be skipped under python -O
        raise AssertionError( 'NON_COMPLIANT label %s' % (lab,) )
      sh[gid] = [gid,mip,lab,'%s: %s' % (mip,lab), cls, refNote]

  # Two SIMIP groups are added by hand with freshly generated ids.
  for k in ['seaiceday', 'seaicemon']:
    gid =  str( uuid.uuid1() )
    mip = 'SIMIP'
    lab = '%s-%s' % (mip,k)
    title = '%s: %s' % (mip,k)
    cls = 'newOrp'
    refNote = '%s.%s' % (mip,k)
    sh[gid] = [gid,mip,lab,title, cls, refNote]
    lookup1[cls].d[refNote].append(gid)
finally:
  sh.close()
586
587
# err001 counts the records whose '<mip>.<group>' id matches no group.
err001 = collections.defaultdict( int )
sh = shelve.open( '%s/groupItems' % odir, 'n' )
try:
  sh['__info__'] =  {'label':'groupItems', 'title':'List of items in new groups', 'description':"New groups" }
##(u'DYVR_daily', u'utendnogw', '', u'daily', '', '', '', '', '', u'DynVar', 'DynVar', '772da0b6-25b6-11e5-8cf6-ac72891c3257')
  sh['__cols__'] = ['group','var','table','freq','', 'shape', '', '', '','mip','mip?','uuid','new','gpid','vkey','vid']
  for k in ee0_p1.keys():
    kkk = '%s.%s' % (ee0_p1[k][10],ee0_p1[k][0])
    ##if not sx2e.ngmap.has_key(kkk):
    if kkk in lookup1['new'].d:
      gid = lookup1['new'].d[kkk][0]
    elif kkk in lookup1['newOrp'].d:
      gid = lookup1['newOrp'].d[kkk][0]
    else:
      gid = None
      err001[kkk] += 1
    r = ee0_p1[k][:]
    # The two columns appended in parse 1 are overwritten with the group
    # flag and id ('new', 'gpid'); the cross reference itself is appended
    # again below as 'vkey', 'vid'.
    if gid is not None:
      r[-2] = 1
      r[-1] = gid
    else:
      r[-2] = -1
      r[-1] = '__none__'
    # .get() avoids creating an empty entry in the defaultdict; a record
    # with no cross reference gets the same placeholder that parse 1 uses.
    xrefs = ee0_xref.get( k )
    if xrefs:
      t = xrefs[0]
    else:
      t = (-1, ['__none__',])
    print( '>>>>>>>>>>>>>>>>>>>> %s' % (t,) )
    r += [t[0],t[1][0]]
    sh[k] = r[:]
finally:
  sh.close()
615 
616
sh = shelve.open( '%s/requestLinks' % odir, 'n' )
try:
  sh['__info__'] = { 'label':'requestLinks', 'title':'Links from variable groups to a request id' }
  sh['__cols__'] = [ 'uid', 'mip', 'tab','objective','grid','gridreq','comment','ref','refNote','refid']
  for k in shlnks.keys():
    if k[0] == '_':
      continue
    k1 = '%s.%s' % (shlnks[k][1],shlnks[k][2])
    target = kmapk2.get( k1 )
    assert target is not None, 'Key missing  %s, %s' % (k,k1)
    if target is None:
      # only reachable when assertions are disabled (python -O): report the
      # record and skip it
      print( '########### %s' % (shlnks[k],) )
      continue
    ttt,kkk = target
    # id of the first group record registered for this (class, reference)
    gid = lookup1[ttt].d[kkk][0]
    sh[k] = list( shlnks[k] ) + list( target ) + [gid,]
finally:
  sh.close()
631## link to group items is now via mip + group id, using stuff from kmapk2 ....
632
# The remainder writes the refTableItems and revTabItems shelves.  It is
# switched off by default.
dorest = False
if dorest:

  # variable name -> key, for old-variable records whose id starts 'mv.005'
  gg = {}
  for k in sx.eeold.keys():
    if sx.eeold[k][7][:6] == 'mv.005':
      gg[sx.eeold[k][1]] = k
###
### BUT need to get full OMIP records in here ......
  sh = shelve.open( '%s/refTableItems' % odir, 'n' )
  try:
    sh['__info__'] =  {'label':'refTableItems', 'title':'List of items in reference tables', 'prov':'sx.ee_mip' }
    sh['__cols__'] = ['uuid', 'comment', 'deflate_level', 'shuffle', 'ok_max_mean_abs', 'flag_meanings', \
                          'type', 'ok_min_mean_abs', 'standard_name', 'deflate', 'long_name', 'valid_min',\
                           'cell_methods', 'flag_values', 'cell_measures', 'out_name', 'modeling_realm', 'units',\
                           '#cell_methods', 'valid_max', 'positive', 'var', 'mipTable','dimensions','vid']
    # vdefex: key of sx.ee_mip -> resolved old-variable record + original name
    vdefex = {}
    for k in sx.ee_mip.keys():
      # r stays None when there is no back reference.  Previously the record
      # left over from the preceding iteration was used in that case (or a
      # NameError was raised on the first iteration).
      r = None
      if k not in sx.ee_xref_bck:
        kl = ['__none__',]
        print( 'ERROR.0002.003: no back references %s' % str(sx.ee_mip[k]) )
        vnam0 = ''
      else:
        kl = sx.ee_xref_bck[ k ]
        if len(kl) != 1:
          print( 'ERROR.0002.001: multiple back references %s' % str(kl) )
          print( sx.ee_mip[k] )
        kv = kl[0]
        r = sx.eeold[kv]
        vnam0 = r[1]
        kit = 0
        # follow the chain of redirections held in column 8
        while r[8] not in [ '','null'] and r[8][0] != '*':
          if r[8] not in ee_mv0:
            # An unresolvable target can never make progress: report it once
            # and stop, instead of looping until the iteration guard fires.
            print( 'ERROR.0002.002: ambiguos back reference: %s' % str(r) )
            break
          kv = ee_mv0[ r[8] ]
          r = sx.eeold[kv]
          kit += 1
          # raised explicitly so that a reference cycle still terminates
          # when assertions are disabled
          if kit >= 5:
            raise AssertionError( 'Too many iterations: %s' % str(r) )
          if kit > 1:
            print( 'WARNING.0002.001: chained reference: %s' % str(r) )
        kl = [kv,]
      if r is not None:
        if r[1] != vnam0:
          print( 'INFO.0001.001: name change: %s %s' % (r, vnam0) )
        vdefex[k] = list(r) + [vnam0,]
      sh[k] = sx.ee_mip[k][:]  + [kl[0],]
  finally:
    sh.close()

  sha = shelve.open( 'dreq_consol_tables_reviewed_a_v20150708' )
  shb = shelve.open( 'dreq_consol_tables_reviewed_b_v20150708' )
  # (mip, table) pairs from the two-part keys of the reviewed-tables shelf
  k0 = []
  for k in sorted( shb.keys() ):
    bb = k.split( '.' )
    if len(bb) == 2:
      k0.append( tuple(bb) )

  sh = shelve.open( '%s/revTabItems' % odir, 'n' )
  try:
    sh['__info__'] =  {'label':'revTabItems', 'title':'List of items in revised tables', 'description':"uuid (col 3) references an item in 'refTableItems'", 'prov':'from dreq_consol_tables_reviewed_b_v20150708, cross-referenced with sx.ee_miptables_old' }
    sh['__cols__'] = ['var','table','mip','uuid','priority']
####
    dop = dopen('check3')
    kr = 0
    # loaded on first use; reading sha['records'] fetches the whole object
    # from the shelf, so it is not repeated for every variable
    records = None

    for m,t in k0:
      tabvars = shb[ '%s.%s' % (m,t) ]
      byname = {}
      byoldname = {}
      rk = 'CMIP5_%s' % t
      if rk not in sx.ee_miptables_old:
        print( '####################  key %s not found ' % rk )
      else:
        for u in sx.ee_miptables_old[rk]:
          byname[ sx.ee_mip[u][21] ] = u
          # also index by the resolved old name when it differs
          if u in vdefex:
            if vdefex[u][1] != sx.ee_mip[u][21]:
              byoldname[vdefex[u][1]] = u
        for v0 in tabvars.keys():
          v = v0.strip()
          if records is None:
            records = sha['records']
          r = records[tabvars[v0][0]]
          if v in byname:
            thisu = byname[v]
          elif v in byoldname:
            print( 'INFO ... rename found %s' % (v,) )
            thisu = byoldname[v]
          elif v[:7] == 'include':
            thisu = '__inc__'
          elif m == 'OMIP' and v in gg:
            print( 'INFO ... new OMIP var found %s' % (v,) )
            thisu = '__new__'
          else:
            print( 'ERROR.003.0001 var not found: %s (%s, %s)' % (v,m,t) )

            if m == 'OMIP':
              kr += 1
              sr = [ str(x) for x in r ]
              dop.a['omip'].write( '\t'.join( sr ) + '\n' )
              dop.a['omip2'].write( '\t'.join( [ sr[x] for x in [4,0,5,2,7] ] + ['%s_%s' % (m,t),'mv.005.%4.4i' % kr] ) + '\n' )

## not finding OMIP Oyr variables ... even where they are in vlsc5 and hence in shold. Problem is table ref.
## need to look at new names as well ... as renamed in, e.g. OMIP
            thisu = None
          kv = str( uuid.uuid1() )
          sh[kv] = [v,t,m,thisu,tabvars[v0][1]]
    dop.close('omip',rename='omip.csv')
    dop.close('omip2',rename='omip2.csv')
  finally:
    sh.close()
    shb.close()
    sha.close()
Note: See TracBrowser for help on using the repository browser.