1 | |
---|
2 | ## |
---|
3 | ## still missing map from new groups to old table variables. |
---|
4 | ## need a key lookup by variable in each table. |
---|
5 | ## |
---|
6 | |
---|
7 | import collections, string, hashlib, uuid, os, sys |
---|
8 | import dreqSX as sx |
---|
9 | odir = 'sh20150708' |
---|
10 | if not os.path.isdir( odir ): |
---|
11 | os.mkdir( odir ) |
---|
12 | |
---|
13 | cmip5vgr = ['Omon','fx','Oyr','Oclim','Omon_3d','Omon_oth','Amon','Lmon','LImon','OImon','aero_3d','aero_oth','cfDay_2d','cfDay_3d','cfMon_3dstd','cfMon_3dmod','cfMon_2dmod','cfMon_sim','day_oth','day_ss','cfOff','cfSites','6hrLev','6hrPlev','3hr','cf3hr_grid','cf3hr_sim'] |
---|
14 | ccmivgrdict = {'fixed':'fx','annual':'yr','monthly':'mon','daily':'day','hourly':'hr'} |
---|
15 | specsvgr = ['fx','mon','day','6hr'] |
---|
16 | |
---|
17 | ee_nv1 = collections.defaultdict( list ) |
---|
18 | ee_var_nv1 = collections.defaultdict( list ) |
---|
19 | ee_tabs = collections.defaultdict( list ) |
---|
20 | ee_grps = collections.defaultdict( list ) |
---|
21 | ee_usedVars = collections.defaultdict( list ) |
---|
22 | |
---|
23 | kmap = { 'cfday':'cfDay', 'Limon':'LImon', 'CMIP5_Llmon':'CMIP5_LImon', 'NEW':'new', 'New':'new'} |
---|
24 | |
---|
25 | for k in sx.eenv.keys(): |
---|
26 | mip = string.strip(sx.eenv[k][1]) |
---|
27 | var = string.strip(sx.eenv[k][5]) |
---|
28 | uid = sx.eenv[k][-1] |
---|
29 | ee_nv1[uid].append( k ) |
---|
30 | ee_var_nv1[var].append( k ) |
---|
31 | assert uid == k, 'mismatch in uid in record %s' % k |
---|
32 | |
---|
def tabref1( ti ):
    """Normalise a table reference read from the new-groups sheet.

    The reference 'new BUT' is redirected to 'CMIP5_Lmon', 'new' is returned
    as 'new', and every other value is handed back unchanged.
    """
    for special, target in (('new BUT', 'CMIP5_Lmon'), ('new', 'new')):
        if ti == special:
            return target
    return ti
---|
40 | |
---|
41 | for k in sx.ee0.keys(): |
---|
42 | r = sx.ee0[k] |
---|
43 | mip = r[-2] |
---|
44 | tab = r[2] |
---|
45 | grp = r[0] |
---|
46 | thistab = tabref1(tab) |
---|
47 | ##ee_tabs[tab].append(k) |
---|
48 | ee_tabs[thistab].append(k) |
---|
49 | ee_grps[grp].append(k) |
---|
50 | |
---|
# Sort every table reference collected in ee_tabs into one of five buckets and
# record, in kmapmap, the resolved name for each reference that could be matched.
kf = []      # matched directly against sx.ee_miptables_old
kfn = []     # 'new' or blank references
kfxx = []    # matched only after adding the 'CMIP5_' prefix
kfgp = []    # matched against a group label in ee_grps
knf = []     # nothing matched
kmapmap = {}
for k in ee_tabs.keys():
    # A reference containing a space is reduced to its first word.
    k2 = k.split()[0] if ' ' in k else k
    # Apply the known spelling corrections.
    k2 = kmap.get( k2, k2 )

    if k2 in ('new', ''):
        kfn.append(k)
        kmapmap[k] = 'new'
        continue

    if k2 in sx.ee_miptables_old:
        kf.append(k)
        kmapmap[k] = k2
        continue

    if k2 in ee_grps:
        kfgp.append(k)
        kmapmap[k] = k2
        continue

    if ('CMIP5_%s' % k2) in sx.ee_miptables_old:
        kfxx.append(k)
        kmapmap[k] = 'CMIP5_%s' % k2
    else:
        knf.append(k)
---|
80 | |
---|
81 | thisd = {} |
---|
82 | print 'Missing groups' |
---|
83 | for k in knf: |
---|
84 | thise = collections.defaultdict( int ) |
---|
85 | for k2 in ee_tabs[k]: |
---|
86 | thise[sx.ee0[k2][-2]] += 1 |
---|
87 | keys = thise.keys() |
---|
88 | keys.sort() |
---|
89 | thisd[k] = keys[:] |
---|
90 | print k, '::: ', keys |
---|
91 | |
---|
92 | ### look for new variables in variable groups. |
---|
class d1(object):
    """Multi-valued mapping in which item assignment appends to a list.

    ``obj[k] = v`` appends ``v`` to the list held for ``k``; ``obj[k]``
    returns that list.  The underlying ``defaultdict`` is exposed as ``d``.

    Looking up a key that has never been assigned does not create it.  What
    happens instead is controlled by ``onItemNotFound``: the value 'error'
    (the default) raises KeyError, any other value makes the lookup return
    None.
    """

    def __init__(self):
        self.d = collections.defaultdict( list )
        self.onItemNotFound = 'error'

    def keys(self):
        """Return the keys of the underlying dictionary."""
        return self.d.keys()

    def __setitem__(self,k,v):
        """Append v to the list stored under k (creating the list if needed)."""
        self.d[k].append(v)

    def __itemNotFound__(self,k):
        """Handle a failed lookup: raise KeyError in 'error' mode, else return None."""
        if self.onItemNotFound == 'error':
            # Wrap k in a tuple so that a tuple-valued key is formatted as a
            # single item rather than being unpacked by the % operator.
            raise KeyError( 'Key %s not found in dictionary keys' % (k,) )
        return None

    def __getitem__(self,k):
        """Return the list stored under k without creating a missing entry."""
        # Membership test first: indexing the defaultdict directly would
        # silently insert an empty list for an unknown key.
        if k in self.d:
            return self.d[k]
        return self.__itemNotFound__(k)
---|
113 | |
---|
114 | ## look up table, giving uuid from old mip tables for each mip, var combination. |
---|
115 | mvlookup = collections.defaultdict( d1 ) |
---|
116 | |
---|
117 | ee_unusedVars = [] |
---|
118 | nok = 0 |
---|
119 | nn = 0 |
---|
120 | ee0_null = collections.defaultdict( list ) |
---|
121 | ee0_xref = collections.defaultdict( list ) |
---|
122 | ee0_xref_old = collections.defaultdict( list ) |
---|
123 | for t in sx.ee_miptables_old.keys(): |
---|
124 | for u in sx.ee_miptables_old[t]: |
---|
125 | v1 = sx.ee_mip[u][15] |
---|
126 | v2 = sx.ee_mip[u][21] |
---|
127 | if v2 != '': |
---|
128 | vid = v2 |
---|
129 | else: |
---|
130 | vid = v1 |
---|
131 | mvlookup[t].d[vid].append( u ) |
---|
132 | |
---|
133 | ##for k in kfn: |
---|
134 | ##for u in ee_tabs[k]: |
---|
135 | |
---|
136 | |
---|
137 | ## ee0_xref values are values of ee_var_nv1 |
---|
138 | for u in sx.ee0.keys(): |
---|
139 | var = string.strip(sx.ee0[u][1]) |
---|
140 | tab = kmapmap.get(tabref1(sx.ee0[u][2]), None ) |
---|
141 | if ee_var_nv1.has_key(var): |
---|
142 | nok += 1 |
---|
143 | ee_usedVars[var].append( u ) |
---|
144 | ## reference to key in eenv |
---|
145 | ee0_xref[u] += ((0,ee_var_nv1[var] ),) |
---|
146 | elif ee_var_nv1.has_key(var + "**"): |
---|
147 | nok += 1 |
---|
148 | ee_usedVars[var].append( u ) |
---|
149 | ee0_xref[u] += ((0,ee_var_nv1[var + "**"] ),) |
---|
150 | else: |
---|
151 | ok = False |
---|
152 | if mvlookup.has_key(tab): |
---|
153 | if mvlookup[tab].d.has_key( var ): |
---|
154 | ul = mvlookup[tab].d[var] |
---|
155 | if len(ul) > 1: |
---|
156 | print 'ERROR.001.011: variable %s has multiple entries %s' % (var,tab) |
---|
157 | ee0_xref[u] += ((1,[ul[0],]),) |
---|
158 | ok = True |
---|
159 | else: |
---|
160 | print 'ERROR.001.010: variable %s not found in %s' % (var,tab) |
---|
161 | else: |
---|
162 | print 'ERROR.001.012: [%s] table not found: %s' % (var,tab) |
---|
163 | |
---|
164 | if not ok: |
---|
165 | print '-----',tab,sx.ee0[u] |
---|
166 | ee0_xref[u] += ((-1,[None,]),) |
---|
167 | |
---|
168 | if ( len(sx.ee0[u][1]) > 0 and sx.ee0[u][1][0] == '#' ) or ( len(sx.ee0[u][0]) > 0 and sx.ee0[u][0][0] == '#' ): |
---|
169 | ee0_null[u].append( '# comment' ) |
---|
170 | elif ( len(sx.ee0[u][0]) > 5 and sx.ee0[u][0][:4] == 'PMIP' ) and ( sx.ee0[u][1] in ['','*'] ): |
---|
171 | ee0_null[u].append( 'PMIP bulk copy' ) |
---|
172 | elif len(sx.ee0[u][1]) == 0 and len(sx.ee0[u][2]) == 0: |
---|
173 | ee0_null[u].append( 'comment' ) |
---|
174 | else: |
---|
175 | nn += 1 |
---|
176 | ee_unusedVars.append((u,var)) |
---|
177 | |
---|
178 | print nn, nok |
---|
179 | nok = 0 |
---|
180 | nn = 0 |
---|
181 | thise1 = [] |
---|
182 | uclear = {} |
---|
183 | ### run through CMIP5 variables which are modified, re-used etc in new CMIP6 groups. |
---|
184 | ### 43 not found .... as above ..... |
---|
185 | for k in kf + kfxx: |
---|
186 | k2 = kmapmap[k] |
---|
187 | for u in ee_tabs[k]: |
---|
188 | var = string.strip(sx.ee0[u][1] ) |
---|
189 | if not var in sx.ee_miptables_vars_old[k2]: |
---|
190 | print 'ERROR.0001.002: variables in sx.ee0 [new groups] not found: ',k,var,sx.ee0[u] |
---|
191 | nn += 1 |
---|
192 | else: |
---|
193 | ee_usedVars[var].append( u ) |
---|
194 | |
---|
195 | ok = False |
---|
196 | if sx.ee_ovars_old.has_key('%s.%s' % (k2,var)): |
---|
197 | uclear[u] = sx.ee_ovars_old['%s.%s' % (k2,var)] |
---|
198 | ok = True |
---|
199 | elif sx.ee_ovars_old.has_key('%s.%s**' % (k2,var)): |
---|
200 | uclear[u] = sx.ee_ovars_old['%s.%s**' % (k2,var)] |
---|
201 | ok = True |
---|
202 | elif len(string.split( k, ' ')) > 1: |
---|
203 | bits = string.split( k, ' ') |
---|
204 | v1 = string.strip(bits[1])[1:-1] |
---|
205 | if sx.ee_ovars_old.has_key('%s.%s' % (k2,v1)): |
---|
206 | uclear[u] = sx.ee_ovars_old['%s.%s' % (k2,v1)] |
---|
207 | ok = True |
---|
208 | |
---|
209 | if not ok: |
---|
210 | thise1.append((u,k2,var)) |
---|
211 | ##ee0_xref_old[u] |
---|
212 | |
---|
213 | ee_mv0 = {} |
---|
214 | ee_mv1 = {} |
---|
215 | ee_mv2 = {} |
---|
216 | ee_mv3 = {} |
---|
217 | for k in sx.eeold.keys(): |
---|
218 | k1 = sx.eeold[k][7] |
---|
219 | assert k1[:3] == 'mv.', 'Key not in correct syntax: %s' % k1 |
---|
220 | ee_mv0[k1] = k |
---|
221 | k2 = sx.eeold[k][8] |
---|
222 | if k2 != "": |
---|
223 | if k2 in ["*omit",'null','Depricated']: |
---|
224 | if k2 in ["*omit",'Depricated']: |
---|
225 | ee_mv3[ k1 ] = k2 |
---|
226 | else: |
---|
227 | starflag = False |
---|
228 | if k2[0] == '*': |
---|
229 | k2 = k2[1:] |
---|
230 | starflag = True |
---|
231 | assert k2[:3] == 'mv.', 'target key not in correct syntax: %s' % k2 |
---|
232 | if starflag: |
---|
233 | ee_mv2[k1] = k2 |
---|
234 | else: |
---|
235 | ee_mv1[k1] = k2 |
---|
236 | |
---|
237 | for k in ee_mv1.keys(): |
---|
238 | assert ee_mv0.has_key(k), 'key cross-ref not found: %s' % k |
---|
239 | |
---|
240 | ll_ov = [] |
---|
241 | for k in ee_mv0.keys(): |
---|
242 | if not (ee_mv3.has_key(k) or ee_mv1.has_key(k)): |
---|
243 | ll_ov.append( ee_mv0[k] ) |
---|
244 | print 'Number of old variables: %s [%s]' % (len(ll_ov), len(ee_mv0.keys()) ) |
---|
245 | print len(sx.eenv.keys()) |
---|
246 | print "Number of new variables: %s" % ( len(sx.eenv.keys()) ) |
---|
247 | ## |
---|
248 | ## extract variable name, units, standard name, long name, comment |
---|
249 | nt_vrecix = collections.namedtuple( 'vrecix', ['v','u','s','l','c'] ) |
---|
250 | vrecix_new = nt_vrecix._make( (5,9,6,10,11) ) |
---|
251 | vrecix_old = nt_vrecix._make( (1,5,3,2,4) ) |
---|
252 | |
---|
253 | nnf = 0 |
---|
254 | nnm = 0 |
---|
255 | for k in sx.ee_mip.keys(): |
---|
256 | t = map( lambda x: sx.ee_mip[k][x], [15,10,8,1,17] ) |
---|
257 | h = hashlib.md5(string.join(t) ).hexdigest() |
---|
258 | if not sx.ee_xref_bck.has_key(k): |
---|
259 | print '*******',sx.ee_mip[k] |
---|
260 | nnf += 1 |
---|
261 | elif len(sx.ee_xref_bck[k]) > 1: |
---|
262 | nnm += 1 |
---|
263 | print 'ERROR.0001.001: #################################' |
---|
264 | print sx.ee_mip[k] |
---|
265 | for k2 in sx.ee_xref_bck[k]: |
---|
266 | print sx.eeold[k2] |
---|
267 | |
---|
268 | print '#### Number of missing back references: ',nnf |
---|
269 | |
---|
270 | |
---|
class dopenxx(object):
    """Output file that is written under a temporary name and renamed on close.

    The file is created in the directory named by the ``odir`` attribute with
    a unique name of the form ``xx_<uuid1>``.  ``dopen.ClassFact`` builds a
    subclass whose ``odir`` points at the directory chosen by the ``dopen``
    instance.
    """

    # Fallback directory used when the class is instantiated directly rather
    # than through a subclass produced by dopen.ClassFact.
    odir = '.'

    def __init__(self):
        self.fn = 'xx_%s' % str( uuid.uuid1() )
        self.oo = open( os.path.join( self.odir, self.fn ), 'w' )

    def write(self,x):
        """Write the string x to the file."""
        self.oo.write(x)

    def close(self,rename=None):
        """Close the file; if rename is given, move it to that name inside odir."""
        self.oo.close()
        if rename is not None:
            os.rename( os.path.join( self.odir, self.fn ),
                       os.path.join( self.odir, rename ) )


class dopen(object):
    """Collection of lazily opened output files living in one directory.

    ``self.a`` is a defaultdict: the first access to ``self.a[key]`` opens a
    new temporary file in ``odir`` and later accesses return the same object.

    odir: directory for the files; it is created if it does not exist.
    """

    def __init__(self,odir='./'):
        if not os.path.isdir(odir):
            os.mkdir(odir)
        self.odir = odir
        self.a = collections.defaultdict( self.ClassFact() )

    def close(self,k,rename=None):
        """Close the file registered under k (optionally renaming it) and forget it.

        A key that was never opened is ignored; the membership test does not
        trigger the defaultdict factory, so no file is created by this call.
        """
        if k in self.a:
            self.a[k].close(rename=rename)
            del self.a[k]

    def ClassFact(self):
        """Return a dopenxx subclass bound to this instance's output directory."""
        class dopenyy(dopenxx):
            """Inherits all methods from dopenxx; only ``odir`` is overridden."""

        dopenyy.odir = self.odir
        return dopenyy
---|
299 | print 'INFO 001 ##################',nnf, nnm |
---|
300 | |
---|
301 | import shelve |
---|
302 | ##sh = shelve.open( 'shelve_vars' ) |
---|
303 | sh = shelve.open( '%s/cmipVars' % odir, 'n' ) |
---|
304 | sh['__info__'] = { 'label':'cmipVars', 'title':'CMIP variables', 'prov':'sx.eenv and sx.eeold' } |
---|
305 | sh['__cols__'] = ['label','title','sn','units','description','procnote','procComment','prov'] |
---|
306 | |
---|
307 | ee = {} |
---|
308 | for k in sx.eenv.keys(): |
---|
309 | this = nt_vrecix._make( map( lambda x: sx.eenv[k][x], vrecix_new ) ) |
---|
310 | ##sh[k] = { 'label':this.v, 'title':this.l, 'sn':this.s, 'units':this.u, 'description':this.c, 'procnote':'', 'procComment':'','prov':'CMIP6 endorsement' } |
---|
311 | sh[k] = [ this.v, this.l, this.s, this.u, this.c, '', '','CMIP6 endorsement' ] |
---|
312 | ee[this.u] = k |
---|
313 | if this.u == 'atmos': |
---|
314 | print 'ERROR: bad units',k, sx.eenv[k] |
---|
315 | print '################################### var units #################' |
---|
316 | print ee.keys() |
---|
317 | print '##########################################################' |
---|
318 | |
---|
319 | nk = 0 |
---|
320 | for k in ll_ov: |
---|
321 | this = nt_vrecix._make( map( lambda x: sx.eeold[k][x], vrecix_old ) ) |
---|
322 | ##sh[k] = { 'label':this.v, 'title':this.l, 'sn':this.s, 'units':this.u, 'description':this.c, 'procnote':'', 'procComment':'','prov':sx.eeold[k][6]} |
---|
323 | sh[k] = [ this.v, this.l, this.s, this.u, this.c, '', '',sx.eeold[k][6] ] |
---|
324 | nk += 1 |
---|
325 | |
---|
326 | print len( sx.eenv.keys() ) + len(ll_ov), len( sh.keys() ), len( sx.eenv.keys() ) |
---|
327 | sh.close() |
---|
328 | |
---|
329 | ### |
---|
330 | ### shelve contains 1682 variables, of which 561 are new. |
---|
331 | ### |
---|
332 | |
---|
333 | ### have lost somewhere the var/cmor var distinction. E.g. hfibthermds/hfibthermds2d --- 2 CMOR variable names with a single output variable name. |
---|
334 | ### But this will not be in the physical variable -- so should come in the output variable record. |
---|
335 | |
---|
class c1(object):
    """Counter holder: attribute ``d`` maps a key to an int, starting at 0."""

    def __init__(self):
        self.d = collections.defaultdict( int )


class c2(object):
    """List holder: attribute ``d`` maps a key to a list, starting empty."""

    def __init__(self):
        self.d = collections.defaultdict( list )
---|
342 | ee = collections.defaultdict( int ) |
---|
343 | ff = collections.defaultdict( c1 ) |
---|
344 | #count different shapes: |
---|
345 | blk = collections.defaultdict( c2 ) |
---|
346 | for k in sx.ee0.keys(): |
---|
347 | ee[sx.ee0[k][5]] += 1 |
---|
348 | ff[sx.ee0[k][5]].d[sx.ee0[k][10]] += 1 |
---|
349 | blk[sx.ee0[k][5]].d[sx.ee0[k][10]].append(k) |
---|
350 | keys = ee.keys() |
---|
351 | keys.sort() |
---|
352 | print keys |
---|
353 | ####################################### |
---|
354 | eeo = collections.defaultdict( int ) |
---|
355 | ffo = collections.defaultdict( c1 ) |
---|
356 | #count different shapes: |
---|
357 | hdims = [ 'latitude','longitude'] |
---|
358 | vdims = [ 'alevel','alevhalf', 'olevel', 'rho', 'alt40', 'location', 'plev','alev1', 'plev3', 'plev7', 'plev8', 'plevs', 'sdepth'] |
---|
359 | |
---|
360 | catdims = ['site', 'basin', 'oline', 'dbze', 'scatratio', 'sza5', 'tau', 'vegtype', 'vgidx'] |
---|
361 | tdims = ['time','time1','time2'] |
---|
362 | |
---|
363 | knowndims = hdims + vdims + catdims + tdims |
---|
364 | |
---|
365 | nsmap = {'':'MISC...', u'XYZKT':'lon.lat.ATMOS.other.time', u'XKT':'PMIP....', u'4-element vector':'..transsi.time', u'KZT':'RFMIP...', u'2D vector field':'lon.lat..sithick.time', u'XYZ':'lon.lat.olevel..time', u'XYZT':'lon.lat.ATMOS..time', u'2D':'SIMIP ... (xYT?)', u'scalar':'?...', u'XY':'FAFMIP, ISMIP6....', u'YZT':'MISC....', u'XYKT':'lon.lat..other.time', u'XYT':'lon.lat...time', u'ZST1':'..alevel.site.time', u'?':'PMIP', u'BasinYT':'.lat..basin.time'} |
---|
366 | ## XYZ: this occurs only in FAFMIP |
---|
367 | ## XYZT: {'C4MIP': 80, 'PMIP': 10, 'RFMIP': 3, 'CFMIP': 19, 'LS3MIP': 8, 'GeoMIP': 1, 'HighResMIP': 82} |
---|
368 | ## need to follow up on what is vertical. # of levels is captured in most cases. |
---|
369 | ## PMIP: email sent -- 29th June : some ocean, some atmos |
---|
370 | ## LS3MIP: soil or snow layers, |
---|
371 | ## RFMIP: all levels. |
---|
372 | ## XYZKT: CFMIP -- K has various forms. |
---|
373 | ## XKT: PMIP -- salt transport -- should be lat-basin-time? |
---|
374 | ## blank: {'LUMIP': 40, 'PMIP': 11, 'RFMIP': 4, 'HighResMIP': 11, 'FAFMIP': 3, 'DynVar': 10} |
---|
375 | #----- RFMIP: scalar .... email sent -- these are OfflineRad variables. |
---|
376 | #----- LUMIP: XYT |
---|
377 | #----- PMIP: occurs in bulk copy requests ..... |
---|
378 | #----- HighResMip: occurs in comment lines |
---|
379 | #----- FAFMIP: occurs in comment lines |
---|
380 | #----- DynVar: probably XYZT -- need to check whether on model or pressure levels. |
---|
381 | ## YZT: 'DAMIP': 4, 'VolMIP': 2, 'HighResMIP': 10, 'SolarMIP': 4 |
---|
382 | # ---- DAMIP: zonal mean, 17 or 23 levels (pressure) .. |
---|
383 | # ---- VolMIP: zonal mean, all levels |
---|
384 | # ---- HighResMIP: zonal mean, 36 levels |
---|
385 | # ---- SolarMIP: zonal mean, 17 or 23 levels. (withdrawn and merged with DAMIP ....). |
---|
386 | ## scalar: 'ISMIP6': 8, 'SIMIP': 6 |
---|
387 | ## --> to KT, with K set to icesheet/hemisphere |
---|
388 | ## ----- SIMIP: time (monthly) |
---|
389 | ## '?': PMIP, for variables specified in C4MIP. |
---|
390 | ## |
---|
391 | |
---|
# Tally, for every record in sx.ee_mip, which dimension names occur at which
# position of the '|'-separated string held in the record's last field.
#   ed[name].d[i]    -- number of records with `name` at position i
#   ed[name].d['f']  -- number of records with `name` as the final entry
#   eeo[dims]        -- number of records with that exact dimension string
#   ffo[dims].d[x]   -- the same count, split by the record's second-to-last field
ed = collections.defaultdict( c1 )
for k in sx.ee_mip.keys():
    rec = sx.ee_mip[k]
    dl = rec[-1]
    # NOTE(review): 's|c|a|l|a|r' looks like the word 'scalar' joined
    # character by character somewhere upstream -- confirm.
    if dl == 's|c|a|l|a|r':
        dl = 'scalar'
    dd = dl.split( '|' )
    # enumerate supplies the real position of each entry.  The previous
    # dd.index(d) returned the position of the first occurrence, so a name
    # appearing twice in one record was counted twice at the same position.
    for ix, d in enumerate( dd[:-1] ):
        ed[d].d[ix] += 1
    ed[dd[-1]].d['f'] += 1
    # The counts below are keyed on the raw field, not the normalised dl.
    eeo[rec[-1]] += 1
    ffo[rec[-1]].d[rec[-2]] += 1
keys = sorted( eeo.keys() )
---|
406 | |
---|
407 | nkn = [] |
---|
408 | for k in ed.keys(): |
---|
409 | if k not in knowndims: |
---|
410 | nkn.append(k) |
---|
411 | nkn.sort() |
---|
412 | |
---|
413 | shlnks = shelve.open( '%s/requestLinks_tmp' % odir, 'r' ) |
---|
414 | shrvg = shelve.open( '%s/requestVarGroup_tmp' % odir, 'r' ) |
---|
415 | eervg = {} |
---|
416 | ## |
---|
417 | ## construct look-up by label |
---|
418 | ## |
---|
419 | for k in shrvg.keys(): |
---|
420 | if k[0] != '_': |
---|
421 | eervg[shrvg[k][2]] = k |
---|
422 | |
---|
423 | sh = shelve.open( 'dreq_consol_tables_reviewed_b_v20150708' ) |
---|
424 | revTabIds = sh.keys()[:] |
---|
425 | sh.close() |
---|
426 | |
---|
427 | ## parse 1 |
---|
428 | ee0_p1 = {} |
---|
429 | ee0_gp = collections.defaultdict( int ) |
---|
430 | ee0_gp = collections.defaultdict( d1 ) |
---|
431 | for k in sx.ee0.keys(): |
---|
432 | r = list(sx.ee0[k][:] ) |
---|
433 | if r[1] != '' and r[1][0] != '#': |
---|
434 | if ee0_xref.has_key(k): |
---|
435 | r1 = ee0_xref[k] |
---|
436 | assert len(r1) == 1, 'FATAL:001.0001: unexpected duplicate cross ref, %s %s %s' % (k,str(r1),str(r) ) |
---|
437 | assert len(r1[0][1]) == 1, 'FATAL:001.0002: unexpected duplicate cross ref, %s %s %s' % (k,str(r1),str(r) ) |
---|
438 | r += [r1[0][0],r1[0][1][0]] |
---|
439 | else: |
---|
440 | r += [-1,'__none__'] |
---|
441 | ee0_p1[k] = r[:] |
---|
442 | ee0_gp[r[0]][r[10]] = k |
---|
443 | if r[0] == '': |
---|
444 | print '################' |
---|
445 | print r |
---|
446 | print '################### groups referenced from new groups sheets' |
---|
447 | print ee0_gp.keys() |
---|
448 | k1 = ee0_gp.keys() |
---|
449 | k1.sort() |
---|
450 | kmapk1 = {} |
---|
451 | for k in k1: |
---|
452 | assert len(ee0_gp[k].keys()) in [0,1], 'Multiple references to %s' % k |
---|
453 | for k2 in ee0_gp[k].keys(): |
---|
454 | if eervg.has_key( '%s.%s' % (k2,k) ): |
---|
455 | kmapk1[k] = ('%s.%s' % (k2,k),'newGp') |
---|
456 | else: |
---|
457 | kmapk1[k] = ('%s.%s' % (k2,k),'unresolved') |
---|
458 | |
---|
459 | gpids = [] |
---|
460 | for k in kmapk1.keys(): |
---|
461 | gpids.append(kmapk1[k][0]) |
---|
462 | kmapk2 = {} |
---|
463 | |
---|
# Pairs of (label as found, label to use instead) for group names; kmap000 is
# the same information as a dictionary keyed on the first item of each pair.
# An earlier version of this list also held ('LEday', 'L_day'); that list was
# overwritten before use, so only the entries below have ever been active.
mmmm = [('C_OceanT1', 'C_Ocean_T1'), ('C_OceanT2', 'C_Ocean_T2'),
        ('3hr_extreme', '3hr_extr'), ('Amon_extreme', 'Amon_ext'),
        ('Amon_convection', 'Amon_conv'), ('L3hr', 'L_3hr'),
        ('DYNVAR_day', 'DYVR_daily')]
kmap000 = dict( mmmm )
---|
471 | ## check links from variable output specs to group. |
---|
472 | ## need to identify what is meant by group labels in "Request Scoping" -- now in eervg.keys() |
---|
473 | ## |
---|
474 | ks = eervg.keys() |
---|
475 | ks.sort() |
---|
476 | for k0 in ks: |
---|
477 | a,b = string.split(k0,'.') |
---|
478 | if kmap000.has_key(b): |
---|
479 | k = '%s.%s' % (a,kmap000[b]) |
---|
480 | else: |
---|
481 | k = k0 |
---|
482 | ok = False |
---|
483 | if b[:5] == 'CCMI1': |
---|
484 | b1,b2 = string.split(b, '_' ) |
---|
485 | if ccmivgrdict.has_key( b2): |
---|
486 | ok = True |
---|
487 | kmapk2[k0] = ('CCMI',b2) |
---|
488 | elif b[:5] == 'SPECS': |
---|
489 | b1,b2 = string.split(b, '_' ) |
---|
490 | if b2 in specsvgr: |
---|
491 | ok = True |
---|
492 | kmapk2[k0] = ('SPECS',b2) |
---|
493 | elif b2 == 'Amon': |
---|
494 | kmapk2[k0] = ('SPECS','mon') |
---|
495 | ok = True |
---|
496 | if not ok: |
---|
497 | if k in gpids: |
---|
498 | ## print 'OK1: ',k |
---|
499 | kmapk2[k0] = ('new',k) |
---|
500 | ok = True |
---|
501 | elif a in ['LUMIP','LS3MIP']: |
---|
502 | tk = 'C4MIP.%s' % kmap000.get(b,b) |
---|
503 | if tk in gpids: |
---|
504 | kmapk2[k0] = ('new',tk) |
---|
505 | ok = True |
---|
506 | elif a in ['VolMIP','DAMIP']: |
---|
507 | tk = 'DynVar.%s' % kmap000.get(b,b) |
---|
508 | if tk in gpids: |
---|
509 | kmapk2[k0] = ('new',tk) |
---|
510 | ok = True |
---|
511 | if not ok: |
---|
512 | if k in revTabIds: |
---|
513 | ## print 'OK2: ',k |
---|
514 | kmapk2[k0] = ('CMIP5Rev',k) |
---|
515 | else: |
---|
516 | if b in cmip5vgr: |
---|
517 | kmapk2[k0] = ('CMIP5',b) |
---|
518 | else: |
---|
519 | kmapk2[k0] = ('nomap',k) |
---|
520 | |
---|
521 | print '###############################################################' |
---|
522 | print '###############################################################' |
---|
523 | ## |
---|
524 | ## following list currently contains separate entries for group references from different MIPs --- want to have a single one |
---|
525 | ## here, and multiple references to it ... |
---|
526 | ## |
---|
527 | rvgref2 = collections.defaultdict( list ) |
---|
528 | lookup1 = collections.defaultdict( d1 ) |
---|
529 | lookup2 = collections.defaultdict( d1 ) |
---|
530 | import sx2_extra as sx2e |
---|
531 | sh = shelve.open( '%s/requestVarGroup' % odir, 'n' ) |
---|
532 | sh['__info__'] = { 'label':'requestVarGroup', 'title':'Identify variable groups' } |
---|
533 | sh['__cols__'] = [ 'uuid', 'mip', 'label','title','ref','refNote'] |
---|
534 | #### |
---|
535 | #### reconfigure to uuid, mip, label (no "."), title, class, refNote |
---|
536 | #### for "ttt==new", use sx2e.ngmap( kkk ) to give labels, keep kkk as refNote, ttt --> class |
---|
537 | ### |
---|
538 | ### lookup1: constructed from requestVarGroup list |
---|
539 | for k in shrvg.keys(): |
---|
540 | if k[0] != '_': |
---|
541 | id0 = shrvg[k][2] |
---|
542 | ttt,kkk = kmapk2[id0] |
---|
543 | lookup1[ttt].d[kkk].append(k) |
---|
544 | if ttt == 'new': |
---|
545 | rvgref2[kkk].append( shrvg[k][0] ) |
---|
546 | ##sh[k] = list( shrvg[k] ) + list( kmapk2[id0] ) |
---|
547 | ##print '>>>>>',k,sh[k] |
---|
548 | for k in lookup1.keys(): |
---|
549 | for k2 in lookup1[k].d.keys(): |
---|
550 | id = lookup1[k].d[k2][0] |
---|
551 | id0 = shrvg[id][2] |
---|
552 | mip = shrvg[id][1] |
---|
553 | ttt,kkk = kmapk2[id0] |
---|
554 | cls = k |
---|
555 | assert cls == k, 'Mismatch in lookup1' |
---|
556 | refNote = k2 |
---|
557 | if cls == 'new': |
---|
558 | lab = sx2e.ngmap[kkk] |
---|
559 | elif cls == 'CMIP5': |
---|
560 | lab = string.replace( k2, '_', '-' ) |
---|
561 | elif cls == 'CMIP5Rev': |
---|
562 | lab = string.replace( k2, '.', '-' ) |
---|
563 | elif cls == 'nomap': |
---|
564 | lab = string.replace( k2, '.', '-' ) |
---|
565 | lab = string.replace( lab, '_', '-' ) |
---|
566 | else: |
---|
567 | lab = '%s-%s' % (mip,k2) |
---|
568 | if sx2e.r1.match( lab ) == None: |
---|
569 | print 'NON_COMPLIANT label', lab |
---|
570 | print k,k2,shrvg[id] |
---|
571 | assert False |
---|
572 | sh[id] = [id,mip,lab,'%s: %s' % (mip,lab), cls, refNote] |
---|
573 | |
---|
574 | |
---|
575 | for k in ['seaiceday', 'seaicemon']: |
---|
576 | id = str( uuid.uuid1() ) |
---|
577 | mip = 'SIMIP' |
---|
578 | lab = '%s-%s' % (mip,k) |
---|
579 | title = '%s: %s' % (mip,k) |
---|
580 | cls = 'newOrp' |
---|
581 | refNote = '%s.%s' % (mip,k) |
---|
582 | sh[id] = [id,mip,lab,title, cls, refNote] |
---|
583 | lookup1[cls].d[refNote].append(id) |
---|
584 | |
---|
585 | sh.close() |
---|
586 | |
---|
587 | |
---|
588 | err001 = collections.defaultdict( int ) |
---|
589 | sh = shelve.open( '%s/groupItems' % odir, 'n' ) |
---|
590 | sh['__info__'] = {'label':'groupItems', 'title':'List of items in new groups', 'description':"New groups" } |
---|
591 | ##(u'DYVR_daily', u'utendnogw', '', u'daily', '', '', '', '', '', u'DynVar', 'DynVar', '772da0b6-25b6-11e5-8cf6-ac72891c3257') |
---|
592 | sh['__cols__'] = ['group','var','table','freq','', 'shape', '', '', '','mip','mip?','uuid','new','gpid','vkey','vid'] |
---|
593 | for k in ee0_p1.keys(): |
---|
594 | kkk = '%s.%s' % (ee0_p1[k][10],ee0_p1[k][0]) |
---|
595 | ##if not sx2e.ngmap.has_key(kkk): |
---|
596 | if lookup1['new'].d.has_key(kkk): |
---|
597 | id = lookup1['new'].d[kkk][0] |
---|
598 | elif lookup1['newOrp'].d.has_key(kkk): |
---|
599 | id = lookup1['newOrp'].d[kkk][0] |
---|
600 | else: |
---|
601 | id = None |
---|
602 | err001[kkk] += 1 |
---|
603 | r = ee0_p1[k][:] |
---|
604 | if id != None: |
---|
605 | r[-2] = 1 |
---|
606 | r[-1] = id |
---|
607 | else: |
---|
608 | r[-2] = -1 |
---|
609 | r[-1] = '__none__' |
---|
610 | t = ee0_xref[k][0] |
---|
611 | print '>>>>>>>>>>>>>>>>>>>>',t |
---|
612 | r += [t[0],t[1][0]] |
---|
613 | sh[k] = r[:] |
---|
614 | sh.close() |
---|
615 | |
---|
616 | |
---|
617 | sh = shelve.open( '%s/requestLinks' % odir, 'n' ) |
---|
618 | sh['__info__'] = { 'label':'requestLinks', 'title':'Links from variable groups to a request id' } |
---|
619 | sh['__cols__'] = [ 'uid', 'mip', 'tab','objective','grid','gridreq','comment','ref','refNote','refid'] |
---|
620 | for k in shlnks.keys(): |
---|
621 | if k[0] != '_': |
---|
622 | k1 = '%s.%s' % (shlnks[k][1],shlnks[k][2]) |
---|
623 | assert kmapk2.has_key(k1), 'Key missing %s, %s' % (k,k1) |
---|
624 | if not kmapk2.has_key(k1): |
---|
625 | print '###########',shlnks[k] |
---|
626 | else: |
---|
627 | ttt,kkk = kmapk2[k1] |
---|
628 | id = lookup1[ttt].d[kkk][0] |
---|
629 | sh[k] = list( shlnks[k] ) + list( kmapk2[k1] ) + [id,] |
---|
630 | sh.close() |
---|
631 | ## link to group items is now via mip + group id, using stuff from kmapk2 .... |
---|
632 | |
---|
633 | dorest = False |
---|
634 | if dorest: |
---|
635 | |
---|
636 | gg = {} |
---|
637 | for k in sx.eeold.keys(): |
---|
638 | if sx.eeold[k][7][:6] == 'mv.005': |
---|
639 | gg[sx.eeold[k][1]] = k |
---|
640 | ### |
---|
641 | ### BUT need to get full OMIP records in here ...... |
---|
642 | sh = shelve.open( '%s/refTableItems' % odir, 'n' ) |
---|
643 | sh['__info__'] = {'label':'refTableItems', 'title':'List of items in reference tables', 'prov':'sx.ee_mip' } |
---|
644 | sh['__cols__'] = ['uuid', 'comment', 'deflate_level', 'shuffle', 'ok_max_mean_abs', 'flag_meanings', \ |
---|
645 | 'type', 'ok_min_mean_abs', 'standard_name', 'deflate', 'long_name', 'valid_min',\ |
---|
646 | 'cell_methods', 'flag_values', 'cell_measures', 'out_name', 'modeling_realm', 'units',\ |
---|
647 | '#cell_methods', 'valid_max', 'positive', 'var', 'mipTable','dimensions','vid'] |
---|
648 | vdefex = {} |
---|
649 | for k in sx.ee_mip.keys(): |
---|
650 | if not sx.ee_xref_bck.has_key( k ): |
---|
651 | kl = ['__none__',] |
---|
652 | print 'ERROR.0002.003: no back references %s' % str(sx.ee_mip[k]) |
---|
653 | vnam0 = '' |
---|
654 | else: |
---|
655 | kl = sx.ee_xref_bck[ k ] |
---|
656 | if len(kl) != 1: |
---|
657 | print 'ERROR.0002.001: multiple back references %s' % str(kl) |
---|
658 | print sx.ee_mip[k] |
---|
659 | kv = kl[0] |
---|
660 | r = sx.eeold[kv] |
---|
661 | vnam0 = r[1] |
---|
662 | kit = 0 |
---|
663 | while r[8] not in [ '','null'] and r[8][0] != '*': |
---|
664 | if ee_mv0.has_key(r[8]): |
---|
665 | kv = ee_mv0[ r[8] ] |
---|
666 | r = sx.eeold[kv] |
---|
667 | else: |
---|
668 | print 'ERROR.0002.002: ambiguos back reference: %s' % str(r) |
---|
669 | kit += 1 |
---|
670 | assert kit < 5, 'Too many iterations: %s' % str(r) |
---|
671 | if kit > 1: |
---|
672 | print 'WARNING.0002.001: chained reference: %s' % str(r) |
---|
673 | kl = [kv,] |
---|
674 | if r[1] != vnam0: |
---|
675 | print 'INFO.0001.001: name change:',r,vnam0 |
---|
676 | vdefex[k] = list(r) + [vnam0,] |
---|
677 | sh[k] = sx.ee_mip[k][:] + [kl[0],] |
---|
678 | sh.close() |
---|
679 | |
---|
680 | sha = shelve.open( 'dreq_consol_tables_reviewed_a_v20150708' ) |
---|
681 | shb = shelve.open( 'dreq_consol_tables_reviewed_b_v20150708' ) |
---|
682 | keys = shb.keys() |
---|
683 | keys.sort() |
---|
684 | k0 = [] |
---|
685 | for k in keys: |
---|
686 | bb = string.split(k, '.' ) |
---|
687 | if len(bb) == 2: |
---|
688 | k0.append( tuple(bb) ) |
---|
689 | |
---|
690 | sh = shelve.open( '%s/revTabItems' % odir, 'n' ) |
---|
691 | sh['__info__'] = {'label':'revTabItems', 'title':'List of items in revised tables', 'description':"uuid (col 3) references an item in 'refTableItems'", 'prov':'from dreq_consol_tables_reviewed_b_v20150708, cross-referenced with sx.ee_miptables_old' } |
---|
692 | sh['__cols__'] = ['var','table','mip','uuid','priority'] |
---|
693 | #### |
---|
694 | dop = dopen('check3') |
---|
695 | kr = 0 |
---|
696 | |
---|
697 | for m,t in k0: |
---|
698 | vars = shb[ '%s.%s' % (m,t) ] |
---|
699 | ee = {} |
---|
700 | ff = {} |
---|
701 | rk = 'CMIP5_%s' % t |
---|
702 | if not sx.ee_miptables_old.has_key(rk): |
---|
703 | print '#################### key %s not found ' % rk |
---|
704 | else: |
---|
705 | for u in sx.ee_miptables_old[rk]: |
---|
706 | ee[ sx.ee_mip[u][21] ] = u |
---|
707 | if vdefex.has_key(u): |
---|
708 | if vdefex[u][1] != sx.ee_mip[u][21]: |
---|
709 | ff[vdefex[u][1]] = u |
---|
710 | ##if sx.ee_xref_bck.has_key(u): |
---|
711 | for v0 in vars.keys(): |
---|
712 | v = string.strip( v0 ) |
---|
713 | r = sha['records'][vars[v0][0]] |
---|
714 | if not ee.has_key(v): |
---|
715 | if ff.has_key(v): |
---|
716 | print 'INFO ... rename found',v |
---|
717 | thisu = ff[v] |
---|
718 | elif v[:7] == 'include': |
---|
719 | thisu = '__inc__' |
---|
720 | elif m == 'OMIP' and gg.has_key(v): |
---|
721 | print 'INFO ... new OMIP var found',v |
---|
722 | thisu = '__new__' |
---|
723 | else: |
---|
724 | print 'ERROR.003.0001 var not found: %s (%s, %s)' % (v,m,t) |
---|
725 | |
---|
726 | if m == 'OMIP': |
---|
727 | kr += 1 |
---|
728 | sr = map(str,r) |
---|
729 | dop.a['omip'].write( string.join( map(str,r), '\t' ) + '\n' ) |
---|
730 | dop.a['omip2'].write( string.join( map(lambda x: sr[x],[4,0,5,2,7]) + ['%s_%s' % (m,t),'mv.005.%4.4i' % kr], '\t' ) + '\n' ) |
---|
731 | |
---|
732 | ## not finding OMIP Oyr variables ... even where they are in vlsc5 and hence in shold. Problem is table ref. |
---|
733 | ## need to look at new names as well ... as renamed in, e.g. OMIP |
---|
734 | thisu = None |
---|
735 | else: |
---|
736 | thisu = ee[v] |
---|
737 | kv = str( uuid.uuid1() ) |
---|
738 | sh[kv] = [v,t,m,thisu,vars[v0][1]] |
---|
739 | dop.close('omip',rename='omip.csv') |
---|
740 | dop.close('omip2',rename='omip2.csv') |
---|
741 | sh.close() |
---|