1 | from scansh02 import rq |
---|
2 | import string, collections, uuid, shelve |
---|
3 | import importWbRef |
---|
4 | import xlrd |
---|
5 | from importWbRef import cellMethodCheck |
---|
6 | import importWbMods |
---|
# Two-level lookup keyed as cmip5GrpLk[table][variable] -> group key; inner
# dictionaries are created on first access.
cmip5GrpLk = collections.defaultdict(dict)
---|
8 | |
---|
class tableUtils(object):
    """Assign variables of CMIP5-style tables to named subgroups.

    Attributes set by the constructor:
      subGroup      -- subgroup chosen by the most recent getSubGroup call (or None)
      sgset         -- set of all subgroups assigned so far
      dset001       -- table names already reported as having no subgroup
      vdict         -- uid -> (var, ttl, 'CMIP6', tab, subgroup, priority)
      sgsetExpected -- the subgroups subGroupCheck expects to have been seen
    """

    def __init__(self):
        self.subGroup = None
        self.sgset = set()
        self.dset001 = set()
        self.vdict = {}
        self.sgsetExpected = {
            'Amon_2d', 'Omon_3d', 'Omon_oth', 'Oyr_3dtr', 'aero_3d', 'aero_oth',
            'cf3hr_grid', 'cf3hr_sim', 'cfDay_2d', 'cfDay_3d', 'cfMon_2dmod',
            'cfMon_3dmod', 'cfMon_3dstd', 'cfMon_sim', 'day_oth', 'day_ss',
        }

    def subGroupCheck(self):
        """Compare the subgroups seen so far with the expected set.

        Prints one error line for expected-but-missing subgroups and one for
        found-but-unexpected subgroups.  Returns True only when both sets agree.
        """
        missing = sorted(self.sgsetExpected.difference(self.sgset))
        unexpected = sorted(self.sgset.difference(self.sgsetExpected))
        ok = True
        if missing:
            print('%s %s' % ('ERROR.sg01.00001: not all expected subgroups found ', missing))
            ok = False
        if unexpected:
            print('%s %s' % ('ERROR.sg01.00002: not all found subgroups expected ', unexpected))
            ok = False
        return ok

    def getSubGroup(self, dims, tab0, var, ixs, ttl, uid, priority):
        '''Used to identify subgroups of variables in CMIP5 tables and revised versions of these tables.

        dims     -- '|'-separated dimension string of the variable
        tab0     -- table name; a leading 'CMIP5' or 'OMIP' tag plus one separator
                    character is stripped before matching
        var      -- variable name
        ixs      -- row index, anything accepted by int(float(...))
        ttl      -- variable title
        uid      -- key under which the result is stored in self.vdict
        priority -- stored unchanged in the self.vdict record

        Returns True when a subgroup was assigned (self.subGroup, self.sgset and
        self.vdict are updated), otherwise False; each table name without a
        subgroup is reported once.
        '''
        self.subGroup = None
        sg = None
        if tab0[:5] == 'CMIP5':
            tab = tab0[6:]
        elif tab0[:4] == 'OMIP':
            tab = tab0[5:]
        else:
            tab = tab0
        ix = int(float(ixs))
        ##
        ## this is probably unreliable .... also not used ..... (March 2016)
        ##
        if tab == 'Oyr' and ix < 60:
            sg = 'Oyr_3dtr'
        elif tab == 'Amon':
            if dims.find('lev') == -1 and dims.find('longitude|latitude') != -1:
                sg = 'Amon_2d'
        elif tab == 'Omon':
            if dims.find('longitude|latitude|olevel') != -1:
                sg = 'Omon_3d'
            else:
                sg = 'Omon_oth'
        elif tab == 'cfMon':
            if ttl[:5] in ['ISCCP', 'PARAS', 'CALIP']:
                sg = 'cfMon_sim'
            elif var[-3:] == 'co2':
                if dims.find('longitude|latitude|alev') != -1:
                    sg = 'cfMon_3dmod'
                else:
                    sg = 'cfMon_2dmod'
            else:
                sg = 'cfMon_3dstd'
        elif tab == 'cfDay':
            try:
                bits = dims.split('|')
                if (dims.find('longitude|latitude') != -1 and len(bits) > 2
                        and bits[2] in ('alev', 'alevel', 'alevhalf', 'alt40', 'plev7')):
                    sg = 'cfDay_3d'
                else:
                    sg = 'cfDay_2d'
            except (AttributeError, TypeError):
                # dims was not a string; leave sg unset so the table is reported below.
                print('%s %s' % ('SEVERE: Failed to parse: ', dims))
        elif tab == 'cf3hr':
            if dims.find('longitude|latitude') != -1:
                sg = 'cf3hr_grid'
            else:
                sg = 'cf3hr_sim'
        elif tab == 'aero':
            if dims.find('longitude|latitude|alevel') != -1:
                sg = 'aero_3d'
            else:
                sg = 'aero_oth'
        elif tab == 'day':
            if ix < 28:
                sg = 'day_ss'
            else:
                sg = 'day_oth'

        if sg is not None:
            self.subGroup = sg
            self.sgset.add(sg)
            self.vdict[uid] = (var, ttl, 'CMIP6', tab, sg, priority)
            return True

        # Report each unclassified table name only once.
        if tab0 not in self.dset001:
            print('%s %s' % ('ERROR.ivg002.0001: No SUBGROUP: ', tab0))
            self.dset001.add(tab0)
        return False
---|
97 | |
---|
class structureList(object):
    """Registry of "structure" records keyed by uid, label and a 7-element lookup tuple.

    The lookup tuple used throughout is
    (time label, spatial label or id, odims, coords, cell_methods, cell_measures, flag_meanings).

    The constructor reads two spreadsheets (refStructure.xlsx, structureRepeats.xls)
    and two shelves (structRef, extraStructureRecordsRef) from hard-coded relative
    paths and uses the project modules importWbRef and importWbMods.
    """

    ## table headings for structure list spreadsheet
    sats = [u'label', u'title', u'description', u'procNote', u'prov',
            u'odims', u'coords', u'cell_methods', u'cell_measures', u'flag_meanings', u'flag_values',
            u'spid', u'tmid', u'cmid', u'cids', u'dids', u'uid']
    ##hd = sats[:-6] + ['spatial label','time label'] + sats[-6:]
    hd = ['spatial label', 'time label'] + sats

    def __init__(self):
        """Load all reference tables; statement order matters (later loaders use earlier results)."""
        self.force = ['str-a02', ]
        self.spsh = importWbRef.loadSpatial()
        self.spsh.addTime()
        self.cellm = importWbRef.loadCellm()
        self.ll = set()
        self.lldict = dict()
        self.structAttr = ['spid', 'tmid'] + self.sats
        self.tmsp2str = {}
        self.importRef02()
        self.strRepMap = {}
        # impStructRep checks labels against self.ref2ByLab, which importRef02 creates.
        self.impStructRep()
        self.modStr = importWbMods.loadStr()

        self.nt_dims = collections.namedtuple('dims', ['uid', 'label', 'spid', 'tlab', 'odims', 'coords', 'cell_methods', 'cell_measures', 'flag_values', 'flag_meanings', 'description', 'procNote', 'prov'])
        self.nt2_dims = collections.namedtuple('dims', ['uid', 'label', 'splab', 'tlab', 'odims', 'coords', 'cell_methods', 'cell_measures', 'flag_values', 'flag_meanings', 'description', 'procNote', 'prov'])
        self.ee3 = {}
        #self.cmch = cellMethodCheck()
        self.tmap = {}
        self.messageDone = set()
        # importExRef needs self.nt_dims, defined just above.
        self.importExRef()
        self.orecs = set()
        self.kr = 0
        self.newStrLabels = set()
        self.uidByLab = {}
        self._loadStructRef()

    def _loadStructRef(self):
        """Fill self.tmap and self.uidByLab from the 'structRef' shelf, then close the shelf."""
        byFlag = collections.defaultdict(dict)
        seen = dict()
        sh = shelve.open('../framework/inSh/structRef', 'r')
        try:
            for k in sh.keys():
                rec = sh[k]
                print('%s %s' % (k, rec))
                if k[0] != '_':
                    #['tmid', 'spid', 'odims', 'coords', 'cell_methods', 'cell_measures', 'flag_meanings', 'uid','label','title','description','flag_values','time_label']
                    rr = rec[:7]
                    # the lookup tuple starts with the second-to-last field instead of the first
                    rr[0] = rec[-2]
                    t = tuple(rr)
                    flg = rec[-1]
                    if flg:
                        assert t not in byFlag[True], 'duplicate lookup tuple for structures ....%s:\n %s\n %s' % (str(t), str(seen[t]), str(rec))
                    seen[t] = rec
                    byFlag[flg][t] = rec[7:-1] + [rec[0], ]
        finally:
            sh.close()

        # Flagged records take precedence; unflagged ones only fill the gaps.
        for t in byFlag[True]:
            self.tmap[t] = byFlag[True][t]
        for t in byFlag[False]:
            if t not in self.tmap:
                self.tmap[t] = byFlag[False][t]

        for r in self.tmap.values():
            print('%s %s' % ('>>>>>>>> STRUCTURE :::', r))
            self.uidByLab[r[1]] = r[0]

    def asDict(self, uid):
        """Return the record stored under uid as a dict keyed by the headings in self.hd."""
        assert uid in self.ref2ByUid, 'uid not found: %s' % uid
        rec = self.ref2ByUid[uid]
        d1 = {}
        for i, heading in enumerate(self.hd):
            d1[heading] = rec[i]
        return d1

    def impStructRep(self):
        """Read label replacements from inputs/structureRepeats.xls into self.strRepMap.

        For every row whose first cell starts with 'str-', the (stripped) third
        cell is mapped to the (stripped) first cell; the target label must
        already be in self.ref2ByLab.
        """
        # The second positional argument of xlrd.open_workbook is the log file,
        # not a mode string, so only the file name is passed.
        wb = xlrd.open_workbook('inputs/structureRepeats.xls')
        sht = wb.sheet_by_name('Sheet1')
        for i in range(sht.nrows):
            r = [x.value for x in sht.row(i)]
            if len(r[0]) > 3 and r[0][:4] == 'str-':
                target = r[0].strip()
                source = r[2].strip()
                assert target in self.ref2ByLab, 'Attempt to map to undefined label: %s' % target
                self.strRepMap[source] = target
                print('INFO.strmap.00001: %s --- > %s' % (source, target))

    def importRef02(self):
        """Load inputs/refStructure.xlsx (sheet 's1') into the ref2* lookup tables.

        Creates self.ref2ByUid, self.ref2ByLab, self.ref2NewLabs, self.ref2Byt2,
        self.ref2Maps and fills self.tmsp2str.

        Raises AssertionError on unexpected headings, duplicate uids, an
        unrecognised mode, or a mapping to an undefined label; RuntimeError on
        an unknown cell-methods string or when two records (other than mode 'o')
        share a lookup tuple, in which case duplicateStructure.csv is written first.
        """
        wb = xlrd.open_workbook('inputs/refStructure.xlsx')
        sht = wb.sheet_by_name('s1')
        r1 = [x.value for x in sht.row(0)]
        assert r1 == self.hd, 'Headings of refStructure.xlsx do not match expected values: %s' % str(r1)
        print('IMPORT REF 02: PASSED FIRST CHECK')
        inx = [self.hd.index(x) for x in ['time label', 'spatial label', 'odims', 'coords', 'cell_methods', u'cell_measures', 'flag_meanings']]
        self.ref2ByUid = {}
        self.ref2ByLab = {}
        self.ref2NewLabs = set()
        self.ref2Byt2 = {}
        self.ref2Maps = collections.defaultdict(set)
        # NOTE(review): the first literal of this list was garbled in the source
        # listing; it appears to be a second spelling of '("(\'\',)",)' -- confirm.
        emptyMarkers = ['("(\'\',)",)', "('',)"]
        uidsByTuple = collections.defaultdict(list)
        for i in range(1, sht.nrows):
            r = [x.value for x in sht.row(i)]
            # NOTE(review): x == 0 addresses r[0] (r[-0]), so this cleans the first
            # cell and the last five cells -- confirm that is the intended range.
            for x in range(6):
                if r[-x] in emptyMarkers:
                    r[-x] = ''
            uid = r[-1]
            lab = r[2]
            mode = None
            if lab.find(' ') != -1:
                # label column of the form '<label> <mode> <target label>'
                l1, mode, l2 = lab.split(' ')
                assert mode in ['o', 'a'], 'Unrecognised mode in structure table (Label column): %s' % lab
                self.ref2Maps[l2].add(l1)

            cm, rv = importWbRef.cellMethodCheck(r[9])
            if cm != '':
                if cm in self.cellm.cminfo:
                    cmid = self.cellm.cminfo[cm][2]
                elif cm in self.cellm.cmmap:
                    cm = self.cellm.cmmap[cm]
                    cmid = self.cellm.cminfo[cm][2]
                else:
                    print('ERROR.cm.00001: cell methods not found: %s, %s' % (cm, r[9]))
                    raise RuntimeError('ERROR.cm.00001: cell methods not found: %s, %s' % (cm, r[9]))
            else:
                cmid = 'CellMethods::fixed'
                if cm in self.cellm.cmmap:
                    cm = self.cellm.cmmap[cm]
                    cmid = self.cellm.cminfo[cm][2]
            r[9] = cm
            r[15] = cmid
            t2 = tuple([r[x] for x in inx])
            assert uid not in self.ref2ByUid, 'Duplicate uid in refStructure.xlsx: %s' % uid
            if lab in self.ref2ByLab:
                print('SEVERE: Duplicate lab in refStructure.xlsx: %s' % lab)
            ##assert lab not in self.ref2ByLab, 'Duplicate lab in refStructure.xlsx: %s' % lab
            self.ref2ByUid[uid] = r[:]
            self.ref2ByLab[lab] = uid

            # NOTE(review): nesting inferred -- mode 'o' records are assumed to stay
            # out of both the duplicate check and the tuple lookup; confirm.
            if not mode == 'o':
                uidsByTuple[t2].append(uid)
                ##assert t2 not in self.ref2Byt2, 'Duplicate t2 in refStructure.xlsx: %s, %s' % (str(t2), uid)
                if t2 not in self.ref2Byt2:
                    self.ref2Byt2[t2] = uid

        dupTuples = [t for t in uidsByTuple if len(uidsByTuple[t]) > 1]
        if len(dupTuples) > 0:
            print('SEVERE: Duplicate dimensions encountered')
            with open('duplicateStructure.csv', 'w') as oo:
                for t in dupTuples:
                    oo.write('#\t\n')
                    for u in uidsByTuple[t]:
                        oo.write('\t'.join(self.ref2ByUid[u]) + '\n')
            raise RuntimeError('SEVERE: Duplicate dimensions encountered; see duplicateStructure.csv')

        print('IMPORT REF 02: PASSED SECOND CHECK')
        for x in self.ref2Maps:
            for l in self.ref2Maps[x]:
                assert x in self.ref2ByLab, 'Attempt to map to undefined structure: %s, %s' % (x, l)

        for t in self.ref2Byt2:
            ###
            ### define lookup table for AerChemMIP
            ###
            ###
            if t[3] == '' and t[2] == '' and t[6] == '':
                cm = t[4]
                if cm in ['time: mean', 'time: point']:
                    if cm == 'time: mean':
                        tmode = 'mean'
                    else:
                        tmode = 'point'
                    if t[5] in ['', 'area: areacella']:
                        self.tmsp2str[(t[0], t[1], tmode)] = self.ref2Byt2[t]

    def importExRef(self):
        """Load the 'extraStructureRecordsRef' shelf into xRecByUid / xRecByTt / xRecByLab.

        Each shelf value is a (flag, record) pair.  Flagged records are indexed
        first and must have unique lookup tuples and labels; unflagged records
        are indexed afterwards and skipped (with an INFO line) when their lookup
        tuple is already taken.
        """
        self.xRecByUid = {}
        self.xRecByTt = {}
        self.xRecByLab = {}
        sh = shelve.open('sh/extraStructureRecordsRef', 'r')
        try:
            for k in sh:
                entry = sh[k]
                self.xRecByUid[k] = entry
                flg, rec = entry
                if flg:
                    nt = self.nt_dims._make(rec[:13])
                    t1 = (nt.tlab, nt.spid, nt.odims, nt.coords, nt.cell_methods, nt.cell_measures, nt.flag_meanings)
                    assert t1 not in self.xRecByTt, 'FATAL.xrec.00001: duplicate lookup tuple encountered %s' % str(t1)
                    self.xRecByTt[t1] = k
                    print('%s %s %s' % ('xRecByLab:: ', nt.label, k))
                    assert nt.label not in self.xRecByLab, 'Duplicate label: %s' % nt.label
                    self.xRecByLab[nt.label] = k
        finally:
            sh.close()

        for k in self.xRecByUid:
            flg, rec = self.xRecByUid[k]
            if not flg:
                nt = self.nt_dims._make(rec[:13])
                t1 = (nt.tlab, nt.spid, nt.odims, nt.coords, nt.cell_methods, nt.cell_measures, nt.flag_meanings)
                if t1 in self.xRecByTt:
                    print('INFO.xrec.00002: duplicate lookup tuple found: %s' % str(t1))
                else:
                    self.xRecByTt[t1] = k
                    print('%s %s %s' % ('xRecByLab:: ', nt.label, k))
                    assert nt.label not in self.xRecByLab, 'Duplicate label: %s' % nt.label
                    self.xRecByLab[nt.label] = k

    def close(self):
        """Write self.orecs, plus any reference records not overridden, to a new
        'extraStructureRecords' shelf.  Does nothing when self.orecs is empty."""
        if len(self.orecs) != 0:
            sh = shelve.open('sh/extraStructureRecords', 'n')
            try:
                for flg, u, r in self.orecs:
                    sh[u] = (flg, r)
                for k in self.xRecByUid:
                    if k not in sh:
                        sh[k] = self.xRecByUid[k]
            finally:
                sh.close()

    def add2(self, cand):
        """Resolve a candidate structure against the refStructure tables.

        cand = [u,label,splab,tlab, odim, crd, cmet, cmea, flgv, flgm, desc, proc, prov]

        Lookup is by (time label, spatial label, other dimensions, coordinates,
        cell methods, cell measures, flag meanings): first in self.ref2Byt2, then
        in self.modStr.lookup; if neither matches, a new record with a generated
        'str-dNN' label is registered under the candidate's uid.

        Returns (rc, uid, label) with rc == 1 when a new record was created and 0
        otherwise.  (uid, '') is added to self.ll.
        """
        nt = self.nt2_dims._make(cand)
        cm = nt.cell_methods
        if nt.cell_methods != '':
            cm, rv = importWbRef.cellMethodCheck(cm)
            if cm in self.cellm.cminfo:
                cmid = self.cellm.cminfo[cm][2]
            elif cm in self.cellm.cmmap:
                cm = self.cellm.cmmap[cm]
                cmid = self.cellm.cminfo[cm][2]
            else:
                print('ERROR.cm.00001: cell methods not found: %s' % nt.cell_methods)
                cmid = '__cm_not_found__'
        else:
            # NOTE(review): nesting inferred -- the empty string is first passed
            # through cmmap and then tested again; confirm against upstream.
            if cm in self.cellm.cmmap:
                cm = self.cellm.cmmap[cm]
            if cm != '':
                cmid = self.cellm.cminfo[cm][2]
            else:
                cmid = 'CellMethods::fixed'
        t2 = (nt.tlab, nt.splab, nt.odims, nt.coords, cm, nt.cell_measures, nt.flag_meanings)

        done = 0
        if t2 in self.ref2Byt2:
            u = self.ref2Byt2[t2]
            rc = 0
            label = self.ref2ByUid[u][2]
            print('%s %s %s' % ('INFO.structure.10001: label reset: ', t2, label))
            done = 1
        else:
            u = cand[0]
            spid = self.spsh.uidByLab[nt.splab]
            tmid = self.spsh.tByLab[nt.tlab][0]
            if t2 in self.modStr.lookup:
                l2 = self.modStr.lookup[t2]
                if l2 not in self.ref2ByLab:
                    print('%s %s %s' % ('ERROR.structure.01020: tuple lookup maps to unlnown label: ', t2, l2))
                else:
                    u = self.ref2ByLab[l2]
                    label = self.ref2ByUid[u][2]
                    done = 2
                    rc = 0
                    print('%s %s %s %s' % ('INFO.structure.10002: label reset: ', t2, l2, label))

            if done == 0:
                print('%s %s %s' % ('ERROR.structure.01010: tuple lookup not found: ', cand, t2))
                # generate the next unused 'str-dNN' label
                label = None
                while label is None or label in self.ref2ByLab or label in self.newStrLabels:
                    self.kr += 1
                    label = 'str-d%2.2i' % self.kr
                    assert self.kr < 100, 'Too many additional structure records required'
                self.newStrLabels.add(label)
                cids = ''
                dids = ''
                # NOTE(review): the stored record carries nt.label in the label and
                # title columns, not the generated label returned below -- confirm.
                ats = [nt.splab, nt.tlab, nt.label, nt.label, nt.description, '', 'ivg.add2', nt.odims, nt.coords, cm, nt.cell_measures, nt.flag_meanings, '', spid, tmid, cmid, cids, dids, u]
                self.ref2Byt2[t2] = u
                self.ref2ByUid[u] = ats
                rc = 1

        if label in self.strRepMap:
            print('INFO.strmap.00002: mapping %s' % label)
            label = self.strRepMap[label]
            u = self.ref2ByLab[label]

        self.ll.add((u, ''))
        return (rc, u, label)

    def add(self, cand, t1=None):
        """Resolve a candidate structure against self.tmap and the extra-record tables.

        cand = [u,label,spid,tlab, odim, crd, cmet, cmea, flgv, flgm, desc, proc, prov]
        (a list; it is modified in place when the cell methods, uid or label are reset).
        t1 must be left as None; any other value trips an assertion.

        Lookup is by (time label, spatial id, other dimensions, coordinates,
        cell methods, cell measures, flag meanings).

        Returns (err, uid, label); err is True when the flag_values of a matched
        tmap record differ from the candidate's.
        """
        err = False
        if t1 is None:
            cm, fl = cellMethodCheck(cand[6])
            if fl == 1:
                cand[6] = cm
            assert len(cm) < 11 or cm[:10] != 'time: mean', 'Should not have time mean first .... %s, %s, %s' % (str(cand), cm.find('time: mean'), fl)
            nt = self.nt_dims._make(cand)
            cm = self.cellm.cmmap.get(cm, cm)
            if cm not in self.cellm.cminfo:
                print('SEVERE.cellm.0001: Cell method lookup failed: %s' % cm)
                cmttl = cm
                cmlab = '---'
            else:
                cmttl, cmlab = self.cellm.cminfo[cm][:2]

            t1 = (nt.tlab, nt.spid, nt.odims, nt.coords, nt.cell_methods, nt.cell_measures, nt.flag_meanings)
            u = nt.uid
            label = nt.label
            assert nt.spid in self.spsh.ssu, 'Spatial id not found in spsh.ssu: %s' % nt.spid

            slab, sttl, sdim = self.spsh.ssu[nt.spid][:3]
            tttl = self.spsh.tByLab[nt.tlab][1]
            title = '%s, %s [%s]' % (tttl, sttl, slab)
            if nt.coords.strip() != '' or nt.odims.strip() != '':
                title += ' {%s:%s}' % (nt.odims, nt.coords)
            if cmlab != '':
                title += ' [%s]' % cmlab

        else:
            assert False, 'Should not be here .....'

        ##tt = tuple( [nt.__dict__[k] for k in ['spid','tlab','odims','coords','cell_methods','cell_measures','flag_values','flag_meanings','procNote','prov']] )

        if t1 in self.tmap:
            ## Label Title Spatial shape Other dims coords cell methods cell measures flag meanings flag values Spatial shape (title) Spatial dimensions

            u, label, title0, description, fv, tlab, tmid = self.tmap[t1]
            if fv != nt.flag_values:
                print('%s %s %s' % ('ERROR.structure.01020: flag_value mismatch for given flag_meanings: ', cand, fv))
                err = True
            if u != nt.uid or label != nt.label:
                cand[0] = u
                cand[1] = label
                nt = self.nt_dims._make(cand)
            orec = tuple(cand + [title, sttl, sdim])
            self.orecs.add((1, u, orec))
        else:
            ##
            ## not clear how we get here ... editing structure details upsets the links from variables in awkward ways.
            ##
            crashOnFail = False
            if crashOnFail:
                assert t1 in self.xRecByTt, 'tuple not found in completed list: %s' % str(t1)
            if t1 not in self.xRecByTt:
                print('%s %s' % ('SEVERE.depr.00001: tuple not found in completed list ...', t1))
            if t1 not in self.messageDone:
                print('%s %s %s' % ('ERROR.structure.01010: tuple lookup not found: ', cand, t1))
                if t1 in self.xRecByTt:
                    u = self.xRecByTt[t1]
                    flg, orec = self.xRecByUid[u]
                elif t1[1] in self.spsh.labByUid:
                    slab = self.spsh.labByUid[t1[1]]
                    ## cand = [u,label,spid,tlab, odim, crd, cmet, cmea, flgv, flgm, desc, proc, prov]
                    ## Label Title Spatial shape Other dims coords cell methods cell measures flag meanings flag values Spatial shape (title) Spatial dimensions
                    label = None
                    while label is None or label in self.xRecByLab:
                        self.kr += 1
                        label = 'str-d%2.2i' % self.kr
                    self.newStrLabels.add(label)
                    assert self.kr < 100, 'Too many additional structure records required'
                    cand[1] = label
                    orec = tuple(cand + [title, sttl, sdim])
                    self.orecs.add((0, u, orec))

                else:
                    slab = '**spatial structure not found** .. %s' % t1[1]
                    assert False, 'Spatial structures must be consistent with spatialShape_ref.xls'
                print('%s %s %s %s' % ('EINFO.structure.01000: ', label, t1, slab))
                # list known structures agreeing with t1 in at least 6 of the 7 fields
                nnmm = 0
                for tt in self.tmap:
                    if self.tmap[tt][1] == 'str-195':
                        print('%s %s %s' % ('EINFO.structure.01040: ', tt, self.tmap[tt]))
                    nm = sum([tt[k] == t1[k] for k in range(7)])
                    if nm > 5:
                        nnmm += 1
                        assert tt[1] in self.spsh.labByUid, 'Spatial shape not found: %s' % str(tt)
                        slab = self.spsh.labByUid[tt[1]]
                        lab = self.tmap[tt][1]
                        print('%s %s %s %s' % ('EINFO.structure.01010: ', lab, tt, slab))
                if nnmm == 0:
                    print('EINFO.structure.01020: No close matches found')
                self.messageDone.add(t1)
            else:
                assert t1 in self.ee3, 'Should have caught t1 before here ... %s' % str(t1)
                ### if self.ee3.has_key( t1 ):
                u = self.ee3[t1]

        self.ee3[t1] = u
        if u in self.lldict:
            if self.lldict[u] != nt:
                print('ERROR.ivg-str.02010: duplicate u, different nt: %s' % u)
                print('%s %s' % ('ERROR.ivg-str.02011: ', nt))
                print('%s %s' % ('ERROR.ivg-str.02012: ', self.lldict[u]))
        else:
            self.lldict[u] = nt
            # self.ll is a set (see __init__), so it has add(), not append()
            self.ll.add((u, nt))
        return (err, u, label)
---|
503 | |
---|
# Frequency spellings met in input tables -> short frequency code.  Lookups
# elsewhere in this file fall back to the original spelling when there is no entry.
freqmap = {
    'daily': 'day',
    'Annual': 'yr',
    'Timestep': 'subhr',
    '1day': 'day',
    '1mon': 'mon',
    'month': 'mon',
    'year': 'yr',
    'monthly': 'mon',
    'Day': 'day',
    '6h': '6hr',
    '3 hourly': '3hr',
    '3 Hourly': '3hr',
}
---|
505 | |
---|
class c1(object):
    """Holder with a single attribute ``a``: a dict whose missing keys start as empty lists."""

    def __init__(self):
        # list-valued accumulator, one list per key
        self.a = collections.defaultdict(list)
---|
509 | |
---|
# Obsolete (disabled) population of cmip5GrpLk from rq.cmip5Grps: each group key
# is split on '_' and its first component used as the table name, giving
# cmip5GrpLk[table][variable] = group key.
doObsol = False
if doObsol:
    for k in rq.cmip5Grps.keys():
        tab = k.split('_')[0]
        for v in rq.cmip5Grps[k]:
            cmip5GrpLk[tab][v] = k
---|
517 | |
---|
class __prepRequestVar(object):
    """Map request variables to variable-group uids.

    Attributes set by the constructor:
      eern       -- ref value (field 5 of a group record) -> list of group uids
      eern2      -- '<field 4>__<field 5>' of a group record -> group uid
      err0010    -- counts of failed lookups via cmip5GrpLk, keyed by (ll[2], ll[1])
      err0020    -- counts of lookups with no candidate at all, same key
      exrqvg     -- extra variable groups created here, keyed by a fresh uuid
      vgByLabel  -- label of each extra group -> its uuid
      addExrqvg  -- uuids of extra groups that were not already in rq.rqvg
    """

    def __init__(self, addex):
        """Index rq.rqvg; when addex is true, also register the built-in extra OMIP groups.

        Raises AssertionError when two rq.rqvg records share the same
        '<field 4>__<field 5>' key or a record's first field differs from its key.
        """
        self.eern = collections.defaultdict(list)
        self.eern2 = {}
        self.err0010 = collections.defaultdict(int)
        self.err0020 = collections.defaultdict(int)

        self.exrqvg = {}
        self.vgByLabel = {}
        if addex:
            ###
            extravg = [
                ['OMIP', u'OMIP-Oyr', u'OMIP: OMIP-Oyr', 'CMIP5Rev', u'OMIP.Oyr'],
                ## ['OMIP', u'OMIP-Omon', u'OMIP: OMIP-Omon', 'CMIP5Rev', u'OMIP.Omon'],
                ['OMIP', u'OMIP-day', u'OMIP: OMIP-day', 'CMIP5Rev', u'OMIP.day'],
                ##['OMIP', u'OMIP-fx', u'OMIP: OMIP-fx', 'CMIP5Rev', u'OMIP.fx'],
                ## ['AerChemMIP','aermonthly', 'AerChemMIP: monthly', 'CMIP6', 'AerChemMIP.mon'],
            ]
            for r in extravg:
                thisuuid = str(uuid.uuid1())
                self.exrqvg[thisuuid] = [thisuuid, ] + r[:]
                self.vgByLabel[r[1]] = thisuuid

        # record layout: ['uuid', 'mip', 'label', 'title', 'ref', 'refNote']
        ixrn = 5
        dups = []
        for i, k in [(rq.rqvg[k], k) for k in rq.rqvg.keys() if k[0] != '_']:
            self.eern[i[ixrn]].append(i[0])
            k2 = '%s__%s' % (i[ixrn - 1], i[ixrn])
            if k2 in self.eern2:
                dups.append((k2, k, self.eern2[k2]))
            self.eern2[k2] = i[0]
            assert i[0] == k, 'Bad key/uuid'

        assert len(dups) == 0, 'Duplicate refs: %s' % str(dups)

        ##
        ## add var groups if missing
        ##
        self.addExrqvg = []
        for i, k in [(self.exrqvg[k], k) for k in self.exrqvg.keys()]:
            self.eern[i[ixrn]].append(i[0])
            k2 = '%s__%s' % (i[ixrn - 1], i[ixrn])
            # NOTE(review): nesting inferred -- an extra group is registered in
            # eern2 only when no existing group already owns the key; confirm.
            if k2 not in self.eern2:
                self.addExrqvg.append(k)
                self.eern2[k2] = i[0]
            assert i[0] == k, 'Bad key/uuid'

    def getvgid(self, ll):
        """Return the variable-group uid for the record ll.

        Only ll[0], ll[1] and ll[2] are read.  Keys are tried in self.eern2 in
        this order: 'CMIP5Rev__<ll[2]>.<ll[1]>', 'rev__<ll[2]>.<ll[1]>',
        '<ll[2]>__<ll[1]>', and finally the group found for ll[0] in
        cmip5GrpLk[ll[1]].  On failure a '__vg_not_found_N__' marker is returned
        and the matching error counter is incremented.
        """
        kvg = 'CMIP5Rev__%s.%s' % (ll[2], ll[1])
        kvg2 = '%s__%s' % (ll[2], ll[1])
        kvg3 = 'rev__%s.%s' % (ll[2], ll[1])
        if kvg in self.eern2:
            vgid = self.eern2[kvg]
        elif kvg3 in self.eern2:
            vgid = self.eern2[kvg3]
        elif kvg2 in self.eern2:
            vgid = self.eern2[kvg2]
        elif ll[0] is not None and ll[0] in cmip5GrpLk[ll[1]]:
            g2 = cmip5GrpLk[ll[1]][ll[0]]
            kvg2 = 'CMIP5Rev__%s.%s' % (ll[2], g2)
            if kvg2 in self.eern2:
                vgid = self.eern2[kvg2]
            else:
                vgid = '__vg_not_found_1__'
                self.err0010[(ll[2], ll[1])] += 1
        else:
            vgid = '__vg_not_found_2__'
            self.err0020[(ll[2], ll[1])] += 1
        return vgid

    def run(self):
        """Build self.rqv_vg: '<ll[2]>.<ll[1]>.<ll[0]>' -> group uid for every
        record in rq.revti whose key does not start with '_'."""
        self.rqv_vg = {}
        for k in rq.revti.keys():
            if k[0] != '_':
                ll = rq.revti[k]
                vgid = self.getvgid(ll)
                self.rqv_vg['%s.%s.%s' % (ll[2], ll[1], ll[0])] = vgid
---|
601 | |
---|
class refCmv(object):
    """Reference uids read from the 'refCmvId' shelf, indexed as refUid[tab][lab] -> shelf key."""

    def __init__(self):
        """Read every (tab, lab, ver) record of the shelf; ver is not stored.

        The shelf is closed even if a record cannot be unpacked.
        """
        self.refUid = collections.defaultdict(dict)
        sh = shelve.open('../framework/inSh/refCmvId', 'r')
        try:
            for k in sh.keys():
                tab, lab, ver = sh[k]
                self.refUid[tab][lab] = k
        finally:
            sh.close()
---|
610 | |
---|
class checkCmv(object):
    """Find entries that share a frequency and CMOR name, and pick replacements.

    After construction self.uidReplace maps the uid of a deprecated entry to the
    uid of the preferred one, for every duplicated '<freq>.<cmorv>' key whose
    sorted provenance pair is (C4MIP..., PMIP...): the C4MIP entry is preferred.
    """

    def __init__(self, tlist):
        """tlist: iterable of records with attributes freq, mip, group, cmorv, uid and var."""
        #
        # sort by cmor name and frequency
        #
        byName = collections.defaultdict(list)
        for tt in tlist:
            fr = freqmap.get(tt.freq, tt.freq)
            prov = '%s [%s]' % (tt.mip, tt.group)
            byName['%s.%s' % (fr, tt.cmorv)].append((tt.uid, prov, tt.var))

        nm = 0
        byProvGroup = collections.defaultdict(list)
        byNameProv = collections.defaultdict(dict)
        for k in byName:
            if len(byName[k]) > 1:
                nm += 1
                plist = tuple(sorted([i[1] for i in byName[k]]))
                ##
                ## sort by provenance groupings
                ##
                byProvGroup[plist].append(k)
                ##
                ## indexed by provenance (NB there could be duplication of "prov" values leading to over-writing
                ##
                for i in byName[k]:
                    byNameProv[k][i[1]] = i

        print('%s %s' % ('INFO.001.22222: number of duplicate cmor table entries in annex =', nm))
        ##
        ## pick out the C4MIP, PMIP pairs
        ##
        self.uidReplace = {}
        for k in sorted(byProvGroup):
            if len(k) == 2 and k[0][:5] == 'C4MIP' and k[1][:4] == 'PMIP':
                for k2 in byProvGroup[k]:
                    pref = byNameProv[k2][k[0]]
                    depr = byNameProv[k2][k[1]]
                    print('Pref: %s; depricated : %s ' % (pref, depr))
                    self.uidReplace[depr[0]] = pref[0]
            # NOTE(review): nesting inferred -- the summary line is assumed to be
            # printed for every provenance grouping, not only C4MIP/PMIP pairs.
            print('%s %s %s' % ('INFO.001.22222: ', k, len(byProvGroup[k])))
---|