1 | """Parsing adjusted CMIP5 tables. |
---|
2 | """ |
---|
3 | import string, shelve, uuid |
---|
4 | from fcc_utils2 import snlist |
---|
5 | import xlrd, string, shelve, os, re, sys, stat |
---|
6 | import collections, hashlib |
---|
7 | import xlutils, xlwt |
---|
8 | import xlutils.copy |
---|
9 | #### |
---|
10 | import dreq_cfg |
---|
11 | from utils_wb import wbcp, workbook, uniCleanFunc |
---|
12 | |
---|
# Version stamp interpolated into the names of the shelve files written by this module.
vdate = '20160309'

# Record layouts used when consolidating the request tables.
nt__deckrq = collections.namedtuple(
    'dckrq',
    ['control', 'AMIP', 'abrupt4xCO2', 'rq_1pctCO2', 'historical']
)
nt__grptbl = collections.namedtuple(
    'grptbl',
    ['grp', 'var', 'srcTable', 'freq', 'description', 'shape',
     'levels', 'timeProc', 'mask', 'priority', 'mip', 'id']
)
nt__newtbl = collections.namedtuple(
    'newtbl',
    ['var', 'sn', 'stat', 'grid', 'units', 'long_name',
     'description', 'priority', 'obs', 'mip', 'id']
)

# Module-level accumulators: missing keys default to an empty dict / zero.
dd_rq = collections.defaultdict(dict)
dd_tbl = collections.defaultdict(int)
---|
20 | |
---|
def md5sum(filename, blocksize=65536):
    """Return the hexadecimal MD5 digest of the file at *filename*.

    The file is read in chunks of *blocksize* bytes so that large workbooks
    are never held in memory in one piece.

    The file is opened read-only ("rb"): hashing must not require write
    permission on the input.  The end-of-file sentinel is the bytes literal
    b"" so the read loop terminates whether ``read`` returns ``str``
    (Python 2) or ``bytes`` (Python 3).
    """
    digest = hashlib.md5()
    with open(filename, "rb") as f:
        for block in iter(lambda: f.read(blocksize), b""):
            digest.update(block)
    return digest.hexdigest()
---|
27 | |
---|
#### shelve to record file access times and checksums
# Opened as a side effect of importing/running this module and kept open for the
# whole run; the driver code at the end of the file calls shr1.close().
# Entries are written by rq.__init__ as
#   shr1[md5] = (file name, path, ctime, size, md5, mip)
# and read back by rqsummary.__init__ to find the latest record per file.
shr1 = shelve.open( 'shref01/shr1_md5' )
---|
30 | |
---|
class tupsort:
    """Comparator that orders indexable records by the element at position ``k``."""

    def __init__(self, k=0):
        # Index of the element that ``cmp`` compares.
        self.k = k

    def cmp(self, x, y):
        """Three-way compare ``x[k]`` with ``y[k]``.

        Returns -1, 0 or 1.  Written with rich comparisons instead of the
        ``cmp`` builtin, which does not exist in Python 3; the result is the
        same for ordinary ordered values.
        """
        a = x[self.k]
        b = y[self.k]
        return (a > b) - (a < b)
---|
36 | |
---|
def uniquify( ll ):
    """Return the distinct elements of *ll* in ascending order.

    Side effect (kept from the original): *ll* itself is sorted in place.
    An empty input now yields an empty list; previously it raised
    IndexError when reading ``ll[0]``.
    """
    ll.sort()
    if not ll:
        return []
    l0 = [ll[0]]
    for l in ll[1:]:
        # After sorting, duplicates are adjacent: compare with the last kept item only.
        if l != l0[-1]:
            l0.append(l)
    return l0
---|
44 | |
---|
def getRowValues(ll, minLen=0, maxLen=0):
    """Return the ``.value`` of each cell in *ll* as a list of exactly *minLen* items.

    Rows with at least *minLen* cells are cut down to the first *minLen*
    values; shorter rows are padded on the right with empty strings.

    NOTE(review): *maxLen* is accepted for interface compatibility but cannot
    influence the result, because every path returns *minLen* items (so the
    default ``minLen=0`` always gives ``[]``).  The callers in this file pass
    ``minLen == maxLen``.
    """
    values = [cell.value for cell in ll]
    have = len(values)
    if have >= minLen:
        return values[:minLen]
    # Too short: fill up to the requested length with blanks.
    values.extend([''] * (minLen - have))
    return values
---|
58 | |
---|
clabs = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
def clab(n):
    """Return the letter label for zero-based index *n*: 'A'..'Z', then 'AA', 'AB', ...

    Presumably used for spreadsheet column letters -- confirm against callers.

    Uses floor division so the quotient stays an integer on Python 3 as well
    as Python 2 (plain ``/`` yields a float there and breaks the indexing).

    Raises AssertionError when ``n // 26`` is 26 or more.
    """
    i = n // 26
    assert i < 26, 'Not ready for row number greater than 26*26'
    if i == 0:
        return clabs[n]
    else:
        # Leading letter counts completed blocks of 26; trailing letter is the remainder.
        return clabs[i-1] + clabs[ n - i*26]
---|
67 | |
---|
def getRow(sht):
    """Index the data rows of sheet *sht* by the string in their sixth column.

    A row is used when its first cell has ``ctype == 2`` (presumably xlrd's
    numeric cell type -- confirm) and the string form of that cell's value is
    not '0.0'.  Every cell value of a used row is converted with ``str``; the
    resulting list is stored under its element 5.  A later row with the same
    key replaces an earlier one.

    Returns a dict mapping key -> list of cell strings.

    Changes from the earlier version: each row is fetched once per iteration,
    the cell strings are built as a real list (indexable on Python 3 too),
    and the bare ``try/except`` around the dictionary assignment was removed
    because a ``str`` key cannot make that assignment fail.
    """
    ee = {}
    for i in range(sht.nrows):
        row = sht.row(i)
        first = row[0]
        if first.ctype == 2 and str( first.value ) != '0.0':
            l = [str( x.value ) for x in row]
            ee[l[5]] = l
    return ee
---|
80 | |
---|
def outvSer( ov ):
    """Serialise a sequence of 3-item records as 'a|b|c; d|e|f; ...'.

    Each item is converted with ``str``.  Uses ``str.join`` rather than the
    deprecated ``string.join`` module function (absent from Python 3).
    """
    return '; '.join( ['%s|%s|%s' % tuple( map( str, i) ) for i in ov] )
---|
def joinRec( rl, tab='\t', omit=(), lmax=None ):
    """Join the items of record *rl* into one delimited string.

    rl   -- sequence of values; each is converted with ``str`` and stripped
            of surrounding whitespace.
    tab  -- separator placed between fields.
    omit -- positions (indices into *rl*) to leave out.  The default is an
            immutable empty tuple instead of a shared mutable list.
    lmax -- if given, only the first *lmax* positions are considered.

    Uses string methods rather than the deprecated ``string.strip`` /
    ``string.join`` module functions (absent from Python 3).
    """
    l1 = len(rl)
    if lmax is not None and l1 > lmax:
        l1 = lmax
    return tab.join( [str(rl[i]).strip() for i in range(l1) if i not in omit] )
---|
95 | |
---|
96 | |
---|
def lennbl(ll):
    """Length of *ll* with trailing '' entries ignored.

    Returns the one-based position of the last element that is not the empty
    string, or 0 when there is none.
    """
    last = 0
    for pos, item in enumerate(ll, 1):
        if item != '':
            last = pos
    return last
---|
105 | |
---|
def matchVals( thisv, valset ):
    """Compare *thisv* element-wise with each candidate in *valset*.

    For every candidate a list of booleans is built: equality over the shared
    non-blank prefix (lengths from lennbl), followed by False for each
    position by which the longer of the two exceeds the shorter.

    Returns the boolean list of
      * the first candidate for which every entry is True (this includes the
        case where both sequences are blank, since all([]) is True); else
      * the first candidate whose first element matched; else
      * the first candidate with the highest number of matching positions.

    max() raises ValueError if *valset* is empty.

    NOTE(review): nesting reconstructed from a listing without indentation;
    in particular "return ll2[ k0 ]" is taken to sit inside the
    "if len(lk0) > 0" branch -- confirm.
    """
    ll1 = []    # number of matching positions, per candidate
    ll2 = []    # boolean comparison list, per candidate
    lk0 = []    # indices of candidates whose first element matched
    kk = 0      # running candidate index
    for vals in valset:
        l1 = min( [lennbl(thisv), lennbl(vals)] )
        l2 = max( [lennbl(thisv), lennbl(vals)] )
        ll = []
        for i in range(l1):
            ll.append( vals[i] == thisv[i] )
        # positions present in only one of the two count as mismatches
        for i in range(l2-l1):
            ll.append(False)
        if all(ll):
            return ll
        k = 0
        for l in ll:
            if l:
                k+=1
        # ll is non-empty here: an empty list would have returned above
        if ll[0]:
            lk0.append( kk )

        ll1.append(k)
        ll2.append(ll)
        kk += 1
    k0 = max( ll1 )
    if len(lk0) > 0:
        # a match on the first element takes precedence over the match count
        k0 = lk0[0]
        return ll2[ k0 ]
    return ll2[ ll1.index(k0) ]
---|
136 | |
---|
class mipTable(object):
    """Holder for one table: its identifier, its section, and entries grouped by variable."""

    def __init__(self, id, section):
        self.id = id
        self.section = section
        # variable name -> list of entries; unseen names start as an empty list
        self.vars = collections.defaultdict(list)
---|
143 | |
---|
class s0(object):
    """Leaf container: attribute ``a`` maps a key to a list (empty by default)."""

    def __init__(self):
        self.a = collections.defaultdict(list)
---|
class s1(object):
    """Outer container: attribute ``a`` maps a key to an ``s0`` instance, created on first access."""

    def __init__(self):
        self.a = collections.defaultdict(s0)
---|
150 | |
---|
151 | class rqsummary(object): |
---|
152 | __doc__ = """Create a request summary table, by MIP and variable table. |
---|
153 | Creates a list of default dictionaries, one for each table. An entry for each MIP in each dictionary. |
---|
154 | """ |
---|
155 | |
---|
156 | def __init__(self,exptInfo=None): |
---|
157 | self.tablist = [] |
---|
158 | self.tabindx = {} |
---|
159 | self.mips = collections.defaultdict( int ) |
---|
160 | self.vars = collections.defaultdict( list ) |
---|
161 | self.t = {} |
---|
162 | self.records = [] |
---|
163 | self.recordProv = {} |
---|
164 | self.exptInfo = exptInfo |
---|
165 | self.ref1 = collections.defaultdict( s1 ) |
---|
166 | for k in shr1.keys(): |
---|
167 | r = shr1[k] |
---|
168 | ## index on mip/file name/md5 |
---|
169 | self.ref1[r[-1]].a[r[0]].a[k] = r |
---|
170 | for k in self.ref1.keys(): |
---|
171 | for k1 in self.ref1[k].a.keys(): |
---|
172 | if len( self.ref1[k].a[k1].a.keys() ) > 1: |
---|
173 | ll = {} |
---|
174 | for k2 in self.ref1[k].a[k1].a.keys(): |
---|
175 | ll[ self.ref1[k].a[k1].a[k2][2] ] = k2 |
---|
176 | ks = ll.keys() |
---|
177 | ks.sort() |
---|
178 | thisk = ll[ks[-1]] |
---|
179 | assert thisk in self.ref1[k].a[k1].a, 'ERROR referencing ref1.a.a ... with %s' % thisk |
---|
180 | self.ref1[k].a[k1].latest = self.ref1[k].a[k1].a[ thisk ] |
---|
181 | else: |
---|
182 | assert self.ref1[k].a[k1].a.keys()[0] in self.ref1[k].a[k1].a, 'ERROR referencing ref1.a.a ... with %s' % (self.ref1[k].a[k1].a.keys()[0] ) |
---|
183 | self.ref1[k].a[k1].latest = self.ref1[k].a[k1].a[ self.ref1[k].a[k1].a.keys()[0] ] |
---|
184 | |
---|
185 | def addTab(self,tab,section): |
---|
186 | self.t[tab] = mipTable( tab,section) |
---|
187 | |
---|
188 | def add(self,mip,path,tab,tabmode='uid', extra=None): |
---|
189 | self.tab = tab |
---|
190 | ## rq will add a reference to parent |
---|
191 | ##print '######',mip, path, tab |
---|
192 | return rq(mip,path,parent=self,tab=tab,tabmode=tabmode,section=extra) |
---|
193 | |
---|
194 | def addItem( self, mip, tab, nn ): |
---|
195 | if not self.tabindx.has_key( tab ): |
---|
196 | self.tabindx[tab] = len(self.tablist) |
---|
197 | self.tablist.append( collections.defaultdict( int ) ) |
---|
198 | ix = self.tabindx[tab] |
---|
199 | self.tablist[ix][mip] = nn |
---|
200 | self.mips[mip] += 1 |
---|
201 | |
---|
202 | def checkVars(self): |
---|
203 | keys = self.vars.keys() |
---|
204 | keys.sort() |
---|
205 | for k in keys: |
---|
206 | if len( self.vars[k] ) == 1: |
---|
207 | print 'INFO.00010.0001: ',k, 'singleton' |
---|
208 | else: |
---|
209 | li = self.vars[k] |
---|
210 | ## getting a bit intricate here -- 3rd element is index of record row |
---|
211 | ll = [self.records[li[0][2]], ] |
---|
212 | aa = collections.defaultdict( list ) |
---|
213 | aa[0].append[0] |
---|
214 | |
---|
215 | def prep(self,mode='vars',tab=None): |
---|
216 | if tab != None: |
---|
217 | self.tab = tab |
---|
218 | kk = 0 |
---|
219 | mips = self.mips.keys() |
---|
220 | mips.sort() |
---|
221 | keys = self.tabindx.keys() |
---|
222 | keys.sort() |
---|
223 | self.oorecs = [] |
---|
224 | if mode == 'vars': |
---|
225 | keys = self.t[self.tab].vars.keys() |
---|
226 | keys.sort() |
---|
227 | for k in keys: |
---|
228 | rec = [k,] |
---|
229 | ee1 = collections.defaultdict( list ) |
---|
230 | ee2 = {} |
---|
231 | for t in self.t[self.tab].vars[k]: |
---|
232 | k0 = joinRec( self.records[t[2]], tab=',', omit=[7,16,23] ) |
---|
233 | ee1[k0].append( t ) |
---|
234 | ### |
---|
235 | ### differences in records found as multiple entries in collection here ... |
---|
236 | ### |
---|
237 | if len(ee1.keys()) <= 1: |
---|
238 | pass |
---|
239 | ##print 'INFO.003: NO CHANGE AT %s (%s)' % (k,len(self.t[self.tab].vars[k]) ) |
---|
240 | else: |
---|
241 | print 'INFO.003: DUPLICATES AT %s (%s)' % (k,len(ee1.keys()) ) |
---|
242 | for k1 in ee1.keys(): |
---|
243 | for t in ee1[k1]: |
---|
244 | ee2[t[0]] = k1 |
---|
245 | k1 = ee1.keys() |
---|
246 | ee3 = collections.defaultdict( list ) |
---|
247 | for t in self.t[self.tab].vars[k]: |
---|
248 | ee3[k1.index( ee2[t[0]] ) ].append( '%s[%s]' % (t[0],t[1]) ) |
---|
249 | rec.append( '%s[%s]{%s}' % (t[0],t[1],k1.index( ee2[t[0]]) ) ) |
---|
250 | ##self.oorecs.append(rec) |
---|
251 | for kkk in k1: |
---|
252 | ix = ee1[kkk][0][2] |
---|
253 | ik = k1.index(kkk) |
---|
254 | rec = [k, string.join(ee3[ik],';'),] + list(self.records[ix]) |
---|
255 | self.oorecs.append(rec) |
---|
256 | |
---|
257 | def showXls(self,wb,r0=3,offset=0): |
---|
258 | r = r0 |
---|
259 | thisStj = 2 |
---|
260 | for rec in self.oorecs: |
---|
261 | i = offset |
---|
262 | for item in rec: |
---|
263 | wb.putValue2( r,i, item ) |
---|
264 | i+= 1 |
---|
265 | thisStj = 5 - thisStj |
---|
266 | r+= 1 |
---|
267 | |
---|
268 | def show(self,oo,mode='vars',tab=None): |
---|
269 | if tab != None: |
---|
270 | self.tab = tab |
---|
271 | kk = 0 |
---|
272 | mips = self.mips.keys() |
---|
273 | mips.sort() |
---|
274 | keys = self.tabindx.keys() |
---|
275 | keys.sort() |
---|
276 | if mode == 'vars': |
---|
277 | for rec in self.oorecs: |
---|
278 | srec = joinRec( rec) + '\t' |
---|
279 | oo.write( srec + '\n' ) |
---|
280 | |
---|
281 | elif mode == 'oldvars': |
---|
282 | keys = self.t[self.tab].vars.keys() |
---|
283 | keys.sort() |
---|
284 | for k in keys: |
---|
285 | rec = '%s\t' % k |
---|
286 | ee1 = collections.defaultdict( list ) |
---|
287 | ee2 = {} |
---|
288 | for t in self.t[self.tab].vars[k]: |
---|
289 | k0 = joinRec( self.records[t[2]], tab=',', omit=[7,16,23] ) |
---|
290 | ee1[k0].append( t ) |
---|
291 | if len(ee1.keys()) > 1: |
---|
292 | for k1 in ee1.keys(): |
---|
293 | for t in ee1[k1]: |
---|
294 | ee2[t[0]] = k1 |
---|
295 | k1 = ee1.keys() |
---|
296 | for t in self.t[self.tab].vars[k]: |
---|
297 | rec += '%s[%s]{%s}\t' % (t[0],t[1],k1.index( ee2[t[0]]) ) |
---|
298 | oo.write( rec + '\n' ) |
---|
299 | for kkk in k1: |
---|
300 | ix = ee1[kkk][0][2] |
---|
301 | rec = '--%s--\t' % k1.index(kkk) + joinRec( self.records[ix] ) + '\t' |
---|
302 | oo.write( rec + '\n' ) |
---|
303 | else: |
---|
304 | oo.write( ',' + string.join(mips, ',') + ',\n' ) |
---|
305 | for k in keys: |
---|
306 | r = [] |
---|
307 | ix = self.tabindx[k] |
---|
308 | for m in mips: |
---|
309 | r.append( self.tablist[ix][m] ) |
---|
310 | if max(r) > 0: |
---|
311 | rec = '%s,' % k |
---|
312 | for i in r: |
---|
313 | rec += '%s,' % i |
---|
314 | oo.write( rec + '\n' ) |
---|
315 | |
---|
316 | #dd_rq = collections.defaultdict( dict ) |
---|
317 | #dd_tbl = collections.defaultdict( int ) |
---|
class rq(object):
    """Class rq
    --------
    This class will parse a modified CMIP5 standard output sheet.

    Constructing an instance stats the workbook, resolves its md5 (re-using
    the value cached in the ``shr1`` shelf when ctime and size are unchanged),
    opens the workbook and, if the requested sheet exists, parses it into the
    parent rqsummary.  ``self.ok`` / ``self.reason`` report the outcome.

    NOTE(review): block nesting was reconstructed from a listing that had
    lost its indentation; ambiguous spots are marked below.
    """

    def __init__(self, mip,path, parent=None,tab=u'Omon',tabmode='uid',section=None ):
        """
        mip     -- MIP name the workbook belongs to.
        path    -- path of the workbook.
        parent  -- rqsummary that receives the parsed records; it is
                   dereferenced unconditionally, so the default None cannot
                   actually be used.
        tab     -- sheet name to parse.
        tabmode -- 'tab': table id is the sheet name; otherwise 'mip.tab'
                   (mip 'CMIP5' always uses the bare sheet name).
        section -- passed through to the table and to each stored entry.
        """

        self.mip = mip
        self.section = section
        self.parent = parent
        self.nn = 0          # number of entries added by parse01
        self.fn = string.split( path, '/' )[-1]
        stt = os.stat( path )
        self.ctime = stt[stat.ST_CTIME]
        self.fsize = stt[stat.ST_SIZE]
        print 'INFO.001.00020: >>>> %s: %s' % (self.fn,self.ctime)
        if self.parent.ref1[mip].a.has_key(self.fn):
            # NOTE(review): this inner test negates the has_key test just
            # passed on the same mapping, so its first branch looks unreachable.
            if self.fn not in self.parent.ref1[mip].a:
                md5 = md5sum( path )
                shr1[md5] = (self.fn,path,self.ctime, self.fsize, md5,mip )
            else:
                tt = self.parent.ref1[mip].a[self.fn].latest
                if len( tt ) > 3:
                    if self.ctime == tt[2] and self.fsize == tt[3]:
                        # same ctime and size: trust the cached checksum
                        print 'INFO.001.00030: %s unchanged' % self.fn
                        md5 = tt[4]
                    else:
                        md5 = md5sum( path )
                        if md5 == tt[4]:
                            print 'INFO.001.00040: md5 unchanged: %s' % self.fn
                        else:
                            shr1[md5] = (self.fn,path,self.ctime, self.fsize, md5,mip )
                else:
                    # checksum is recomputed but not written back to the shelf here
                    print 'ERROR.888.0001: Unexpected ref1 record (input status): %s' % str(tt)
                    md5 = md5sum( path )
        else:
            # first time this file is seen for this mip
            md5 = md5sum( path )
            print 'INFO.888.0001: setting md5 for %s' % path
            shr1[md5] = (self.fn,path,self.ctime, self.fsize, md5,mip )
        self.md5 = md5
        self.wk1 = wbcp( inbook=path )
        if tab in self.wk1.sns:
            self.tab = tab
            if tabmode == 'tab' or mip == 'CMIP5':
                self.tabid = tab
            else:
                self.tabid = '%s.%s' % (mip,tab)
            self.wk1.focus( tab)
            if not self.parent.t.has_key(self.tabid):
                self.parent.addTab(self.tabid,self.section)
            self.ok = self.parse01()
            if not self.ok:
                self.reason = 'Parse01 failed'
        else:
            self.reason = 'Table not present'
            self.ok = False

    def parse01(self):
        """Parsing revised CMIP5 standard output

        Scans the focused sheet from row index 3.  Rows whose first cell has
        ctype 2 (presumably xlrd numeric -- confirm) are data rows.  Each such
        row with a usable name is appended to ``parent.records`` (without its
        first cell) and, unless column index 24 switches it off, registered in
        ``parent.t[self.tabid].vars`` as (mip, priority, record index,
        section, sheet row).  Always returns True.
        """

        # First pass: collect, per name in column index 5, the rows it occurs
        # on together with the ctype of column index 17.
        ee1 = collections.defaultdict( list )
        for j in range(3,self.wk1.currentSi.nrows):
            if self.wk1.currentSi.row(j)[0].ctype == 2:
                v = string.strip( str( self.wk1.currentSi.row(j)[5].value ) )
                t2 = self.wk1.currentSi.row(j)[17].ctype
                ee1[v].append((j,t2))
        for j in range(3,self.wk1.currentSi.nrows):
            rv = map( lambda x: x.value, self.wk1.currentSi.row(j) )
            lr0 = len(rv)     # original row length, before padding
            # pad with blanks towards 26 columns (at most 10 are added)
            for i in range(10):
                if len(rv) < 26:
                    rv.append('')
            if self.wk1.currentSi.row(j)[0].ctype == 2:
                v = string.strip( str( self.wk1.currentSi.row(j)[5].value ) )
                if self.wk1.currentSi.row(j)[17].ctype == 1:
                    v2 = string.strip( str( self.wk1.currentSi.row(j)[17].value ) )
                else:
                    v2 = None
                if v not in ['0.0','']:
                    if len(ee1[v]) > 1:
                        ##v += '__%s' % ['a','b'][ee1[v].index(j)]
                        assert not all( map( lambda x: x[1] != 1, ee1[v] ) ), 'duplicate variable name and no cmor name ... %s %s' % (v,self.mip)
                        # NOTE(review): taken to apply to duplicated names only;
                        # it could equally sit one level out -- confirm.
                        if v2 != None:
                            v = v2
                    if v == 'clic':
                        print 'INFO.clic.00004: ',rv
                    ixr = len(self.parent.records )
                    ## add record
                    self.parent.records.append( tuple(rv[1:]) )
                    # NOTE(review): kr is taken after the append, so it is ixr+1;
                    # recordProv keys are therefore offset by one from record indices.
                    kr = len( self.parent.records )
                    self.parent.recordProv[kr] = self.md5
                    p = int( rv[0] + 0.001 )
                    if lr0 > 24:
                        # column index 24 ("column Y"): 0 drops the row, 2 forces priority 1
                        if self.wk1.currentSi.row(j)[24].ctype == 2:
                            yv = int( rv[24] + 0.001 )
                            assert yv in [0,1,2], 'Invalid value in column Y, j=%s,v=%s' % (j,v)
                            if yv == 0:
                                p = None
                            elif yv == 2:
                                p = 1
                        elif rv[24] != '':
                            print 'WARN.001.02001: [1]:', self.mip, v, p, rv[24]

                    if p != None:
                        self.parent.t[self.tabid].vars[v].append( (self.mip,p,ixr,self.section,j) )
                        self.nn += 1

        return True
---|
427 | |
---|
class main(object):
    """Driver object: opens the output shelves, reads the template and
    experiment workbooks, and provides the processing passes run1, run2,
    loadNewVars and groups.

    NOTE(review): block nesting was reconstructed from a listing that had
    lost its indentation; ambiguous spots are marked below.
    """
    def __init__(self):
        """Open output files, check MIP names and gather per-group experiment totals."""
        kk=3
        self.idx = 0          # counter behind the 'mv.010.NNNN' ids issued in groups()
        self.vl3 = None       # expected to be set by the caller before loadNewVars()
        # 'n' flag: each shelf is created empty on every run
        self.sh = shelve.open( 'dreq_consol_tables_shelve_v%s' % vdate, 'n' )
        self.shnv = shelve.open( 'dreq_consol_tables_nv_shelve_v%s' % vdate, 'n' )
        self.shnvo = shelve.open( 'dreq_consol_tables_nvom_shelve_v%s' % vdate, 'n' )
        self.shold = shelve.open( 'dreq_consol_tables_old_shelve_v%s' % vdate, 'n' )
        self.oonvo = open('DepricatedNewVars.txt', 'w' )

        base = '/home/martin/2014/wip/dreq/'
        self.dir0 = '/home/martin/2014/wip/dreq/input/'
        # 'file' is assigned twice and not used again in this method
        file = 'PMIP/CMIP6DataRequestCompilationTemplate_20141218_PMIP_v150228_final.xls'
        file = 'C4MIP/CMIP6DataRequestCompilationTemplate_C4MIP_06March2015_submitted.xls'
        fileTmpl = 'CMIP6DataRequestCompilationTemplate_20141218.xls'
        # the second assignment is the one that takes effect
        self.newVars = 'CMIP6DataRequest_ConsolidatedNewVariables_v20150907.xls'
        self.newVars = 'CMIP6DataRequest_ConsolidatedNewVariables.xls'
        self.nvd = {}         # new-variable name -> [priority] + sheet row (filled by loadNewVars)

        wk0 = wbcp( inbook=base+fileTmpl )
        self.cfg = dreq_cfg.rqcfg()
        wk0.focus( u'Experiments')
        # group the rows of the template's 'Experiments' sheet by the value in column index 3
        mlist = collections.defaultdict( list )
        for i in range(2,wk0.currentSi.nrows):
            mip = str(wk0.currentSi.row(i)[3].value)
            vals = map( lambda x: x.value, wk0.currentSi.row(i) )
            if mip != '':
                mlist[mip].append( vals )

        self.mips = mlist.keys()
        self.mips.sort()

        # every key of cfg.ff must be a known MIP (after renaming) or a diagnostic MIP
        mipTrans ={ 'geoMIP':'GeoMIP' }
        diagMips = ['DynVar','SIMIP']
        k2 = self.cfg.ff.keys()
        k2.sort()
        nn = 0
        for k in k2:
            k = mipTrans.get( k,k)
            if k not in self.mips and k not in diagMips:
                print 'ERROR.001.09001: %s not found' % k
                nn += 1
        assert nn == 0, 'MIP naming error?'

        exptList = '/home/martin/Documents/CMIP6_ExpermentList_draft01April2015.xls'
        wke = wbcp( exptList )
        wke.focus( 'Experiments' )
        # per group (column index 1): sum of column 13, last value of column 12, and their ratio
        expgs = collections.defaultdict( int )
        expgsEns = collections.defaultdict( int )
        expgsYpe = collections.defaultdict( int )
        for k in range(2,wke.currentSi.nrows):
            vals = map( lambda x: x.value, wke.currentSi.row(k) )
            g = string.strip( vals[1] )
            if g[:5] == 'DAMIP':
                print 'INFO.001.00060: ',g,vals[12],vals[13]
            expgs[g] += vals[13]
            expgsEns[g] = vals[12]

        for k in expgs.keys():
            if expgsEns[k] == 0:
                expgsYpe[k] = 0
            else:
                try:
                    expgsYpe[k] = expgs[k]/expgsEns[k]
                except:
                    # report the offending values, then let the error propagate
                    print 'INFO.001.00070: ',k, expgs[k], expgsEns[k]
                    raise

        self.rqs= rqsummary(exptInfo=(expgs,expgsEns,expgsYpe))
        self.exptInfo=(expgs,expgsEns,expgsYpe)

    def loadNewVars(self):
        """Read the 'New variables' sheet into ``self.nvd`` and file each variable.

        Variables whose name is already in ``self.vl3`` go to the ``shnvo``
        shelf and the text log ``oonvo``; the others are stored in ``shnv``
        under a fresh uuid.  A plain-text dump of the sheet is written to
        /tmp/newvars.txt.
        """
        assert os.path.isfile( self.newVars ), '%s not found' % self.newVars
        wb = workbook(self.newVars)
        sh = wb.book.sheet_by_name( 'New variables' )
        oo = open( '/tmp/newvars.txt', 'w' )
        for i in range(3,sh.nrows):
            r = map( lambda x: x.value, sh.row(i) )
            try:
                oo.write( string.join(map(str,r) ) + '\n' )
            except:
                # the row could not be converted/written as a whole; log the first cell only
                oo.write( r[0] + ' --- rest missing --- \n' )
            if r[0] != '' and r[4] != '' and r[0][0] != '#':
                v = string.strip( str( r[4] ) )
                if v[0] != '#':
                    bits = string.split(v,' ')
                    # names containing a space are skipped
                    if len(bits) == 1:
                        m = str( r[0] )
                        p = 0
                        try:
                            if m == 'LUMIP':
                                # LUMIP rows: element 5 is dropped before the priority is read
                                r = r[:5] + r[6:]
                            p = int( r[11] )
                        except:
                            print 'INFO.001.00070: Failed to read priority: ',v,m
                        # strip a trailing '**' or '!' marker from the name
                        if v[-2:] == '**':
                            v = v[:-2]
                        elif v[-1] == '!':
                            v = v[:-1]
                        if self.nvd.has_key(v):
                            print 'INFO.001.00070: ',v,self.nvd[v],m,p
                        # a repeated name overwrites the earlier entry
                        self.nvd[v] = [p,] + r

        for v in self.nvd.keys():
            if v in self.vl3:
                print 'WARN.090.08000: "new" variable in old list: %s: %s ' % (v,str(self.vl3[v]) )
                self.shnvo[v] = ( self.nvd[v][:], self.vl3[v] )
                self.oonvo.write( '-- %s --\n' % v )
                self.oonvo.write( str( self.nvd[v][:]) + '\n' )
                self.oonvo.write( str( self.vl3[v][:] ) + '\n' )
            else:
                k = str( uuid.uuid1() )
                self.shnv[k] = self.nvd[v][:] + [k,]
        oo.close()

    def groups(self):
        """Read the variable-group sheets of every workbook in ``cfg.ee``.

        Each accepted row becomes a 12-item list in nt__grptbl order
        (grp, var, srcTable, freq, description, shape, levels, timeProc,
        mask, priority, mip, id) and is stored in ``self.sh`` under a new
        uuid with the sheet row index appended.  ``self.glist`` receives one
        (mip, sheet, header rows, per-shape counts) tuple per sheet.

        Relies on ``self.cmip5sns`` (set by run1) and ``self.nvd`` (filled by
        loadNewVars).
        """
        vl3 = self.vl3
        # expected header cells; rh3 is defined but not used in this method
        rh1 = ['Short name', 'Standard Name', 'Table', 'Frequency', 'Description extension (optional)', 'Shape', 'Levels', 'Time mean, point or climatology', 'Mask (optional)']
        rh2 = ['Short name of group', 'Variable short name', 'Table', 'Frequency', 'Description extension (optional)', 'Shape', 'Levels', 'Time mean, point or climatology', 'Mask (optional)']
        rh3 = ['Short name of group', 'Variable short name', 'Table', 'Frequency', 'Description extension (optional)', 'Shape', 'Levels', 'Time mean, point or climatology', 'Mask (optional)', 'Priority']

        print 'INFO.002.0001: cfg.ee.keys: ',self.cfg.ee.keys()
        omit = ['ALL VARIABLES', 'Objectives','Experiments','Experiment Groups','Request scoping','New variables','__lists__']
        keys = self.cfg.ee.keys()
        keys.sort()
        ee = {}
        self.glist = []
        for k in keys:
            gset = set()      # group names seen for this MIP
            fn = self.cfg.ee[k]
            path = '%s%s/%s' % (self.dir0,k,fn)
            wb = workbook( path )
            ss = []
            for s in wb.sns:
                if s not in omit:
                    ss.append(s)
            for s in ss:
                e1 = collections.defaultdict( int )
                if s[:5] != 'CMIP5':
                    sh = wb.book.sheet_by_name( s )
                    # rows whose first cell starts with 'Short name' are header rows
                    ll = []
                    for i in range(sh.nrows):
                        thisr = sh.row(i)
                        tv = thisr[0].value
                        if tv[:10] == 'Short name':
                            ll.append(i)
                    assert len(ll) in [1,2], 'Could not parse sheet %s, %s, %s: %s' % (path,k,s,len(ll))
                    withPriority = False
                    hr = sh.row( ll[-1] )
                    if len(ll) == 1:
                        ## nt__grptbl = collections.namedtuple( 'grptbl', ['grp','var','srcTable','freq','description','shape','levels','timeProc','mask'] )
                        # one header row: group name in column 0, variable in column 1
                        iv = 1
                        it = 0
                        ok = len( hr ) >= 9 and all( map( lambda x: string.strip(hr[x].value) == rh2[x], range(9) ) )
                        assert ok, '001: Sheet heading not recognised: %s' % str(hr)
                        if len(hr) > 9 and hr[9].value == u'Priority':
                            withPriority = True
                            thisl = 10
                        else:
                            thisl = 9
                    else:
                        # two header rows: variable in column 0; the group name is
                        # read once from row 2, column 1
                        ok = len( hr ) >= 9 and all( map( lambda x: string.strip(hr[x].value) == rh1[x], range(9) ) )
                        assert ok, '002: Sheet heading not recognised: %s' % str(hr)
                        iv = 0
                        it = -1
                        tv = sh.row(2)[1].value
                        thisl = 9

                    irsh = 5      # column index of 'Shape'
                    for i in range(ll[-1]+1,sh.nrows):
                        rowIndex = i
                        thisr = sh.row(i)
                        v0 = str( thisr[0].value ) + '__'
                        # rows whose first cell starts with '#' are skipped
                        if v0[0] != '#':
                            if iv == 1:
                                ##self.ntr = nt__grptbl._make( getRowValues( thisr, minLen=9 ) + [105,k,''] )
                                lll = getRowValues( thisr, minLen=thisl, maxLen=thisl )
                            else:
                                lll = getRowValues( thisr, minLen=thisl, maxLen=thisl )
                                lll[1] = lll[0]
                                lll[0] = tv
                            if thisl == 9:
                                # no priority column: 105 is a placeholder, replaced below
                                lll.append( 105 )

                            assert len(lll) == 10,'bad record length ....'
                            ###
                            ### add mip name and space ...
                            lll += [k,'']
                            if it >= 0:
                                tv = thisr[it].value
                            v = thisr[iv].value
                            bits = string.split(v, ' ')
                            if len(bits) > 1:
                                v = bits[0]
                            if v == 'ta':
                                print 'INFO.045.0001: reading ta: thisl=%s, lll=%s' % (thisl,lll)
                                print hr
                            # table cell: optional 'CMIP5_' prefix, optional second word
                            t = thisr[2].value
                            if t[:6] == "CMIP5_":
                                t = t[6:]
                            if len(string.split(t, ' ')) > 1:
                                t2 = string.split(t, ' ')[1]
                                t = string.split(t, ' ')[0]
                            else:
                                t2 = None
                            # for these tables the second word, minus its first and last
                            # character, is used as an alternative variable name
                            if t in ['OImon','day','Omon'] and t2 != None:
                                v2 = string.strip(t2)[1:-1]
                            else:
                                v2 = None

                            pmsg = ''
                            p0 = None
                            if v == '*':
                                if t in self.cmip5sns:
                                    print 'INFO[1a]: bulk copy of external sheet %s' % t
                                elif t in ss:
                                    print 'INFO[1b]: bulk copy of internal sheet %s' % t

                            elif len(v) == 0 or v[0] == '#':
                                pass
                            else:

                                # NOTE(review): 'id' stays unbound if vl3 is None.
                                if vl3 != None:
                                    if vl3.has_key(v):
                                        id = vl3[v][7]
                                    elif v2 != None and vl3.has_key(v2):
                                        id = vl3[v2][7]
                                    else:
                                        id = None

                                if t in self.cmip5sns:
                                    p = 1
                                    if self.rqs.t.has_key(t):
                                        if self.rqs.t[t].vars.has_key(v):
                                            p = self.rqs.t[t].vars[v][0][1]
                                            pmsg = 'a: %s: %s' % (v,str( self.rqs.t[t].vars[v][0] ) )
                                        elif v in self.nvd:
                                            p = self.nvd[v][0]
                                            print 'INFO.priority.00002: ',v,p
                                        elif v2 != None and self.rqs.t[t].vars.has_key(v2):
                                            p = self.rqs.t[t].vars[v2][0][1]
                                            ##print '%s old name for %s' % (v2,v)
                                            pmsg = 'b: %s: %s' % (v2,str( self.rqs.t[t].vars[v2][0] ) )
                                        else:
                                            # NOTE(review): this 'else' is read as closing the
                                            # elif chain, which makes the 'v in self.nvd' test
                                            # below always false; it may instead belong to the
                                            # has_key(t) test -- confirm.
                                            if v in self.nvd:
                                                print 'ERROR.priority.0001: wrongly assigned table',v,lll,self.nvd[v]
                                            else:
                                                print 'ERROR.priority.0002: variable not found', v,lll
                                            p0 = None

                                else:
                                    # NOTE(review): if t is neither a CMIP5 sheet nor 'new...'/'',
                                    # p keeps the value from an earlier row (or is unbound).
                                    if string.lower(t)[:3] == 'new' or t == '':
                                        p = self.nvd.get( v, [0,7] )[0]
                                        if v == 'sisnconc':
                                            print 'INFO.sisnconc.00002: ', p, v in self.nvd
                                        pmsg = 'c: %s: %s' % (v,str(self.nvd.get( v, [0,7] )) )
                                        if p == 7:
                                            p = self.nvd.get( v + '!', [0,7] )[0]
                                            pmsg = 'd: %s: %s' % (v + '!',str(self.nvd.get( v + '!', [0,7] )) )
                                        if p != 7:
                                            self.idx += 1
                                            id = 'mv.010.%4.4i' % self.idx
                                        if id == None and p < 7:
                                            print 'WARNING[1]: no id for %s' % v
                                        elif id == None:
                                            print 'WARNING[2]: no id, no variable defn for %s' % v
                                        elif p > 6:
                                            print 'WARNING[3.%s]: no variable defn for %s ' % (p,v)
                                # count rows per 'shape:priority' for sheets with a group name
                                ts = string.strip(thisr[irsh].value)
                                if string.strip(tv) != '':
                                    e1['%s:%s' % (ts,p)] += 1

                                ### complete ntr and save to shelf.
                                if type(p) != type(0):
                                    print 'ERROR.020.0002: attempt to set non integer priority: %s [%s] ... %s' % (p,str(lll),pmsg)
                                    p = 106
                                # the sheet's own priority wins when present and non-blank
                                if (not withPriority) or string.strip( str( lll[9]) ) == '':
                                    lll[9] = p
                                if p0 != None and lll[9] != p0:
                                    print 'INFO.088.00001: overriding p: ',lll,p0
                                if v == 'ta':
                                    print 'INFO.045.0002: reading ta: thisl=%s, lll=%s' % (thisl,lll)
                                    print hr
                                if v == 'sisnconc':
                                    print 'INFO.sisnconc.00001', lll, t, t in self.cmip5sns, withPriority
                                # _make runs before the uuid is filled in, so self.ntr.id is ''
                                self.ntr = nt__grptbl._make( lll )
                                ku = str( uuid.uuid1() )
                                lll[11] = ku
                                self.sh[ku] = tuple( lll + [rowIndex,])
                                gset.add( lll[0] )
                    ### MIP, group, size, dictionary -- # items in each shape.
                    self.glist.append( (k, s, ll, e1) )
            print 'INFO.gset.0001: ',k,gset

    def run1(self,nmip=0,kmip=0,clear=False,tabmode='uid'):
        """Parse every CMIP5 sheet name against the workbooks of the selected MIPs.

        nmip, kmip -- when nmip > 0 only ['CMIP5'] + self.mips sliced as
                      [kmip:kmip+nmip] is processed.
        clear      -- reset ``self.rqs.t`` before starting.
        tabmode    -- passed to rqsummary.add.

        Sets ``self.cmip5sns`` to the sorted sheet names of the CMIP5
        workbook, minus the sheets listed in omit1.
        """
        if clear:
            self.rqs.t = {}
        self.cfg.ff['CMIP5'] = ['/data/work/documents/CMIP5_standard_output.xls']
        wb = workbook( self.cfg.ff['CMIP5'][0] )
        wb.sns.sort()
        omit1 = [u'dims', u'general', u'other output',u'CFMIP output']
        sns = []
        for s in wb.sns:
            if s not in omit1:
                sns.append(s)

        self.cmip5sns = sns[:]
        k2 = self.cfg.ff.keys()
        k2.sort()
        k2.append( 'CMIP5' )

        amips = ['CMIP5'] + self.mips
        if nmip > 0:
            amips = amips[kmip:kmip+nmip]
        for s in sns:
            for k in amips:
                if k in k2:
                    for fn in self.cfg.ff[k]:
                        if k == 'CMIP5':
                            # the CMIP5 entry is already a full path
                            path = fn
                        else:
                            path = '%s%s/%s' % (self.dir0,k,fn)
                        extra = self.cfg.fff.get( fn, None )
                        ##
                        ## deal with modified CMIP5 tables
                        ##
                        thisrq = self.rqs.add( k, path,s,tabmode=tabmode, extra=extra )
                        if thisrq.ok:
                            print 'INFO.002:',k,path,s,thisrq.ok, thisrq.nn
                        else:
                            # a sheet missing from a workbook is expected; other failures are reported
                            if thisrq.reason != 'Table not present':
                                print 'WARN.002:',k,path,s,thisrq.ok, thisrq.reason

    def run2(self):
        """Write the prepared records of each table to a workbook sheet and a per-table text file.

        Output goes to 'CMIP6_DataRequest_CMIP5changes.xls' and to
        'dreq2/request2_<sheet>.csv'.
        """
        # init is hard-wired to False, so the blank CMIP5changes workbook is always used
        init=False
        if init:
            wbx = wbcp( '/home/martin/Documents/MIPTableSheetTemplate.xls' )
        else:
            wbx = wbcp( 'CMIP6_DataRequest_CMIP5changes_blank.xls' )

        print 'INFO.002.00002: rqs.t.keys: ',self.rqs.t.keys()
        for s in wbx.sns:
            if s != u'Template':
                keys = self.rqs.t[s].vars.keys()
                keys.sort()
                ##for k in keys:
                ##print k, self.rqs.t[s].vars[k]
                self.rqs.prep( tab=s )
                print 'INFO.001:', s, len(self.rqs.oorecs)
                if len(self.rqs.oorecs) > 0:
                    if init:
                        wbx.copy_sheet( 0, s )
                        wbx.focus( s, old='Template' )
                    else:
                        wbx.focus( s )
                    self.rqs.showXls(wbx,r0=2,offset=0)

                    # echo the first few records
                    for i in range( min( 5, len( self.rqs.oorecs) ) ):
                        print 'INFO.002: oorecs: ',self.rqs.oorecs[i]

                    oo = open( 'dreq2/request2_%s.csv' % s, 'w' )
                    self.rqs.show( oo, tab=s )
                    oo.close()

        wbx.write( 'CMIP6_DataRequest_CMIP5changes.xls' )
---|
795 | ##wb3 = workbook( 'vlsc5b_v20150714.xls' ) |
---|
# ---- driver: executed whenever this module is run or imported ----
# NOTE(review): block nesting below was reconstructed from a listing that had
# lost its indentation -- confirm before relying on it.
wb3 = workbook( 'vlsc5b_v20160309.xls' )
sh = wb3.book.sheet_by_name( 'Sheet1' )
m = main()

# opt selects the processing path: 1 builds the shelves, 2 writes the
# CMIP5-changes workbook via run2.
opt = 1
if opt == 1:
    # Build the lookup of existing variables from 'Sheet1': rows with fewer
    # than 9 cells or a blank ninth cell are indexed by their second cell.
    m.vl3 = {}
    for i in range(sh.nrows):
        r = map( lambda x: string.strip(str(uniCleanFunc(x.value))), sh.row(i) )
        if len(r) < 9 or r[8] == '':
            m.vl3[r[1]] = tuple( r[:8] )
        # NOTE(review): taken to run for every row, not only those passing the test above
        m.shold[r[0]] = r

    # first pass is limited to the first entry of the MIP list ('CMIP5')
    m.run1(nmip=1)
    m.loadNewVars()
    m.groups( )
    # sample one shelf entry as a sanity check (IndexError if fewer than 5 keys)
    k = m.sh.keys()[4]
    print '##############################################'
    print 'INFO.003.0001',k,m.sh[k]
    sss = set()
    for k in m.sh.keys():
        if k[0] != '_':
            sss.add( m.sh[k][0] )
    print 'INFO.003.0002: test .... ','cfMon_3dstd_new' in sss

    m.sh.close()
    m.shnv.close()
    m.shnvo.close()
    m.oonvo.close()
    m.shold.close()

    # second pass over all MIPs, starting from an empty table set
    m.run1( clear=True)
    keys = sorted( m.rqs.t )
    # shelf "a": all parsed records and the md5 provenance map
    sh = shelve.open( 'dreq_consol_tables_reviewed_a_v%s' % vdate, 'n' )
    sh['records'] = m.rqs.records[:]
    sh['recordProv'] = m.rqs.recordProv.copy()
    sh.close()
    # shelf "b": per table, name -> (record index, priority, section, sheet row)
    sh = shelve.open( 'dreq_consol_tables_reviewed_b_v%s' % vdate, 'n' )
    sh['__keys__'] = map( str, keys[:] )
    for k in keys:
        vs = sorted( m.rqs.t[k].vars)
        ee = {}
        # NOTE(review): reset on every iteration, so m.rqs.vn ends up holding
        # only the last table -- confirm this is intended.
        m.rqs.vn = {}
        for v in vs:
            for mip,p,ix,sect,rowix in m.rqs.t[k].vars[v]:
                if v == 'clic':
                    print 'INFO.clic.00003: ',k,v,mip,p,ix,sect,rowix
                this = m.rqs.records[ix]
                if v == 'prhmax':
                    print 'xxxxx: ',v,mip,p,ix,sect,rowix, m.rqs.records[ix]
                # use record element 16 as the name unless it is blank/zero, else element 4
                if this[16] in ['','0.0',0.0]:
                    vn = this[4]
                else:
                    vn = this[16]
                if ee.has_key(vn):
                    print 'ERROR.030.0010: DUPLICATE %s' % vn
                    print m.rqs.t[k].vars[v]
                    print m.rqs.records[ee[vn][0]]
                    print m.rqs.records[ix]
                # a duplicate name is reported above and then overwritten
                ee[vn] = (ix,p,sect,rowix)

        print 'INFO.009.00101:', k,len(vs),'#################'
        k2 = str(k)
        m.rqs.vn[k2] = ee
        sh[k2] = ee.copy()
    sh.close()
    shr1.close()

elif opt == 2:
    m.run1(tabmode='tab')
    m.run2()
    shr1.close()
---|