1 | |
---|
2 | import string, collections, shelve, uuid |
---|
3 | from sets import Set |
---|
4 | from utils_wb import uniCleanFunc |
---|
5 | |
---|
def labcoerce(s,nodash=True):
    """Coerce a free-text label into a compact identifier.

    Dots and plus signs are removed, slashes become spaces and double dashes
    collapse into a single dash.

    s      -- the label to clean.
    nodash -- when true (the default) dashes are also turned into spaces and
              a label that then contains spaces is joined in CapWords form,
              e.g. 'foo-bar' -> 'FooBar'.  ``string.capwords`` lower-cases
              the remainder of every word.  When false the cleaned label is
              returned with its dashes and spaces left in place.

    Returns the coerced string.
    """
    for old, new in (('.', ''), ('+', ''), ('/', ' '), ('--', '-')):
        s = s.replace(old, new)
    if nodash:
        s = s.replace('-', ' ')
        # Single words are returned untouched (no capitalisation applied).
        if ' ' in s:
            s = string.capwords(s).replace(' ', '')
    return s
---|
19 | |
---|
class varKeys(object):
    """Look up the shelve key that belongs to a variable record.

    A record is identified by the values in columns 0, 1, 3, 5 and 10.  Where
    several stored records share that identifier and every one of them has
    'Greenland' or 'Antarctica' in column 4, the column 4 value is appended
    to the identifier to tell them apart.  Any other clash is left out of the
    index; clashes whose second element is '', '*' or starts with '#' are
    reported as ignored.
    """

    # Record columns which together identify a variable.
    _ixkey = (0, 1, 3, 5, 10)
    # Column 4 values used to separate otherwise identical records.
    _icesheets = ('Greenland', 'Antarctica')

    def __init__(self,file='dreq_consol_tables_shelve_v20160309' ):
        """Build ``self.keys`` from the shelve *file*, which is opened read-only."""
        self.keys = {}
        cc = collections.defaultdict( list )
        sh = shelve.open( file, 'r' )
        try:
            for k in sh:
                rec = sh[k]
                cc[ tuple( [rec[x] for x in self._ixkey] ) ].append( k )
            for t, kl in cc.items():
                if len(kl) == 1:
                    self.keys[t] = kl[0]
                elif t[1] in ['','*'] or t[1][0] == '#':
                    print( 'INFO.vark.0001: ignoring %s' % str(t) )
                elif all( [sh[kk][4] in self._icesheets for kk in kl] ):
                    for kk in kl:
                        t1 = t + (sh[kk][4],)
                        print( 'INFO.vark.0004: Greenland/Antarctic pair %s' % str(t1) )
                        self.keys[t1] = kk
        finally:
            # Release the shelve even when a record is malformed.
            sh.close()

    def lookuprec(self,r ):
        """Return the key for record *r*.

        For records with 'Greenland' or 'Antarctica' in column 4 the extended
        identifier is tried first; if the plain identifier is also indexed a
        warning is printed and the plain one wins.  When nothing matches, a
        new ``uuid.uuid1()`` string is returned.
        """
        t = tuple( [r[x] for x in self._ixkey] )
        if r[4] in self._icesheets:
            t1 = t + (r[4],)
            if t1 not in self.keys:
                print( 'WARNING.vark.0003: key not found:  %s %s' % (str(r), str(t1)) )
            elif t in self.keys:
                print( 'WARNING.vark.0002: duplicate key %s -- %s' % (str(t),str(t1)) )
                return self.keys[t]
            else:
                return self.keys[t1]
        if t in self.keys:
            return self.keys[t]
        print( 'INFO.vark.0003: new key: %s' % str(t) )
        return str( uuid.uuid1() )
---|
55 | |
---|
class loadcsv(object):
    """Load the three delimited ``vlsc5*`` text files for one version date.

    Attributes set by the constructor:
      ee_xref -- dict, first column -> row, from 'vlsc5_xref_v<vdate>.csv'
                 (comma separated, padded to at least 5 columns).
      ee_mip  -- dict, first column -> row, from 'vlsc5_mip_v<vdate>.csv'
                 (tab separated, padded to at least 24 columns).
      eeoldl  -- defaultdict(list), first column -> list of rows, from
                 'vlsc5b_v<vdate>.csv' (tab separated, at least 9 columns).
      cmv     -- defaultdict(list), '<column 22>.<name>' -> keys of ee_mip,
                 where <name> is column 21, or column 15 if column 21 is blank.
    """

    def __init__(self,vdate):
        self.eeoldl = collections.defaultdict( list )
        self.ee_xref = {}
        self.ee_mip = {}
        self.cmv = collections.defaultdict( list )
        # (target, file name, separator, keep every row for a key, minimum columns)
        specs = [(self.ee_xref,'vlsc5_xref_v%s.csv' % vdate,',',False,5),
                 (self.ee_mip,'vlsc5_mip_v%s.csv' % vdate, '\t',False,24),
                 (self.eeoldl,'vlsc5b_v%s.csv' % vdate, '\t',True,9)]
        for ee,fn,sep,multi,nrmin in specs:
            # 'with' closes each file as soon as it has been read.
            with open(fn) as fh:
                for line in fh:
                    r = [str(uniCleanFunc(x)).strip() for x in line.strip().split(sep)]
                    # Pad short rows so later fixed-index access is safe.
                    r.extend( [''] * (nrmin - len(r)) )
                    if multi:
                        ee[r[0]].append( r )
                    else:
                        ee[r[0]] = r

        for k, r in self.ee_mip.items():
            v = r[21]
            if v.strip() == '':
                v = r[15]
            self.cmv['%s.%s' % (r[22],v)].append( k )
---|
80 | |
---|
81 | |
---|
class prconsolexpt(object):
    """Parse the rows of a consolidated experiment sheet.

    ``parse`` fills ``self.records`` (one list per row) and ``self.edict``
    (label -> tuple); the fields of both are named, in order, by
    ``self.info``.  ``self.refexpt`` is taken from the ``expts`` module.
    """

    # Columns which must hold integers, with the names used in messages.
    _intcols = {6:'nstart', 11:'years per sim', 12:'ensemble size', 13:'total number of years'}

    def __init__(self):
        import expts
        self.records = []
        self.edict = {}
        self.info =['label','group','mip','description','modelClass','tier','nstart','starty','endy','yps','ensz','ntot','comment']
        self.refexpt = expts.expts

    @staticmethod
    def _intcell(v):
        """Convert a cell: a float gives an int, text gives a list of ints."""
        if isinstance( v, float ):
            return int( v )
        return [int( x ) for x in v.split()]

    def parse(self,sh):
        """Read every row of sheet *sh* from the third row onwards.

        *sh* must provide ``nrows`` and ``row(j)``, the latter returning cells
        with a ``value`` attribute.  Cells that cannot be read as integers
        are abbreviated where possible (leading number of a range or phrase)
        and the abbreviation is noted in the record's comment; otherwise the
        conversion error is re-raised.
        """
        for j in range(2,sh.nrows):
            r = [c.value for c in sh.row(j)]
            n = str(r[0]).strip()
            n2 = str(r[4]).strip()
            if n2 in self.refexpt or n in ['0','0.0',0,0.0]:
                print( 'prconsolexpt: replace %s --> %s' % (n,n2) )
                n = n2
            g = r[1].strip()
            mip = r[3].strip()
            desc = r[7].strip()
            mcfg = r[8].strip()
            comment = ''
            try:
                tier = self._intcell( r[5] )
            except Exception:
                print( 'ERROR.001.0001: tier not integer: %s, %s' % (mip,r[5]) )
                if r[5][:3] == 'Sub':
                    tier = 1
                    comment += 'Tiering of sub-components .....'
                else:
                    raise
            ll = []
            # 'jc' rather than 'j': the row index must not be shadowed.
            for jc in [6,11,12,13]:
                try:
                    ll.append( self._intcell( r[jc] ) )
                except Exception:
                    k = self._intcols[jc]
                    if '-' in r[jc]:
                        # A range such as '10-20': keep the first value.
                        ll.append( int( r[jc].split('-')[0] ) )
                    else:
                        try:
                            ll.append( int( r[jc].split()[0] ) )
                        except Exception:
                            print( 'Failed to convert to integer  %s' % r[jc] )
                            print( r )
                            raise
                    comment += ': %s abbreviated: %s' % (k,r[jc])
                    print( 'ERROR.001.0002: %s not integer: %s, %s' % (k,mip,r[jc]) )
            starty = r[9]
            endy = r[10]
            nstart, yps, ensz, ntot = ll
            rec = [n,g,mip,desc,mcfg,tier,nstart,starty,endy,yps,ensz,ntot,comment]
            self.edict[n] = tuple( rec )
            self.records.append( rec )
---|
147 | |
---|
class probj(object):
    """Collect the rows listed below the 'Short name' heading of a sheet.

    Each kept row is stored in ``self.records`` as a tuple of the MIP name
    followed by the first three (stripped) cell values.
    """

    def __init__(self):
        self.records = []

    def parse(self,mip,sh):
        """Append the rows of sheet *sh* that follow the heading row.

        *sh* must provide ``nrows`` and ``row(j)``, the latter returning cells
        with a ``value`` attribute.  Rows before (and including) the one whose
        first cell is 'Short name' are skipped, as are rows whose first three
        cells are all blank.  Rows with some blanks are kept but reported.
        """
        started = False
        for j in range(sh.nrows):
            r = [c.value for c in sh.row(j)]
            if not started:
                started = r[0] == 'Short name'
                continue
            rr = [x.strip() for x in r]
            blanks = [rx == '' for rx in rr[:3]]
            if all( blanks ):
                # Nothing in the row: omit it.
                continue
            if any( blanks ):
                print( 'Record with blank(s): %s: %s' % (mip,str(rr) ) )
            self.records.append( (mip,rr[0],rr[1],rr[2]) )
---|
169 | |
---|
class prcexr(object):
    """Parse a row of the consolidated experiment sheet.

    The most recently parsed row is available as ``self.row``, a named tuple
    with the fields label, group, ix, mip, altLabel, tier, nstart,
    description, config, start, end, lsim, ensSize, ntot, startNote and
    tierNote.
    """

    def __init__(self,dd=None):
        """Optionally parse every row of the mapping *dd*, in sorted key order."""
        self.nt = collections.namedtuple( 'r4info', ['label','group','ix','mip','altLabel','tier','nstart','description','config','start','end','lsim','ensSize','ntot','startNote','tierNote'] )
        if dd is not None:
            for k in sorted( dd.keys() ):
                # parse() takes the row only; passing the key as well raised TypeError.
                self.parse( dd[k] )

    def parse(self,rv):
        """Convert the first 14 values of *rv* and store them in ``self.row``.

        Columns 2, 5, 6, 11, 12 and 13 are made integer: text containing
        'to'/'or' or a dash contributes its last number, a tier starting with
        'Sub-' becomes -1 with the text kept in ``tierNote``, and a blank
        column 2 becomes -1.  Columns 9 and 10 become 0 when blank; values
        there that are not integers are moved into ``startNote``.  Any other
        conversion failure prints the row and re-raises.  *rv* itself is not
        modified.
        """
        rr = list( rv[:14] )
        startNote = ''
        tierNote = ''
        text = type(u'x')
        try:
            for j in [2,5,6,11,12,13]:
                v = rr[j]
                istext = isinstance( v, text )
                if j == 5 and istext and v[:4] == 'Sub-':
                    tierNote = v
                    rr[j] = -1
                elif istext and ('to' in v or 'or' in v):
                    rr[j] = int( v.split()[-1] )
                elif istext and '-' in v:
                    rr[j] = int( v.split('-')[-1] )
                elif j == 2 and v == '':
                    rr[j] = -1
                else:
                    rr[j] = int( v )
            for j in [9,10]:
                if rr[j] == '':
                    rr[j] = 0
                    continue
                try:
                    rr[j] = int( rr[j] )
                except Exception:
                    if j == 9:
                        startNote = rr[j]
                    else:
                        print( '%s %s' % (startNote, str(rr)) )
                        startNote += ( ' -- %s' % rr[j] )
                    # NOTE(review): -1 is applied to both columns here; confirm
                    # that column 9 is also meant to be reset.
                    rr[j] = -1
        except Exception:
            print( rr )
            raise
        rr.append( startNote )
        rr.append( tierNote )
        self.row = self.nt._make( rr )
---|
216 | |
---|
class pr4(object):
    """Parse row 4 of the request scoping sheet.

    ``parse`` locates the heading columns in the row and stores them in
    ``self.r4info``, a named tuple with the fields ixcntl, ixh, ix0, ixm,
    ownix, ownhr, othix, othhr, mode and treset.
    """

    def __init__(self,dd=None):
        """Optionally parse every ``mip -> row`` entry of *dd*, in sorted key order."""
        self.nt = collections.namedtuple( 'r4info', ['ixcntl','ixh','ix0','ixm','ownix','ownhr','othix','othhr','mode','treset'] )
        if dd is not None:
            for k in sorted( dd.keys() ):
                self.parse( k, dd[k] )

    def parse3(self,mip,r3):
        """Store in ``self.r3info`` the column (6 or 7) holding 'objectives served'.

        Raises ValueError when neither column carries that heading.
        """
        if r3[6] == 'objectives served':
            mode = 6
        elif r3[7] == 'objectives served':
            mode = 7
        else:
            raise ValueError( 'Unable to parse heading' )
        self.r3info = mode

    def parse(self,mip,r4):
        """Locate the experiment columns of row *r4* for the MIP named *mip*.

        Raises ValueError when neither column 6 nor column 7 starts with
        'objectives served'.  Failures to find the experiment headings are
        re-raised after printing the row, except that for ``mip == 'DCPP'``
        the layout is derived from the position of 'DCPP-A' instead.
        """
        if r4[6][:17] == 'objectives served':
            mode = 6
        elif r4[7][:17] == 'objectives served':
            mode = 7
        else:
            print( 'Unable to parse heading %s' % str(r4) )
            print( mip )
            print( r4 )
            raise ValueError( 'Unable to parse heading %s' % str(r4) )
        self.ixh = 0
        self.ix0 = 0
        hhref = [u'control', u'AMIP', u'abrupt4xCO2', u'1pctCO2', u'CMIP6 historical']

        try:
            self.ixh = r4.index( u'CMIP6 historical' )
            self.ixm = r4.index( u'MIP name:' )
            self.ix0 = self.ixh + 2
            if r4[self.ixh-8] == u'control':
                self.iccntl = self.ixh-8
                # The five reference headings sit in every second column.
                for j in range(5):
                    if r4[self.iccntl + j*2] != hhref[j]:
                        print( 'ERROR.001.002: mismatch in heading %s %s' % (j, str(r4)) )
            else:
                # NOTE(review): self.iccntl is not assigned on this path, so the
                # r4info construction below fails (or reuses the value from an
                # earlier call) -- confirm the intended fallback.
                print( 'ERROR.001.001: control expt. column not found %s' % str(r4) )
        except Exception:
            if mip == 'DCPP':
                try:
                    self.ix0 = r4.index( u'DCPP-A' )
                    self.ixm = self.ix0+8
                    self.ixh = -1
                    self.iccntl = self.ix0
                    # Indexing checks that the four heading columns exist.
                    for j in range(4):
                        r4[self.iccntl + j*2]
                except Exception:
                    print( '!!!!!!!!!!! %s' % str(r4) )
                    raise
            else:
                print( 'Failed to parse Row 4' )
                print( r4 )
                raise

        # Own experiments: every second column between ix0 and ixm.
        self.ownex = [j for j in range(self.ix0,self.ixm,2) if r4[j].strip() != '']
        # Other experiments: groups of three columns from ixm onwards, with
        # the name in the second column and 'treset' in the third.
        self.othex = []
        self.othetrst = []
        for j in range(self.ixm,len(r4)-1,3):
            if r4[j+1].strip() != '':
                self.othex.append(j)
                if r4[j+2] != '':
                    self.othetrst.append(int(r4[j+2]))
                else:
                    self.othetrst.append('')

        ownhr = [r4[x] for x in self.ownex]
        othhr = [r4[x+1] for x in self.othex]
        verbose = False
        if verbose:
            print( ' '.join( [str(x) for x in (mip, self.ixh, self.iccntl, self.ix0, self.ixm, self.ownex, self.othex)] ) )
            print( 'pr4:INFO:  %s %s' % (str(ownhr), str(othhr)) )
        self.r4info = self.nt( self.iccntl, self.ixh, self.ix0, self.ixm, self.ownex, ownhr, self.othex, othhr,mode,self.othetrst )
---|
307 | |
---|
class parseShb(object):
    # Compares the reviewed tables held in a shelve file with the original
    # tables reachable through `sx`, recording which tables were modified.
    # NOTE(review): the nesting below was reconstructed from a copy of this
    # file that had lost its indentation -- confirm against the original,
    # in particular the extent of the `if tup[2] != None:` body.
    def __init__(self,vdate,sx,cmip5gplk):
        """Parse **'dreq_consol_tables_reviewed_b_v%s' % vdate** from dreq_consol_tables.py

        vdate     -- version string used to build the shelve file name.
        sx        -- object giving access to the original tables through
                     ``sx.cmip5so.so[tab].a`` and ``sx.cmip5so.sop``.
        cmip5gplk -- mapping from table name to the value stored for
                     unmodified tables (presumably the table's uuid -- verify
                     against the caller).

        Attributes set: revGpIds, tabsectuuid, tabsectbyvar,
        revisedTabKeysff (tables with modifications), revisedTabKeysNM
        (tables without modifications), revisedTabKeys (sorted shelve keys)
        and tabuuid (new uuid per shelve key).
        """
        shb = shelve.open( 'dreq_consol_tables_reviewed_b_v%s' % vdate, 'r' )
        revTabIds = Set( shb.keys()[:] )
        revisedTabKeys = shb.keys()
        revisedTabKeys.sort()
        revisedTabKeysff = []
        revisedTabKeysNM = {}
        tabsects = collections.defaultdict( Set )
        tabsectbyvar = collections.defaultdict( dict )
        tabsectuuid = collections.defaultdict( dict )
        tabuuid = {}
        tabuuidrefs = collections.defaultdict( int )
        sn = set()
        for r in revisedTabKeys:
            if not tabuuid.has_key( r ):
                tabuuid[r] = str( uuid.uuid1() )
            ##
            ## need to get this information from requestLink_tmp -----
            ##
            preset = -3
            # Only keys of the form '<mip>.<table>' are compared; a key with
            # more than one '.' would fail the two-name unpacking below.
            if string.find( r, '.' ) != -1:
                mip,tab = string.split(r, '.' )
                assert sx.cmip5so.so.has_key(tab), 'FATAL.001.0001: revised table not found in original: %s' % tab
                lnf = []    # revised variables not found in the original table
                lpe = []    # revised variables whose element [1] equals the original's element [0]
                lmod = []   # revised variables for which those elements differ
                s1 = set()  # stripped names of all revised variables
                for v in shb[r].keys():
                    v1 = string.strip(v)
                    s1.add(v1)
                    if not sx.cmip5so.so[tab].a.has_key(v1):
                        # Missing OMIP variables are not reported.
                        if mip != 'OMIP':
                            lnf.append(v)
                    else:
                        if shb[r][v][1] == sx.cmip5so.so[tab].a[v1][0]:
                            lpe.append(v)
                        else:
                            lmod.append(v)
                    ##
                    ## index sections of tables (for OMIP)
                    ##
                    tup = shb[r][v]
                    ### tup=(ix,p,sect,rowix)
                    if tup[2] != None:
                        tabsects[mip].add( (tab,tup[2]) )
                        tabsectbyvar[mip]['%s.%s' % (tab,v)] = tup
                        sk = '%s.%s' % (tab,tup[2])
                        # One uuid per '<table>.<section>' within each MIP.
                        if not tabsectuuid[mip].has_key( sk ):
                            tabsectuuid[mip][sk] = str( uuid.uuid1() )
                    ##
                    ## want to copy to Omon ... but involves change of frequency ... and dimensionality ....
                    ## so can only, here, reserve a uid for the tbd new cmor variable, a link of where it is coming from
                    ## and extend the OMIP.Omon table in tabsectbyvar
                    ##
                    if r == 'OMIP.Oyr':
                        if tup[2] == 'bgc' and tup[3] <= 65:
                            ##print 'INFO.ssss.00001: %s, %s' % (v,str(tup) )
                            ##tabsectbyvar[mip]['Omon.%s' % v] = (tup[0], 2, 'bgc.Oyr', tup[-1] )
                            # Placeholder: the copy described above is not implemented.
                            pass
                            ##sn.add( tup[3] )

                # Count original variables that the revised table does not use.
                lnu = 0
                for v in sx.cmip5so.so[tab].a.keys():
                    if (v not in s1) and (v not in shb[r]):
                        # Omon variables listed in sop['Oyr_3dtr'] are not counted.
                        if tab == 'Omon' and v in sx.cmip5so.sop['Oyr_3dtr']:
                            pass
                        else:
                            lnu += 1
                if len(lnf) > 0:
                    print 'ERROR.008.0001: Revised variables not in original: %s %s' % (r,str(lnf) )
                if len(lmod) == 0 and lnu == 0 and preset < 0:
                    print 'INFO.008.0001: No modfications in %s' % r
                    #### save uuid of table ... for use in requestLinks
                    revisedTabKeysNM[r] = cmip5gplk[tab]
                else:
                    print 'INFO.008.0002: modifications in %s: mods %s, unused %s, preset %s' % (r,len(lmod),lnu,preset)
                    revisedTabKeysff.append( r )

        # Group identifiers derived from the table keys: Omon tables give two
        # groups and cfMon tables four, named by suffix.
        self.revGpIds = Set()
        for t in revTabIds:
            if t[-4:] == 'Omon':
                self.revGpIds.add( t + '_oth' )
                self.revGpIds.add( t + '_3d' )
            elif t[-5:] == 'cfMon':
                self.revGpIds.add( t + '_3dstd' )
                self.revGpIds.add( t + '_3dmod' )
                self.revGpIds.add( t + '_2dmod' )
                self.revGpIds.add( t + '_sim' )

        # NOTE(review): the shelve stays open if anything above raises.
        shb.close()
        self.tabsectuuid = tabsectuuid
        self.tabsectbyvar = tabsectbyvar
        self.revisedTabKeysff = revisedTabKeysff
        self.revisedTabKeysNM = revisedTabKeysNM
        self.revisedTabKeys = revisedTabKeys
        self.tabuuid = tabuuid
---|