1 | |
---|
2 | from xceptions import baseException |
---|
3 | import shelve, glob, os, uuid, collections, string |
---|
4 | from utils_wb import workbook |
---|
5 | import util_varGroups |
---|
6 | import util_gen |
---|
7 | |
---|
8 | |
---|
# Lookup from a MIP table label (or table sub-group label) to the frequency
# label used for that table.  Tables missing from this mapping are reported
# by refTabChk as "UNMAPPED tabs (to freq)".
tab2freq = {
    u'CMIP5_cfOff': 'subhr',
    u'CORDEX_mon': 'mon',
    u'SPECS_day': 'day',
    u'CMIP5_day': 'day',
    u'PMIP3_OImon': 'mon',
    u'CORDEX_day': 'day',
    u'CMIP5_LImon': 'mon',
    u'CMIP5_OImon': 'mon',
    u'CMIP5_Lmon': 'mon',
    u'CMIP5_3hr': '3hr',
    u'CMIP5_Omon': 'mon',
    u'PMIP3_OIclim': 'monClim',
    u'PMIP3_fx': 'fx',
    u'CORDEX_fx': 'fx',
    u'PMIP3_LImon': 'mon',
    u'CMIP5_6hrPlev': '6hr',
    u'PMIP3_Lmon': 'mon',
    u'PMIP3_Amon': 'mon',
    u'SPECS_Omon': 'mon',
    u'CCMI1_fixed': 'fx',
    u'PMIP3_Aclim': 'monClim',
    u'CMIP5_6hrLev': '6hr',
    u'CMIP5_Oclim': 'monClim',
    u'PMIP3_LIclim': 'monClim',
    u'CCMI1_monthly': 'mon',
    u'CMIP5_fx': 'fx',
    u'CMIP5_cfDay': 'day',
    u'CORDEX_6h': '6hr',
    u'PMIP3_day': 'day',
    u'SPECS_OImon': 'mon',
    u'CMIP5_cfMon': 'mon',
    u'CORDEX_sem': 'monClim',
    u'SPECS_6hr': '6hr',
    u'CMIP5_cfSites': 'subhr',
    u'CCMI1_hourly': 'hr',
    u'CMIP5_aero': 'mon',
    u'CMIP5_Amon': 'mon',
    u'PMIP3_Omon': 'mon',
    u'CCMI1_daily': 'day',
    u'SPECS_fx': 'fx',
    u'PMIP3_Lclim': 'monClim',
    u'DCPP-day': 'day',
    u'PMIP3_Oclim': 'monClim',
    u'SPECS_Amon': 'mon',
    u'SPECS_Lmon': 'mon',
    u'CMIP5_cf3hr': '3hr',
    u'CORDEX_3h': '3hr',
    'OImon': 'mon',
    'Omon_oth': 'mon',
    'Omon': 'mon',
    'Oyr': 'yr',
    'Omon_3d': 'mon',
    u'CCMI1_annual': 'yr',
    u'CMIP5_Oyr': 'yr',
    'Oclim': 'monClim',
    'cfMon': 'mon',
    u'cf3hr': '3hr',
    u'Amon': 'mon',
    u'aero': 'mon',
    u'aero_3d': 'mon',
    u'6hrPlev': '6hr',
    u'aero_oth': 'mon',
    u'cf3hr_grid': '3hr',
    u'LImon': 'mon',
    u'cfSites': 'subhr',
    u'day_oth': 'day',
    u'cfDay_2d': 'day',
    u'day': 'day',
}
26 | |
---|
27 | ## |
---|
28 | ## want to match up with input/vars_... and where is cmv uid fixed ???? |
---|
29 | ## |
---|
30 | ## |
---|
31 | ## need to migrate new vars into input/vars_ ... especially from LUMIP ... |
---|
32 | ## |
---|
33 | ## new variables which need to be added have been identified. No check for modifications (and no clear workflow here)... |
---|
34 | ## need to check .. a few appear spurious .. and then generate csv records ... for manual copy ... |
---|
35 | ## |
---|
36 | ## |
---|
37 | ## |
---|
38 | ## add AerChemMip and VIACS to stuff scanned and checked below ..... |
---|
39 | |
---|
def getSubGroup(s, var,dims,ttl,ix):
    """Return the sub-group label for a variable of MIP table ``s``, or None.

    Parameters:
      s:    table label ('Oyr', 'Amon', 'Omon', 'cfMon', 'cfDay', 'cf3hr', 'aero' or 'day');
            any other label yields None.
      var:  variable name; only its last three characters are inspected (cfMon).
      dims: dimension string.  The Amon/Omon/cfMon tests look for space separated
            names, the cfDay/cf3hr/aero tests for '|' separated names.
      ttl:  variable title; only its first five characters are inspected (cfMon).
      ix:   row index of the variable in its table (used for Oyr and day).

    Returns:
      A label such as 'Amon_2d' or 'cfDay_3d', or None when no sub-group applies.
    """
    if s == 'Oyr':
        # Only the leading rows of Oyr belong to a sub-group.
        return 'Oyr_3dtr' if ix < 60 else None

    if s == 'Amon':
        if 'lev' not in dims and 'longitude latitude' in dims:
            return 'Amon_2d'
        return 'Amon_oth'

    if s == 'Omon':
        if 'longitude latitude olevel' in dims:
            return 'Omon_3d'
        return 'Omon_oth'

    if s == 'cfMon':
        if ttl[:5] in ('ISCCP', 'PARAS', 'CALIP'):
            return 'cfMon_sim'
        if var[-3:] == 'co2':
            if 'longitude latitude alev' in dims:
                return 'cfMon_3dmod'
            return 'cfMon_2dmod'
        return 'cfMon_3dstd'

    if s == 'cfDay':
        # Split the dimension string on its separator.  The receiver and the
        # argument used to be swapped ('|'.split(dims)), which always produced
        # ['|'], so 'cfDay_3d' could never be selected.
        bits = dims.split('|')
        if ('longitude|latitude' in dims and len(bits) > 2
                and bits[2] in ('alev', 'alevel', 'alevhalf', 'alt40', 'plev7')):
            return 'cfDay_3d'
        return 'cfDay_2d'

    if s == 'cf3hr':
        if 'longitude|latitude' in dims:
            return 'cf3hr_grid'
        return 'cf3hr_sim'

    if s == 'aero':
        if 'longitude|latitude|alevel' in dims:
            return 'aero_3d'
        return 'aero_oth'

    if s == 'day':
        if ix < 28:
            return 'day_ss'
        return 'day_oth'

    return None
def getSubGroupO(sn, var,dims,ttl,ix):
    """Return the sub-group label for a variable of MIP table ``sn``, or None.

    Variant of the sub-group lookup in which every dimension test expects
    space separated dimension names.

    Parameters:
      sn:   table label ('Oyr', 'Omon', 'cfMon', 'cfDay' or 'cf3hr'); any other
            label yields None.
      var:  variable name; only its last three characters are inspected (cfMon).
      dims: space separated dimension names.
      ttl:  variable title; only its first five characters are inspected (cfMon).
      ix:   row index of the variable in its table (used for Oyr).

    Returns:
      A label such as 'Omon_3d' or 'cfDay_2d', or None when no sub-group applies.
    """
    if sn == 'Oyr':
        # Only the leading rows of Oyr belong to a sub-group.
        return 'Oyr_3dtr' if ix < 65 else None

    if sn == 'Omon':
        if 'longitude latitude olevel' in dims:
            return 'Omon_3d'
        return 'Omon_oth'

    if sn == 'cfMon':
        if ttl[:5] in ('ISCCP', 'PARAS', 'CALIP'):
            return 'cfMon_sim'
        if var[-3:] == 'co2':
            if 'longitude latitude alev' in dims:
                return 'cfMon_3dmod'
            return 'cfMon_2dmod'
        return 'cfMon_3dstd'

    if sn == 'cfDay':
        # Split the dimension string on blanks.  The receiver and the argument
        # used to be swapped (' '.split(dims)), which always produced [' '],
        # so 'cfDay_3d' could never be selected.
        bits = dims.split(' ')
        if ('longitude latitude' in dims and len(bits) > 2
                and bits[2] in ('alev', 'alevel', 'alevhalf', 'alt40', 'plev7')):
            return 'cfDay_3d'
        return 'cfDay_2d'

    if sn == 'cf3hr':
        if 'longitude latitude' in dims:
            return 'cf3hr_grid'
        return 'cf3hr_sim'

    return None
118 | |
---|
class ref(object):
    """Reference list of variables read from the 'var' sheet of the variables workbook.

    Attributes:
      vars: dict keyed on the value in column 0 of each sheet row; each entry is
            the tuple (value of column 9, value of column 10).  Elsewhere in this
            file element 0 is used as the variable id and element 1 as a priority.
    """

    def __init__(self,sdir='inSh'):
        """Load the workbook, then report new variables that it does not contain.

        Parameters:
          sdir: directory scanned for 'sh__newVar_*' shelves; every record key in
                those shelves which is missing from the workbook is printed.

        Raises:
          AssertionError: if a variable name occurs on more than one sheet row.
        """
        dir1 = '/data/tmp/svn3/exarch/CMIP6dreqbuild/trunk/src/workbook'
        wb = workbook( '%s/%s' % (dir1,'inputs/vars_20160721.xls') )
        self.vars = {}
        s1 = wb.book.sheet_by_name(u'var')
        for i in range( s1.nrows ):
            r = s1.row(i)
            name = r[0].value
            # Compare the cell *value*: the dictionary is keyed on values, so
            # testing the cell object itself could never detect a duplicate.
            assert name not in self.vars, 'DUPLICATE VARIABLE: %s' % str(r)
            self.vars[name] = (r[9].value,r[10].value)

        for f in sorted( glob.glob( '%s/sh__newVar_*' % sdir ) ):
            sh = shelve.open( f, 'r' )
            try:
                for k in sh.keys():
                    # keys starting with '_' hold metadata, not variable records
                    if k[0] != '_' and k not in self.vars:
                        print('MISSING NEW: %s, %s, %s' % (k,f,str(sh[k])))
            finally:
                sh.close()
137 | |
---|
class cmvRef(object):
    """Index of the records in 'ingest/cmvRef_01.beta.32.csv'.

    Each non-blank line of the file holds four tab separated fields:
    label, table, title, uid.

    Attributes:
      byUid: dict, uid -> (label, table, title).
      byTab: dict of dicts, table -> {label: uid}.
    """

    def __init__(self):
        """Read the reference file (path relative to the working directory).

        Raises:
          AssertionError: if a uid occurs more than once.
          ValueError: if a non-blank line does not split into exactly four fields.
        """
        self.byUid = {}
        self.byTab = collections.defaultdict( dict )
        # The with-block guarantees that the file is closed, also on error.
        with open( 'ingest/cmvRef_01.beta.32.csv' ) as fh:
            for l in fh:
                l = l.strip()
                if not l:
                    # tolerate blank lines (e.g. a trailing newline at end of file)
                    continue
                lab, tab, ttl, uid = l.split( '\t' )
                assert uid not in self.byUid, 'DUPLICATE UID: %s' % uid
                self.byUid[uid] = (lab,tab,ttl)
                if lab in self.byTab[tab]:
                    print('ERROR.cmvRef.001: duplicate variable name in cmv table: %s,%s' % (tab,lab))
                # a repeated label is reported above; the last occurrence wins
                self.byTab[tab][lab] = uid
149 | |
---|
class revTabChk(object):
    """Consolidate the revised variable tables held in '<sdir>/sh__so_*' shelves.

    Every record is copied to the '<sdir>/sh__consol01_revTabs' shelf with the uid
    of its variable group appended.  When the referenced variable belongs to a
    table sub-group for which a variable group exists, a second record, pointing
    at that sub-group, is written under a freshly generated uid.

    Attributes:
      ref: the refTabChk instance used to look up variables (``ref.cmv``).
      sh:  the output shelf (closed again before the constructor returns).
    """

    def __init__(self,sdir='inSh'):
        """Review and consolidate revised tables, as ingested into shelves, and annotate records"""
        # The message used to reference an undefined name, so a missing directory
        # produced a NameError instead of this explanation.
        assert os.path.isdir( sdir ), 'Input directory not found: %s' % sdir
        # Build the reference tables from the same directory as the revised tables.
        self.ref = refTabChk(sdir)
        fl = sorted( glob.glob( '%s/sh__so_*' % sdir ) )
        tref = util_varGroups.varGroupSs()
        tref.loadGroups()
        self.sh = shelve.open( '%s/sh__consol01_revTabs' % sdir, 'n' )
        self.sh['__cols__'] = ['var', 'table', 'mip', 'vid', 'priority','vgid']
        self.sh['__info__'] = {'label':'revTable', 'title':'Revised variable table records generated by ingest.util_anal.revTabChk'}
        cc = collections.defaultdict( int )
        vgmaps = {'Oyr':'OMIP-Oyr','OImon':'SIMIP-seaicemon'}
        kkk = 0
        for f in fl:
            #
            # CMIP5 and OMIP tables go in as reference tables, and so should not be duplicated here.
            #
            if f.find( 'CMIP5' ) != -1 or f.find( 'OMIP' ) != -1:
                continue
            ss = set()
            sso = set()
            sh = shelve.open( f, 'r' )
            try:
                for k in sh.keys():
                    if k[0] == '_':
                        # metadata entry, not a record
                        continue
                    assert k not in self.sh, 'Duplicate key in revised table items: %s' % k
                    rec = sh[k]

                    vgl = rec[1].replace( '_', '-' )
                    ss.add(vgl)
                    if vgl in ['cf3hr-grid', 'cfMon-3dmod', 'cfMon-2dmod']:
                        # No group lookup for these labels: warn once per file and label.
                        if vgl not in sso:
                            print('Warning: skipping  %s' % (rec,))
                            sso.add( vgl )
                        vgid = '__unset__01__'
                        # Keep vgl1 defined for the sub-group lookup below; it used to be
                        # left unbound (or stale from the previous record) on this path.
                        vgl1 = vgl
                    else:
                        vgl1 = vgmaps.get( vgl, vgl )
                        if vgl1 not in tref.uidByLabel:
                            # fall back on the CMIP5-prefixed label, if there is one
                            if 'CMIP5-%s' % vgl1 in tref.uidByLabel:
                                vgl1 = 'CMIP5-%s' % vgl1

                        if vgl1 not in tref.uidByLabel:
                            print('SEVERE.requestvar.00001: group not found: %s: %s' % (vgl,f))
                            vgid = '__vgid_not_found__'
                        else:
                            vgid = tref.uidByLabel[vgl1]
                        cc[vgl1] += 1

                    vid = rec[3]
                    self.sh[k] = rec[:] + [vgid,]
                    if vid in self.ref.cmv:
                        # the last element of a cmv record is the sub-group label ('' or None when absent)
                        sg = self.ref.cmv[vid][-1]
                        if sg not in [ '',None]:
                            sgs = sg.split('_')[-1]
                            vgl2 = '%s-%s' % (vgl1,sgs)
                            if vgl2 in tref.uidByLabel:
                                vg2 = tref.uidByLabel[vgl2]
                                rr = rec[:] + [vg2,]
                                rr[2] = rr[2] + '_' + sgs
                                k2 = str( uuid.uuid1() )
                                self.sh[k2] = rr
                            else:
                                print('INFO.sg.00050: not found;  %s %s %s' % (vgl2, sg, rec))
                        else:
                            print('INFO.sg.00060: no subgroup;  %s %s %s' % (vgl, sg, rec))
                    else:
                        print('INFO.sg.00070: no cmv;  %s' % (rec,))
                    kkk += 1
            finally:
                sh.close()
            print('%s %s' % (f, ss))
        self.sh.close()

        # Usage count for every known group label (cc is a defaultdict, so unused labels show 0) ...
        for k in sorted( tref.uidByLabel.keys() ):
            print('%s %s' % (k, cc[k]))
        # ... followed by the labels which were counted but are not known groups.
        for k in cc:
            if k not in tref.uidByLabel:
                print('ERROR:  %s %s' % (k, cc[k]))
        print('TOTAL VARS REQUESTED HERE:  %s' % (kkk,))
226 | |
---|
class refTabChk(object):
    """Review the reference variable tables held in '<sdir>/sh__refso_*' shelves.

    Every record is annotated with its variable id, group id and, where
    applicable, table sub-group, and written to '<sdir>/sh__consol01_refTabs'.

    Attributes:
      sh:      the output shelf (closed again before the constructor returns).
      cmv:     dict, record uid -> (table, variable, dimensions, row index, title, sub-group).
      cmvtv:   dict, (table, variable) -> record uid.
      tabInfo: util_gen.tableInfo instance; its ``oh`` list provides the column names.
      actions, file: reset for every input file; ``file`` is the current file name.
    """

    def __init__(self,sdir='inSh'):
        """Review reference tables, as ingested into shelves, and annotate records"""
        # The message used to reference an undefined name, so a missing directory
        # produced a NameError instead of this explanation.
        assert os.path.isdir( sdir ), 'Input directory not found: %s' % sdir
        fl = sorted( glob.glob( '%s/sh__refso_*' % sdir ) )
        tprobs = set()
        self.sh = shelve.open( '%s/sh__consol01_refTabs' % sdir, 'n' )
        self.cmv = dict()
        self.cmvtv = dict()
        self.tabInfo = util_gen.tableInfo()
        self.sh['__cols__'] = self.tabInfo.oh
        self.sh['__info__'] = {'label':'refTable', 'title':'Reference variable table records generated by ingest.util_anal.refTabChk'}

        # Positions of the columns used below.
        cols = self.tabInfo.oh
        itab = cols.index( 'mipTable' )
        icmv = cols.index( 'var' )        # looked up as before, but not used below
        iix = cols.index( 'rowIndex' )
        iv = cols.index( 'out_name' )
        ivid = cols.index( 'vid' )
        idim = cols.index( 'dimensions' )
        iln = cols.index( 'title' )
        iss = cols.index( 'ssect' )
        igpid = cols.index( 'gpid' )
        vref = util_varGroups.ref()
        tref = util_varGroups.varGroupSs()
        tref.loadGroups()
        vmsgs = set()    # variables already reported as missing

        gtm = set()
        sss = set()      # (sub-group, id) pairs already reported
        for f in fl:
            self.actions = collections.defaultdict(int)
            self.file = f
            sh = shelve.open( f, 'r' )
            try:
                ks = [k for k in sh if k[0] != '_']
                for k in ks:
                    rr = sh[k][:]
                    uid = rr[0]
                    try:
                        tab = rr[itab]
                        tab = tref.mapGroupName(tab,'')
                        var = rr[iv]
                        if var not in vref.vars:
                            if var not in vmsgs:
                                print('SEVERE.vars.00010: var not found: %s [%s]' % (var,tab))
                                vmsgs.add( var )
                            vid = '-'
                        else:
                            vid = vref.vars[var][0]

                        if tab not in tref.groupset:
                            print('SEVERE.tables.00001: tab not found: %s' % tab)
                            gpid = '-'
                        else:
                            gpid = tref.groupset[tab][0]
                            gtm.add( (tab,tref.groupset[tab]) )
                    except Exception:
                        # show the offending record before propagating the error
                        print('%s %s' % (k, sh[k]))
                        print('%s %s' % (itab, iv))
                        raise
                    if tab not in tab2freq:
                        tprobs.add(tab)
                    rr[igpid] = gpid
                    rr[ivid] = vid
                    rr[-1] = int( float(rr[-1]) )
                    sg = ''
                    ## check to see if this table has sections, and identify which section this variable is in if appropriate
                    print('INFO.sg.01001:  %s %s' % (tab, tab in self.tabInfo.tabChildren))
                    if rr[iss] != '':
                        # the record names its section explicitly
                        sg = rr[iss]
                        sg1 = sg.replace( '_', '-' )
                        if sg1 in ['cf3hr-grid','Amon-oth']:
                            sgid = '_na_'
                        else:
                            assert sg1 in tref.groupset, 'Subgroup not found in groupset: %s,%s' % (tab,sg1)
                            sgid = tref.groupset[sg1][0]
                        rr[iss] = (sg,sgid)
                        print('INFO.sg.00003:  %s %s' % (sg, sgid))
                    elif tab in self.tabInfo.tabChildren:
                        # the table has sections: derive the section from the record
                        if tab in ['Omon','Oyr']:
                            sg = getSubGroupO(tab, var,rr[idim],rr[iln],rr[iix])
                        else:
                            sg = getSubGroup(tab, var,rr[idim],rr[iln],rr[iix])
                        print('INFO.sg.00001:  %s %s %s %s %s' % (tab, var, rr[idim], rr[iix], sg))
                        if sg is not None:
                            sg1 = sg.replace( '_', '-' )
                            if sg1 in ['Amon-oth']:
                                sgid = '_na_'
                            else:
                                assert sg1 in tref.groupset, 'Subgroup not found in groupset: %s, %s' % (tab,sg)
                                sgid = tref.groupset[sg1][0]
                            rr[iss] = (sg,sgid)
                            if (sg,sgid) not in sss:
                                sss.add( (sg,sgid) )
                                print('INFO.sg.00002:  %s %s' % (sg, sgid))
                    else:
                        assert rr[iss] == '', 'Corrupted subgroup element: %s,%s,%s' % (iss,rr[iss],str(rr))

                    self.cmv[uid] = (tab, var, rr[idim], rr[iix], rr[iln],sg)
                    self.cmvtv[(tab,var)] = uid
                    if var == 'intppcalc':
                        print(rr)
                    if var in {'*','include Oyr 3D tracers'}:
                        print('ERROR.copy.00890:  %s %s' % (f, rr))
                    if k != uid:
                        print('SEVERE.uid.00020: uid/k mismatch:  %s %s' % (k, rr))

                    if var == 'msftmyz':
                        print('INFO.msftmyz.00001:  %s %s' % (vid, rr))
                    self.sh[k] = rr[:]
            finally:
                # the input shelves used to be left open
                sh.close()
        print(gtm)
        self.sh['__source__'] = fl
        self.sh.close()
        print('UNMAPPED tabs (to freq):  %s' % (tprobs,))
340 | |
---|
class dynGrps(object):
    """Print every (mip, record) pair stored in the 'inSh/sh__requestScoping_0' shelf."""

    def __init__(self,rftc):
        """List the request scoping records.

        Parameters:
          rftc: accepted for the benefit of callers; not used here.
        """
        sdir = 'inSh'
        # 'sh' is opened as before (default mode) although nothing is read from
        # it; both shelves are now closed again, also when a record is malformed.
        sh = shelve.open( '%s/sh__requestScoping' % sdir )
        try:
            sh0 = shelve.open( '%s/sh__requestScoping_0' % sdir, 'r' )
            try:
                for k in sh0:
                    mip, rec = sh0[k]
                    print('%s %s' % (mip, rec))
            finally:
                sh0.close()
        finally:
            sh.close()
349 | |
---|
class varGroupChk(object):
    """Review the variable group shelves ('<sdir>/sh__grp_*') and consolidate their records.

    Outputs written by the constructor, all in ``sdir``:
      sh__consol01_groupItems                  one record per group item (see ``consol``);
      sh__consol01_dynVarGroup_requestVars     request variables of dynamically generated groups;
      sh__requestScoping_0 / sh__requestScoping_1   request link and group lookup records.
    The files 'varsets.txt' and 'varGroupSuggestions.csv' are written to the
    working directory when there is something to report.
    """

    cmip5Tables = [u'3hr', u'6hrLev', u'6hrPlev', u'Amon', u'LImon', u'Lmon', u'OImon', u'Oclim', u'Omon', u'Oyr', u'aero', u'cf3hr', u'cfDay', u'cfMon', u'cfOff', u'cfSites', u'day', u'fx']

    def __init__(self,sdir='inSh'):
        """Review var groups, as ingested into shelves, and annotate records"""
        # The message used to reference an undefined name, so a missing directory
        # produced a NameError instead of this explanation.
        assert os.path.isdir( sdir ), 'Input directory not found: %s' % sdir
        # sdir is forwarded so that all inputs are taken from the same directory.
        self.ref = ref(sdir)
        self.shl = {}
        self.group2uid = {}
        self.vgss = util_varGroups.varGroupSs()
        self.vgss.loadGroups()
        self.vgss.setUid()
        self.vgss.checkMaps()
        self.varsetoo = None
        self.grps = set()
        self.tbls = set()
        self.frqs = set()
        self.nnew = 0
        self.nc5 = 0
        self.noth = 0
        self.ots = set()
        isFirst = True
        self.sh = shelve.open( '%s/sh__consol01_groupItems' % sdir, 'n' )
        self.shdvg = shelve.open( '%s/sh__consol01_dynVarGroup_requestVars' % sdir, 'n' )

        self.shdvg['__info__'] = {'label':'dynVarGroup_requestVars', 'title':'Request variable records for dynamically generated variable groups (through variable names listed in request scoping). Generated by ingest.util_anal.varGroupChk'}
        self.shdvg['__cols__'] = ['vid', 'title', 'label', 'priority', 'vgid', 'mip']

        self.sh['__info__'] = {'label':'GroupItemsBeta', 'title':'Group Item records generated by ingest.util_anal.varGroupChk'}
        self.sh['__cols__'] = ['group', 'var', 'table', 'freq', 'descriptionEx', 'shape', 'levels', 'tstyle', 'mask', 'misc', 'mip', 'uid', 'oldv', 'rowIndex', 'var2', 'new', 'gpid', 'vkey', 'vid']
        self.mg = dict()
        self.checkRequestedGroups(sdir)

        for f in sorted( glob.glob( '%s/sh__grp_*' % sdir ) ):
            self.scanFile(f,isFirst)
            isFirst = False

        self.sh.close()
        self.shdvg.close()
        if self.varsetoo is not None:
            self.varsetoo.close()

        # group label -> (mip, uid) lookup for later stages
        sh = shelve.open( '%s/sh__requestScoping_1' % sdir,'n' )
        for k in self.group2uid:
            mip, uid, new = self.group2uid[k]
            sh['%s__1' % str(k)] = (mip, uid )
        sh.close()

        for kkk in sorted(self.mg.keys()):
            print('WARNING: possible problem identifying group:  %s' % (self.mg[kkk],))
        print('groups:  %s' % (self.grps,))
        print('tables:  %s' % (self.tbls,))
        print('frequencies:  %s' % (self.frqs,))
        print('nrefs to new: %s, cmip5: %s, other: %s' % (self.nnew, self.nc5, self.noth))
        print('other tables:  %s' % (self.ots,))
        for k in self.shl:
            self.shl[k].close()

    def scanFile(self,f,isFirst):
        """Check every record of the group shelf ``f`` and pass it to ``consol``.

        Parameters:
          f:       path of a 'sh__grp_*' shelf; it is kept open in ``self.shl[f]``.
          isFirst: when true, the column names of the shelf are printed.
        """
        self.actions = collections.defaultdict(int)
        self.file = f
        self.shl[f] = shelve.open( f, 'r' )
        self.mip = self.shl[f]['__info__']['label']
        if isFirst:
            print(self.shl[f]['__cols__'])
        ks = [k for k in self.shl[f].keys() if k[0] != '_']

        for k in ks:
            r = self.shl[f][k]
            self.grps.add(r[0])
            self.tbls.add(r[2])
            self.frqs.add(r[3])
            var = r[1].strip()
            if var != r[1]:
                print('WARNING.blanks.0001: *%s* and *%s (%s,%s)' % (var,r[1],r[0],f))
            extra = {}
            if r[2] == 'new' or r[2][:2] == 'em':
                extra['mode'] = 'new'
                self.nnew +=1
                if var not in self.ref.vars:
                    if var.find('_') != -1:
                        v2 = var.replace('_','')
                        if v2 in self.ref.vars:
                            print('WARN: %s should be replaced with %s' % (var,v2))
                    print('ERROR.missing.0001: variable not found:  %s %s %s' % (var, r[0], f))
                else:
                    if r[9] == '':
                        p1 = -1
                    else:
                        p1 = int( r[9] )
                    p2 = int( self.ref.vars[var][1] )
                    # fall back on the reference priority when the record has no valid one
                    if p1 not in [1,2,3]:
                        if p2 in [1,2,3]:
                            extra['priority'] = p2
                        else:
                            print('ERROR.priority.0001: %s (%s::%s), ref: %s, templ: %s' % (var,r[0],f,p2,p1))

            elif r[2] in self.cmip5Tables or (r[2][:5] == 'CMIP5' and r[2][6:] in self.cmip5Tables):
                extra['mode'] = 'CMIP5'
                self.nc5 += 1
                if var not in self.ref.vars:
                    print('ERROR.missing.0003: variable not found:  %s %s %s' % (var, r[0], f))
            else:
                extra['mode'] = 'OTHER MIP'
                self.noth += 1
                self.ots.add( r[2] )
                if var not in self.ref.vars:
                    print('ERROR.missing.0002: variable not found:  %s %s %s' % (var, r[0], f))
            ##
            ## but this is the MIP variable id ...... need the CMOR variable ID ?? ...
            ##
            vid = self.ref.vars.get(var,[None,])[0]
            self.consol(k,r,vid,extra)
        for k in sorted( self.actions.keys() ):
            print('ACTIONS: %s: %s -- %s' % (f,k,self.actions[k]))

    def varsets(self, thisuid, i1, refuid, i2, title ):
        """Append one tab separated line linking two records to 'varsets.txt'.

        The file is opened on first use; the constructor closes it.
        """
        if self.varsetoo is None:
            self.varsetoo = open('varsets.txt','w' )
        self.varsetoo.write( '\t'.join( [thisuid, str(i1), refuid, str(i2), title] ) + '\n' )

    def consol(self,rk,rr,vid,extra):
        """Normalise one group item record and store it, with any level variants.

        Parameters:
          rk:    key of the record; must equal the uid stored in the record.
          rr:    input record (sequence); it is copied, not modified.
          vid:   variable id, stored as the last element of the output record(s).
          extra: dict of annotations; an optional 'priority' entry overrides
                 the priority of the record.

        Records whose MIP (element 10) is 'OMIP' are counted but not stored.
        """
        ##['Short name of group', 'Variable short name', 'Table', 'Frequency', 'Description extension (optional)', 'Shape', 'Levels', 'Time mean, point or climatology', 'Mask (optional)', 'Priority', 'MIP','uid','rowIndex', 'Prev. Var Name']
        ##
        ## following is provided by sx2.py:
        ##['group', 'var', 'table', 'freq', 'descriptionEx', 'shape', 'levels', 'tstyle', 'mask', 'misc', 'mip', 'uid', 'rowIndex', 'new', 'gpid', 'vkey', 'vid']
        ## need to add vid (info found above), vgid, "new" flag, (also above), and "vkey".

        rset = []
        r = list(rr)
        if 'priority' in extra:
            r[9] = extra['priority']
        elif r[9] == '':
            r[9] = -1
        else:
            r[9] = int( r[9] )

        if not self.vgss.matchGroup(r[0],self.mip):
            print('ERROR.group.00006: group not found: %s [%s]' % (r[0],self.mip))
            self.mg[r[0]] = (self.mip,r)
            gpid = '__noGroupFound__'
            new = 0
        else:
            print('INFO.group.00007: group found: %s (%s):: %s, %s' % (r[0],self.mip,self.vgss.groupName, self.vgss.groupMatch ))
            if r[0] in self.group2uid:
                assert self.vgss.groupMatchRes == self.group2uid[r[0]], 'Mismatch in group lookup ..%s [%s,%s]' % (r[0], str(self.vgss.groupMatchRes), str(self.group2uid[r[0]]) )
            else:
                self.group2uid[r[0]] = self.vgss.groupMatchRes
            gpid = str(self.vgss.groupMatchRes[1] )
            if self.vgss.groupMatchRes[2] == 'new':
                new = 1
            else:
                new = -1

        # positions in the record: levels, description extension, priority, uid
        il = 6
        idx = 4
        ixp = 9
        iu = 11
        assert r[iu] == rk, 'CONFUSED ABOUT UIDs? %s, %s' % (rk,r[iu])
        var2 = r[1]
        if r[il] in {u'17 (or 23 )', u'17 (or 23)', u'17 (or23)', u'10/17/23'}:
            # One record per level set: the base record keeps its uid and gets
            # priority 1, the variants get new uids and priorities 2, 3, ...
            self.actions['Create 17/23 level pair'] += 1
            if r[il] == u'10/17/23':
                lev0 = 10
                levsp = [19,23]
            else:
                lev0 = 19
                levsp = [23,]
            r[ixp] = 1
            r[il] = lev0
            var2 = var2 + str( lev0 )
            p = 1
            for lp in levsp:
                p += 1
                rr0 = r + [r[1] + str(lp),]
                rr0[ixp] = p
                rr0[il] = lp
                rr0[iu] = str( uuid.uuid1() )
                variant = rr0 + [new, gpid, 0, vid]
                if r[1] == 'hus':
                    print('INFO.hus.00202:  %s' % (variant,))
                rset.append( variant )
                # Link the variant to the base record.  The reference uid used to be
                # read from a local which shadowed the ``rr`` parameter, so each
                # variant was linked to itself.
                self.varsets(rr0[iu], 2, r[iu], 1, 'variables at multiple vertical resolutions' )

        elif r[il] in [u'Model levels or plev_27', u'Model levels or 27Plevs',u'27',27.]:
            self.actions['27 level variant of variable name created' ] += 1
            var2 += '27'
            r[il] = 27
        elif r[idx].find( '850 hPa' ) != -1:
            if r[1][-3:] != '850':
                self.actions['850mb variant of variable generated'] += 1
                var2 += '850'
        elif isinstance( r[il], float ) and r[il] == 7. and r[10] == 'HighResMIP' and r[5] != 'XYZKT':
            self.actions['Shape modified to XYZKT*'] += 1
            r[5] = r[5] + '*'
            r[il] = '7h'
        r += [var2,new, gpid, 0, vid]
        rset.append( r )
        if r[10] != 'OMIP':
            for ri in rset:
                self.consol_sub01( ri, extra )
        else:
            self.actions['OMIP records skipped'] += 1

    def consol_sub01( self, r, extra ):
        """Write out a requestItem record, which is split into a CMORvar and requestVar in isd3b.py"""
        iu = 11
        assert r[iu] not in self.sh
        assert len(r) == len( self.sh['__cols__'] ), 'Bad record length .. %s' % str(r)
        self.sh[r[iu]] = r[:]

    def checkRequestedGroups(self,sdir='inSh'):
        """Match the groups named in the request scoping records against the known groups.

        Reads '<sdir>/sh__requestScoping', writes one record per matched request to
        '<sdir>/sh__requestScoping_0' and lists the unmatched (mip, group) pairs in
        'varGroupSuggestions.csv', each with a newly generated uid.
        """
        sh = shelve.open( '%s/sh__requestScoping' % sdir, 'r' )
        sh0 = shelve.open( '%s/sh__requestScoping_0' % sdir, 'n' )
        cc = collections.defaultdict( int )
        ff = set()
        # read the records once: every access to a shelf entry unpickles it again
        records = sh['__records__']
        for mip in sorted( records.keys() ):
            self.mip = mip
            for rec in records[mip]:
                g = rec[0]
                if g[:5] == 'CMIP5':
                    g = g[6:]
                t = self.vgss.recChk(rec,mip)
                dyngrp = t[0]
                if dyngrp:
                    mtab, g = t[2]
                # matchGroup is called in every case: its results are read from self.vgss below
                if not self.vgss.matchGroup(g,self.mip) and not dyngrp:
                    print('FAILED to match request group:  %s %s' % (self.mip, rec))
                    ff.add( (self.mip,g) )
                else:
                    print('INFO.group.00008: group found: %s (%s):: %s, %s' % (g,self.mip,self.vgss.groupName, self.vgss.groupMatch ))
                    assert self.vgss.groupMatchRes is not None, 'Should have results from matchGroup here!!'
                    uid = str(self.vgss.groupMatchRes[1] )
                    rec[0] = self.vgss.groupName
                    # key: group uid, running count for that uid, stage number
                    kkk = '%s__%2.2i__%s' % (uid,cc[uid],0)
                    cc[uid] += 1
                    assert kkk not in sh0, 'Duplicate key for request link record: %s' % kkk
                    sh0[kkk] = [self.mip, rec ]
                    print('INFO.vgrp.00001:  %s %s %s' % (kkk, self.mip, rec))
                    if dyngrp:
                        self.saveDynGrp(g,mtab,uid)
        ##
        ## need to generate request variables for dynamic groups here (those detected via vgss )
        ## would be nice to stage things ... create variable database and then add requests ....
        ## "all at once" risks mis-alignments ...
        ##
        ## need to fill in details on dynamic groups after completion of scan of new groups.
        ##
        ##
        sh.close()
        sh0.close()
        if len(ff) > 0:
            oo = open( 'varGroupSuggestions.csv', 'w' )
            try:
                for t in sorted( ff ):
                    oo.write( '%s\t%s' % t )
                    oo.write( '\t%s\n' % str( uuid.uuid1() ) )
            finally:
                oo.close()

    def saveDynGrp(self,glab,tab,rgid):
        """Store one request variable record for each variable of dynamic group ``glab``.

        Parameters:
          glab: group label, used as key into ``self.vgss.s2``.
          tab:  accepted for the benefit of callers; not used here.
          rgid: uid of the group, stored in each record.
        """
        this = self.vgss.s2[glab]
        print('INFO.dyngrp.00001:  %s %s %s' % (rgid, this, glab))
        for k in this:
            if k[0] == '_':
                # metadata entry, not a variable
                continue
            if this[k] is None:
                print('ERROR.dyngrp.00002: variable not found:  %s %s %s' % (rgid, k, glab))
            else:
                vid, priority = this[k]
                rec = [vid, '%s: %s' % (glab,k), k, priority, rgid, self.mip]
                uid = str(uuid.uuid1() )
                self.shdvg[uid] = rec
627 | |
---|