1 | |
---|
2 | import shelve, glob, os, uuid, collections, string |
---|
3 | from utils_wb import workbook |
---|
4 | |
---|
##
## want to match up with input/vars_... and where is cmv uid fixed ????
##
##
## need to migrate new vars into input/vars_ ... especially from LUMIP ...
##
## new variables which need to be added have been identified. No check for modifications (and no clear workflow here)...
## need to check .. a few appear spurious .. and then generate csv records ... for manual copy ...
##
##
##
## add AerChemMip and VIACS to stuff scanned and checked below .....
17 | |
---|
class ref(object):
    """Reference variable table read from the 'var' sheet of the input workbook.

    After construction ``self.vars`` maps the value of column 0 of each row
    to a ``(column 9 value, column 10 value)`` tuple.  The constructor also
    scans the ``sh__newVar_*`` shelves found in ``sdir`` and prints a
    ``MISSING NEW`` line for every record key absent from ``self.vars``.
    """

    ## location of the reference workbook (hard-coded paths, kept as class
    ## attributes so that they can be overridden without editing __init__)
    workbookDir = '/data/tmp/svn3/exarch/CMIP6dreqbuild/trunk/src/workbook'
    workbookFile = 'inputs/vars_20160721.xls'

    def __init__(self, sdir='inSh'):
        """Load the reference variables and report new variables missing from them.

        sdir: directory scanned for ``sh__newVar_*`` shelves.
        Raises AssertionError if a variable name occurs twice in the sheet.
        """
        wb = workbook('%s/%s' % (self.workbookDir, self.workbookFile))
        self.vars = self._readVars(wb.book.sheet_by_name(u'var'))
        self._reportMissing(sdir)

    @staticmethod
    def _readVars(sheet):
        """Return {name: (col 9, col 10)} for every row of ``sheet``.

        ``sheet`` must provide ``nrows`` and ``row(i)``, the latter returning
        a sequence of cells with a ``value`` attribute.
        """
        found = {}
        for i in range(sheet.nrows):
            r = sheet.row(i)
            name = r[0].value
            ## the keys are cell *values*: testing the cell object itself
            ## (as was done previously) could never detect a duplicate.
            assert name not in found, 'DUPLICATE VARIABLE: %s' % str(r)
            found[name] = (r[9].value, r[10].value)
        return found

    def _reportMissing(self, sdir):
        """Print every record of the ``sh__newVar_*`` shelves not in ``self.vars``."""
        for f in sorted(glob.glob('%s/sh__newVar_*' % sdir)):
            sh = shelve.open(f, 'r')
            try:
                ## keys starting with '_' are skipped (not treated as records)
                for k in [k for k in sh.keys() if k[0] != '_']:
                    if k not in self.vars:
                        print('MISSING NEW: %s, %s, %s' % (k, f, str(sh[k])))
            finally:
                sh.close()
36 | |
---|
class varGroupChk(object):
    """Review variable-group shelves and consolidate their records.

    Construction does all the work: it reads every ``sh__grp_*`` shelf in the
    input directory, checks each record against the reference variable list
    (``ref``), prints warnings/errors, and writes the (possibly expanded)
    records to the ``sh__consol01_grp`` shelf.
    """

    cmip5Tables = [u'3hr', u'6hrLev', u'6hrPlev', u'Amon', u'LImon', u'Lmon', u'OImon', u'Oclim', u'Omon', u'Oyr', u'aero', u'cf3hr', u'cfDay', u'cfMon', u'cfOff', u'cfSites', u'day', u'fx']

    ## values of the "levels" column which trigger creation of extra records
    levelPairLabels = {u'17 (or 23 )', u'17 (or 23)', u'17 (or23)', u'10/17/23'}
    level27Labels = [u'Model levels or plev_27', u'Model levels or 27Plevs', u'27', 27.]

    def __init__(self, sdir='inSh'):
        """Review var groups, as ingested into shelves, and annotate records

        sdir: directory holding the input shelves; output shelves are
              created in the same directory.
        Raises AssertionError if ``sdir`` is not a directory.
        """
        assert os.path.isdir(sdir), 'Input directory not found: %s' % sdir
        fl = sorted(glob.glob('%s/sh__grp_*' % sdir))
        self.ref = ref(sdir)
        self.shl = {}
        self.group2uid = {}
        ## MIP under which each entry of group2uid was first matched
        self.group2mip = {}
        grps = set()
        tbls = set()
        frqs = set()
        ots = set()
        counts = collections.defaultdict(int)
        isFirst = True
        self.sh = shelve.open('%s/sh__consol01_grp' % sdir, 'n')
        self.shc = shelve.open('%s/sh__consol01_groupItems' % sdir, 'n')
        try:
            self.shc['__info__'] = {'label': 'GroupItemsBeta', 'title': 'Group Item records generated by ingest.util_anal.varGroupChk'}
            self.shc['__cols__'] = ['group', 'var', 'table', 'freq', 'descriptionEx', 'shape', 'levels', 'tstyle', 'mask', 'misc', 'mip', 'uid', 'rowIndex', 'var2', 'new', 'gpid', 'vkey', 'vid']
            self.mg = dict()
            self.loadGroups()
            self.checkRequestedGroups(sdir)
            for f in fl:
                self.actions = collections.defaultdict(int)
                self.file = f
                self.shl[f] = shelve.open(f, 'r')
                self.mip = self.shl[f]['__info__']['label']
                if isFirst:
                    print(self.shl[f]['__cols__'])
                    isFirst = False
                ks = [k for k in self.shl[f].keys() if k[0] != '_']
                print('%s %s' % (f, len(ks)))
                for k in ks:
                    r = self.shl[f][k]
                    grps.add(r[0])
                    tbls.add(r[2])
                    frqs.add(r[3])
                    var = r[1].strip()
                    if var != r[1]:
                        print('WARNING.blanks.0001: *%s* and *%s (%s,%s)' % (var, r[1], r[0], f))
                    extra = self._reviewRecord(r, var, f)
                    counts[extra['mode']] += 1
                    if extra['mode'] == 'OTHER MIP':
                        ots.add(r[2])
                    ##
                    ## but this is the MIP variable id ...... need the CMOR variable ID ?? ...
                    ##
                    vid = self.ref.vars.get(var, [None, ])[0]
                    self.consol(k, r, vid, extra)
                for k in sorted(self.actions.keys()):
                    print('ACTIONS: %s: %s -- %s' % (f, k, self.actions[k]))

            sh = shelve.open('%s/sh__requestScoping' % sdir)
            try:
                for k in self.group2uid:
                    ## group2uid holds (matched key, uid, flag) triples; the
                    ## former "mip, uid = self.group2uid[k]" could not unpack them.
                    ## NOTE(review): (mip, uid) is inferred from the variable
                    ## names used originally -- confirm against the consumer.
                    sh['%s__1' % str(k)] = (self.group2mip[k], self.group2uid[k][1])
            finally:
                sh.close()

            for kkk in sorted(self.mg.keys()):
                print('WARNING: possible problem identifying group:  %s' % (self.mg[kkk],))
            print('groups:  %s' % (grps,))
            print('tables:  %s' % (tbls,))
            print('frequencies:  %s' % (frqs,))
            print('nrefs to new: %s, cmip5: %s, other: %s' % (counts['new'], counts['CMIP5'], counts['OTHER MIP']))
            print('other tables:  %s' % (ots,))
        finally:
            ## close every shelf exactly once, also when the review fails
            self.sh.close()
            self.shc.close()
            for k in self.shl:
                self.shl[k].close()

    def _reviewRecord(self, r, var, f):
        """Classify record ``r`` by its table (``r[2]``) and report problems.

        var: stripped variable name (``r[1]``); f: name of the source shelf,
        used in messages only.  Returns a dictionary with key ``mode``
        ('new', 'CMIP5' or 'OTHER MIP') and, for 'new' records whose own
        priority (``r[9]``) is not 1, 2 or 3 while the reference one is,
        key ``priority`` holding the reference priority.
        """
        extra = {}
        tab = r[2]
        if tab == 'new' or tab[:2] == 'em':
            extra['mode'] = 'new'
            if var not in self.ref.vars:
                if var.find('_') != -1:
                    v2 = var.replace('_', '')
                    if v2 in self.ref.vars:
                        print('WARN: %s should be replaced with %s' % (var, v2))
                print('ERROR.missing.0001: variable not found:  %s %s %s' % (var, r[0], f))
            else:
                p1 = -1 if r[9] == '' else int(r[9])
                p2 = int(self.ref.vars[var][1])
                if p1 not in [1, 2, 3]:
                    if p2 in [1, 2, 3]:
                        extra['priority'] = p2
                    else:
                        print('ERROR.priority.0001: %s (%s::%s), ref: %s, templ: %s' % (var, r[0], f, p2, p1))
        elif tab in self.cmip5Tables or (tab[:5] == 'CMIP5' and tab[6:] in self.cmip5Tables):
            extra['mode'] = 'CMIP5'
            if var not in self.ref.vars:
                print('ERROR.missing.0003: variable not found:  %s %s %s' % (var, r[0], f))
        else:
            extra['mode'] = 'OTHER MIP'
            if var not in self.ref.vars:
                print('ERROR.missing.0002: variable not found:  %s %s %s' % (var, r[0], f))
        return extra

    def loadGroups(self):
        """Load ``self.groupset`` from 'Sheet1' of sortedVarGroups.xls.

        Each row is stored under its column 0 value as the tuple
        (col 7, col 8, col 1, col 2, [col 4 if non-empty,] col 6).
        matchGroup uses element 2 as the MIP, element 3 as an alternative
        name and elements 4 onwards as lookup keys.
        """
        wb = workbook('sortedVarGroups.xls')
        sh = wb.book.sheet_by_name('Sheet1')
        self.groupset = {}
        for i in range(sh.nrows):
            rr = [x.value for x in sh.row(i)]
            if rr[4] == u'':
                self.groupset[rr[0]] = (rr[7], rr[8], rr[1], rr[2], rr[6])
            else:
                self.groupset[rr[0]] = (rr[7], rr[8], rr[1], rr[2], rr[4], rr[6])

    def matchGroup(self, k):
        """Try to identify group name ``k`` for the current ``self.mip``.

        Tried in order (``self.groupMatch`` records which one succeeded):
          1: '<mip>.<k>' is a lookup key of a group of this MIP;
          0: ``k`` is itself a key of ``self.groupset``;
          2: 'CMIP5-<k with "_" replaced by "-">' is a key of ``self.groupset``;
          3: ``k`` is the alternative name of a group of this MIP.
        Groups of LUMIP are also considered when the current MIP is C4MIP.

        On success returns True and sets ``self.groupMatchRes`` to
        (matched key, groupset[..][0], groupset[..][1]); otherwise returns
        False and leaves ``self.groupMatchRes`` as None (``self.groupMatch``
        is not reset).  Raises AssertionError on duplicated lookup keys.
        """
        td = dict()
        td2 = dict()
        mext = {'C4MIP': 'LUMIP'}
        self.groupMatchRes = None
        for x in self.groupset:
            gs = self.groupset[x]
            if gs[2] == self.mip or (self.mip in mext and gs[2] == mext[self.mip]):
                for y in gs[4:]:
                    assert y not in td, 'Duplicated lookup for %s (%s,%s)' % (y, td[y], gs[0])
                    td[y] = (gs[0], gs[1])
                if gs[3] != u'':
                    assert gs[3] not in td2, 'Duplicated lookup for %s (%s,%s)' % (gs[3], td2[gs[3]], gs[0])
                    td2[gs[3]] = (gs[0], gs[1])

        k1 = '%s.%s' % (self.mip, k)
        if k1 in td:
            self.groupMatch = 1
            self.groupMatchRes = (k1, td[k1][0], td[k1][1])
            return True

        if k in self.groupset:
            self.groupMatch = 0
            self.groupMatchRes = (k, self.groupset[k][0], self.groupset[k][1])
            return True

        kb = '%s-%s' % ('CMIP5', k.replace('_', '-'))
        if kb in self.groupset:
            self.groupMatch = 2
            ## both elements come from the kb entry: k is known not to be a
            ## key of groupset here, so groupset[k] always raised KeyError.
            self.groupMatchRes = (kb, self.groupset[kb][0], self.groupset[kb][1])
            return True

        if k in td2:
            self.groupMatch = 3
            self.groupMatchRes = (k, td2[k][0], td2[k][1])
            return True

        return False

    def consol(self, rk, rr, vid, extra):
        """Normalise record ``rr`` and store it (and any variants) in ``self.sh``.

        rk:    key of the record in its source shelf; must equal ``rr[11]``.
        rr:    the record (not modified; a copy is edited).
        vid:   variable identifier found in the reference list (currently unused).
        extra: dictionary from the review; ``extra['priority']``, if present,
               replaces the priority in column 9.

        The stored record is the edited copy with the derived variable name
        ("var2") appended.  For the "17 (or 23)" style level labels an
        additional record per extra level set is created, with a fresh uid,
        the level count in column 6 and priority 2, 3, ...  Records of OMIP
        (column 10) are not stored.  ``self.actions`` counts what was done.
        """
        ##['Short name of group', 'Variable short name', 'Table', 'Frequency', 'Description extension (optional)', 'Shape', 'Levels', 'Time mean, point or climatology', 'Mask (optional)', 'Priority', 'MIP','uid','rowIndex', 'Prev. Var Name']
        ##
        ## following is provided by sx2.py:
        ##['group', 'var', 'table', 'freq', 'descriptionEx', 'shape', 'levels', 'tstyle', 'mask', 'misc', 'mip', 'uid', 'rowIndex', 'new', 'gpid', 'vkey', 'vid']
        ## need to add vid (info found above), vgid, "new" flag, (also above), and "vkey".
        il = 6
        idx = 4
        ixp = 9
        iu = 11

        rset = []
        r = list(rr)
        if 'priority' in extra:
            r[ixp] = extra['priority']
        elif r[ixp] == '':
            r[ixp] = -1
        else:
            r[ixp] = int(r[ixp])

        if not self.matchGroup(r[0]):
            ## NB: the list stored here is the one extended with var2 below
            self.mg[r[0]] = (self.mip, r)
        elif r[0] in self.group2uid:
            assert self.groupMatchRes == self.group2uid[r[0]], 'Mismatch in group lookup ..%s [%s,%s]' % (r[0], str(self.groupMatchRes), str(self.group2uid[r[0]]))
        else:
            self.group2uid[r[0]] = self.groupMatchRes
            self.group2mip[r[0]] = self.mip
        ## NOTE(review): a list r + [new, gpid, 0, vid] used to be built here
        ## (new: 1/-1/0 from groupMatchRes[2] == 'new' / otherwise / no match;
        ## gpid: str(groupMatchRes[1]) or '__noGroupFound__') but it was never
        ## stored, so records carry var2 only and not the last four fields
        ## listed in __cols__.  Left unchanged pending confirmation of the
        ## intended record layout.

        assert r[iu] == rk, 'CONFUSED ABOUT UIDs? %s, %s' % (rk, r[iu])
        var2 = r[1]
        if r[il] in self.levelPairLabels:
            self.actions['Create 17/23 level pair'] += 1
            if r[il] == u'10/17/23':
                lev0 = 10
                levsp = [19, 23]
            else:
                lev0 = 19
                levsp = [23, ]
            r[ixp] = 1
            var2 = var2 + str(lev0)
            p = 1
            for lp in levsp:
                p += 1
                rr0 = r + [r[1] + str(lp), ]
                rr0[ixp] = p
                rr0[il] = lp
                rr0[iu] = str(uuid.uuid1())
                rset.append(rr0)
        elif r[il] in self.level27Labels:
            self.actions['27 level variant of variable name created'] += 1
            var2 += '27'
        elif r[idx].find('850 hPa') != -1:
            if r[1][-3:] != '850':
                self.actions['850mb variant of variable generated'] += 1
                var2 += '850'
        elif isinstance(r[il], float) and r[il] == 7. and r[10] == 'HighResMIP' and r[5] != 'XYZKT':
            self.actions['Shape modified to XYZKT*'] += 1
            r[5] = r[5] + '*'

        r.append(var2)
        rset.append(r)
        if r[10] != 'OMIP':
            for ri in rset:
                self.consol_sub01(ri, extra)
        else:
            self.actions['OMIP records skipped'] += 1

    def consol_sub01(self, r, extra):
        """Store a copy of record ``r`` in ``self.sh`` under its uid (``r[11]``).

        Raises AssertionError if the uid is already present.  ``extra`` is
        not used.
        """
        iu = 11
        assert r[iu] not in self.sh
        self.sh[r[iu]] = r[:]

    def checkRequestedGroups(self, sdir='inSh'):
        """Match the groups named in the request scoping shelf.

        For every MIP key of the ``__records__`` entry of
        ``<sdir>/sh__requestScoping`` the first element of each record is
        taken as a group name and passed to matchGroup (with ``self.mip``
        set to that MIP).  Matched groups are written back to the shelf
        under '<uid>__0' as [mip, group]; failures are printed.
        """
        sh = shelve.open('%s/sh__requestScoping' % sdir)
        try:
            ## fetch once: every shelf access unpickles the whole entry
            records = sh['__records__']
            for mip in records.keys():
                reqg = {r[0] for r in records[mip]}
                self.mip = mip
                for g in reqg:
                    if not self.matchGroup(g):
                        print('FAILED to match request group:  %s %s' % (self.mip, g))
                    else:
                        assert self.groupMatchRes is not None, 'Should have results from matchGroup here!!'
                        uid = str(self.groupMatchRes[1])
                        sh['%s__%s' % (uid, 0)] = [self.mip, g]
        finally:
            sh.close()
286 | |
---|