1 | from dreqPy import dreq |
---|
2 | import collections, string, os, utils_wb |
---|
3 | import htmlTemplates as tmpl |
---|
4 | import xml, re, uuid |
---|
5 | import xml.dom, xml.dom.minidom |
---|
6 | import sets |
---|
7 | import xlsxwriter |
---|
8 | |
---|
9 | from utils_wb import uniCleanFunc |
---|
10 | |
---|
## Optional reference table mapping a record uid to (field1, field2, integer value);
## loaded from a 4-column tab-separated file. When the file is absent, refpix is
## None and the extra xlsx column in dumpxlsx is skipped.
if os.path.isfile( 'refDefaultP.txt' ):
    refpix = {}
    # context manager closes the handle (the original leaked it), and the
    # py2-only string.split/string.strip calls become str methods.
    with open( 'refDefaultP.txt' ) as fh:
        for l in fh:
            bits = l.strip().split( '\t' )
            assert len(bits) == 4, 'Bad record found in %s' % 'refDefaultP.txt'
            refpix[bits[0]] = (bits[1], bits[2], int(bits[3]))
else:
    refpix = None
---|
19 | |
---|
# Matches an empty line; used when filtering blank lines out of the pretty-printed xml.
empty=re.compile('^$')

# Source document: xml snapshot of the data request workbook.
src1 = '../workbook/trial2_20150831.xml'

dq = dreq.loadDreq(dreqXML=src1)
inx = dq.inx   # master index: uid maps, per-section label lookups, back-references
##inx.makeVarRefs()
# Lookup tables populated while scanning the request, keyed by uid:
ix_rql_uid = {}     # requestLink records
ix_rqvg_uid = {}    # requestVarGroup records
ix_ovar_uid = {}    # ovar / CMORvar records
ix_gpi_uid = {}     # groupItem records
list_gp_ovar = collections.defaultdict( list )   # group uid -> [groupItem uids]
xr_var_ovar = collections.defaultdict( list )    # var uid -> [ovar uids]
xr_var_gpi = collections.defaultdict( list )     # var uid -> [groupItem uids]
rql_by_name = collections.defaultdict( list )    # requestLink label -> [uids]
---|
35 | |
---|
def makeVarRefs(uid, var, iref_by_uid):
    """Collect, for each variable uid, summaries of the items that reference it.

    uid: mapping from uid string to a record; each record exposes ._h.label
         (its section name) plus section-specific attributes.
    var: index object whose .uid attribute maps variable uids to records.
    iref_by_uid: mapping uid -> list of (attribute, referring-uid) pairs.

    Returns {variable uid: {section label: [summary strings]}}, covering only
    the 'groupItem', 'ovar' and 'revisedTabItem' sections; variables with no
    incoming references are omitted.
    """
    varRefs = {}
    for thisuid in var.uid.keys():
        # 'k in d' replaces the py2-only dict.has_key
        if thisuid in iref_by_uid:
            ee1 = collections.defaultdict( list )
            for k, i in iref_by_uid[thisuid]:
                thisi = uid[i]
                sect = thisi._h.label
                if sect == 'groupItem':
                    ee1[sect].append( '%s.%s' % (thisi.mip, thisi.group) )
                elif sect == 'ovar':
                    ee1[sect].append( thisi.mipTable )
                elif sect == 'revisedTabItem':
                    ee1[sect].append( '%s.%s' % (thisi.mip, thisi.table) )
            varRefs[thisuid] = ee1
    return varRefs
---|
52 | |
---|
# uid -> {section: [reference summaries]} for every variable that is referenced
varRefs = makeVarRefs( inx.uid, inx.var, inx.iref_by_uid)
---|
54 | |
---|
class updates(object):
    """Parse hand-edited duplicate (varDup) and multiple-definition (varMult)
    review files and accumulate the resulting actions.

    After scandup()/scanmult():
      repl  -- uid -> uid it should be replaced by
      upd   -- uid -> {'provNote','tags','label','title'} metadata updates
      twins -- uids flagged as legitimate co-existing records
      ddel  -- uid -> (token, note) for records marked for deletion handling
    """

    # column-10 tokens that mark a pair of records for deletion handling
    # (builtin set replaces the removed py2 'sets.Set')
    delToks = set( ['inc', 'omit'] )

    def __init__(self, fndup, fnmult, idir='rev1'):
        """fndup/fnmult: names of the duplicate and multiple listings inside idir."""
        assert os.path.isdir( idir ), 'Directory %s not found' % idir
        self.fdup = '%s/%s' % (idir, fndup)
        self.fmult = '%s/%s' % (idir, fnmult)
        for p in [self.fdup, self.fmult]:
            assert os.path.isfile( p ), 'File %s not found' % p
        self.repl = {}    # uid -> replacement uid
        self.upd = {}     # uid -> metadata update dict
        self.twins = []   # uids of twin records
        self.ddel = {}    # uid -> (token, note)

    def scandup(self):
        """Parse the duplicates file: one header line, then records in pairs.

        Columns 8/9 carry (uid, replacement-uid); column 10 carries a token
        ('twin' or a delToks member); column 11 a note.
        """
        with open( self.fdup ) as fh:
            ii = fh.readlines()
        nn = (len(ii) - 1) // 2   # floor division: pair count (true '/' broke on py3)
        for i in range(nn):
            l1 = ii[i*2 + 1].split( '\t' )
            l2 = ii[i*2 + 2].split( '\t' )
            xx = l1[8:10]
            yy = l2[8:10]
            if xx[1] == '' and yy[1] == xx[0]:
                # second record points at the first: replace second by first
                assert yy[0] not in self.repl, 'duplicate replacement request for %s' % yy[0]
                self.repl[ yy[0] ] = yy[1]
            elif yy[1] == '' and xx[1] == yy[0]:
                # first record points at the second
                assert xx[0] not in self.repl, 'duplicate replacement request for %s' % xx[0]
                self.repl[ xx[0] ] = xx[1]
            elif l1[10] == 'twin' and l2[10] == 'twin':
                self.twins.append( l1[8] )
                self.twins.append( l2[8] )
            elif l1[10] in self.delToks and l2[10] in self.delToks:
                self.ddel[ l1[8] ] = (l1[10], l1[11])
                self.ddel[ l2[8] ] = (l2[10], l2[11])
            elif xx[1] == '' and yy[1] == "":
                # neither record was edited: report and carry on
                print( 'WARN.087.00001: uncorrected duplication ..... %s ' % str( l1[:5] ) )
            else:
                print( 'ERROR.xxx.0001: Match failed' )
                print( l1 )
                print( l2 )
                assert False

    def scanmult(self):
        """Parse the multiples file: one header line, then records in triples.

        Column 9 names a replacement uid (must be one of the triple's own uids);
        column 10 may flag a twin; column 11 'update' marks a metadata update
        described by columns 12 (provNote) and 13 (comma-separated tags).
        """
        with open( self.fmult ) as fh:
            ii = fh.readlines()
        nn = (len(ii) - 1) // 3
        for i in range(nn):
            l1 = ii[i*3 + 1].split( '\t' )
            l2 = ii[i*3 + 2].split( '\t' )
            l3 = ii[i*3 + 3].split( '\t' )
            yy = [l1[9], l2[9], l3[9]]
            xx = [l1[8], l2[8], l3[8]]
            zz = (l1, l2, l3)
            for j in range(3):
                if yy[j] != '':
                    assert yy[j] in xx, 'Invalid replacement option, %s' % yy[j]
                    assert xx[j] not in self.repl, 'duplicate replacement request for %s' % xx[j]
                    self.repl[ xx[j] ] = yy[j]
                elif zz[j][10] == 'twin':
                    self.twins.append( zz[j][8] )
                elif zz[j][11] == 'update':
                    tags = [t.strip() for t in zz[j][13].split( ',' )]
                    self.upd[ xx[j] ] = { 'provNote': zz[j][12], 'tags': tags,
                                          'label': zz[j][0], 'title': zz[j][1] }
---|
121 | |
---|
122 | ### |
---|
123 | ### varDup and varMult created in first parse ----- then editted to select options |
---|
124 | ### 2nd pass through then generates the replace and remove options -- taking into account cross references |
---|
125 | ### the results of the 2nd pass go back to ../workbook to generate a new set of inputs. |
---|
126 | ### |
---|
127 | up = updates('varDup.csv', 'varMult.csv', idir='rev2') |
---|
128 | up.scandup() |
---|
129 | up.scanmult() |
---|
130 | |
---|
## Output mode: True writes the uuid replace/remove/update lists derived from
## the review files; False writes the variable records that nothing references.
## (The original toggled this via a dead 'urep = False' immediately overwritten.)
urep = True
if urep:
    oo = open( 'uuidreplace.csv', 'w' )
    oo2 = open( 'uuidremove.csv', 'w' )
    for k in up.repl.keys():
        if k in inx.iref_by_uid:
            kn = up.repl[k]
            # every uid involved (old, new, referrer) must resolve in the index
            for tag, ki in inx.iref_by_uid[k]:
                vu = [ kk in inx.uid for kk in [k, kn, ki] ]
                if all( vu ):
                    oo.write( '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' % (k, kn, tag, ki,
                              inx.uid[k].label, inx.uid[kn].label, inx.uid[ki].label) )
                else:
                    print( 'ERROR.088.0001: Bad index in replace info: %s .. %s .. %s' % ( str([k, kn, ki]), str(vu), tag ) )
        else:
            # replaced record is itself unreferenced: just remove it
            oo2.write( k + '\n' )
    oo.close()
    oo2.close()
    oo = open( 'uuidupdate.csv', 'w' )
    for k in up.upd.keys():
        ee = up.upd[k]
        oo.write( '\t'.join( [k, ee['provNote'], ' '.join(ee['tags']), ee['label'], ee['title']] ) + '\n' )
    oo.close()
else:
    oo2 = open( 'uuidremove2.csv', 'w' )
    for i in dq.coll['var'].items:
        if i.uid not in inx.iref_by_uid:
            oo2.write( '\t'.join( [i.uid, i.label, i.title, i.prov, i.description] ) + '\n' )
    oo2.close()
---|
160 | |
---|
### Check back references: every uid that is the target of a reference should
### itself exist in the master uid index; count and list the missing ones.
nbr = 0
lbr = []
for k in inx.iref_by_uid.keys():
    if k not in inx.uid:
        nbr += 1
        lbr.append(k)
print( 'Missing references: %s' % nbr )
### can now apply mappings, create updated records and write to new xml?
---|
170 | |
---|
# Index request links by label (labels may repeat) and by uid.
for link in dq.coll['requestLink'].items:
    rql_by_name[link.label].append( link.uid )
    ix_rql_uid[link.uid] = link

# Index request variable groups by uid.
for grp in dq.coll['requestVarGroup'].items:
    ix_rqvg_uid[grp.uid] = grp
---|
178 | |
---|
## Older request versions name the section 'revisedTabItem'; newer use 'requestVar'.
if 'revisedTabItem' in dq.coll:
    thisk = 'revisedTabItem'
else:
    thisk = 'requestVar'
## For placeholder records ('__new__' uid) suggest a variable to link against:
## 'unique' when exactly one variable carries the label, 'ambiguous' otherwise.
oo = open( 'uuidinsert.csv', 'w' )
for i in dq.coll[thisk].items:
    if i.uid == '__new__':
        if i.label in inx.var.label:
            if len( inx.var.label[i.label] ) == 1:
                v = inx.uid[ inx.var.label[i.label][0] ]
                oo.write( '\t'.join( ['unique', i.label, v.label, v.uid, v.prov, i.mip] ) + '\n' )
            else:
                oo.write( '\t'.join( ['ambiguous', i.label, i.mip, str(len(inx.var.label[i.label]))] ) + '\n' )
oo.close()
---|
193 | |
---|
## Open the per-variable review files (closed after the scan loop below):
## varMult (labels with >2 definitions), varDup (exactly two), varStar
## (labels ending '--' listed with their unstarred counterparts).
oo = open( 'varMult.csv', 'w' )
oo2 = open( 'varDup.csv', 'w' )
oo3 = open( 'varStar.csv', 'w' )
hs = ['label', 'title', 'sn', 'units', 'description', 'prov', 'procnote', 'procComment', 'uid']
oo.write( '\t'.join(hs) + '\n' )
oo2.write( '\t'.join(hs) + '\n' )
oo3.write( '\t'.join(hs) + '\n' )
ks = sorted( inx.var.label.keys() )
# values treated as interchangeable by entryEq (builtin set, not sets.Set)
emptySet = set( ['', 'unset'] )
---|
def entryEq(a, b):
    """True when a equals b, treating '' and 'unset' as interchangeable.

    Self-contained: no longer reads the module-level emptySet global.
    """
    return a == b or (a in ('', 'unset') and b in ('', 'unset'))
---|
206 | |
---|
## Scan labels for duplicate definitions and write the review files.
deferredRecs = []
for k in ks:
    if len(inx.var.label[k]) == 2:
        # exactly two records share this label: decide which one survives
        v1 = inx.var.uid[inx.var.label[k][0]]
        v2 = inx.var.uid[inx.var.label[k][1]]
        cc = [ entryEq( v1.__dict__[x], v2.__dict__[x] ) for x in ['title', 'sn', 'units', 'description'] ]
        if all(cc):
            ### where duplicates are identical, collect and output at end of file.
            # prefer the record whose provenance mentions 'OMIP.'
            pv1 = 'OMIP.' in v1.__dict__['prov']
            pv2 = 'OMIP.' in v2.__dict__['prov']
            if pv2:
                vp = v2
                vo = v1
            else:
                if not pv1:
                    print( 'WARN.088.00002: no preference: %s, %s, %s' % (v1.__dict__['label'], v1.__dict__['prov'], v2.__dict__['prov']) )
                vp = v1
                vo = v2
            # pre-fill the review record: discard vo in favour of vp
            deferredRecs.append( '\t'.join( [vo.__dict__[x] for x in hs] + [vp.uid, 'identical'] ) + '\n' )
            deferredRecs.append( '\t'.join( [vp.__dict__[x] for x in hs] + ['', ''] ) + '\n' )
        else:
            # records differ: list both for manual review
            oo2.write( '\t'.join( [v1.__dict__[x] for x in hs] + ['', ''] ) + '\n' )
            oo2.write( '\t'.join( [v2.__dict__[x] for x in hs] + ['', ''] ) + '\n' )

    elif len(inx.var.label[k]) > 1:
        # more than two definitions: all go to varMult
        for i in inx.var.label[k]:
            oo.write( '\t'.join( [inx.var.uid[i].__dict__[x] for x in hs] ) + '\n' )

    if k[-2:] == '--':
        # starred label: list its records together with the unstarred label's
        for i in (inx.var.label[k] + inx.var.label[k[:-2]]):
            oo3.write( '\t'.join( [inx.var.uid[i].__dict__[x] for x in hs] ) + '\n' )

## output auto-filled records for identical duplicates at end of varDup file.
for r in deferredRecs:
    oo2.write( r )
oo.close()
oo2.close()
oo3.close()
---|
245 | |
---|
246 | |
---|
247 | |
---|
## Report every label carrying multiple definitions, with their standard names.
vns = sorted( inx.var.label.keys() )
for v in vns:
    if len( inx.var.label[v] ) > 1:
        print( 'INFO.001.0001: %s %s' % (v, ';'.join( [inx.var.uid[x].sn for x in inx.var.label[v]] )) )
---|
253 | |
---|
## Cross-reference CMOR variables (section name differs between request
## versions) against the master variable index; count hits and misses.
nok = 0
nerr = 0
if 'ovar' in dq.coll:
    thisk = 'ovar'
else:
    thisk = 'CMORvar'
for i in dq.coll[thisk].items:
    vid = i.vid
    ix_ovar_uid[i.uid] = i
    xr_var_ovar[vid].append( i.uid )
    if vid not in inx.var.uid:
        print( 'missing key: %s %s %s' % (i.label, i.prov, vid) )
        nerr += 1
    else:
        nok += 1
---|
269 | |
---|
class rqHtml(object):
    """Writes one html page per request link and per variable into odir,
    using the page templates from htmlTemplates (tmpl)."""

    def __init__(self, odir='./htmlSc/'):
        # odir: output directory, created on demand
        self.odir = odir
        if not os.path.isdir(odir):
            os.mkdir(odir)

    def mkRqlHtml(self, name):
        """Page(s) for a request link label: plain page when the label is
        unique, an index page plus one page per definition otherwise."""
        ## [u'comment', u'uid', u'tab', u'title', u'label', u'grid', 'defaults', u'objective', u'mip', 'globalDefault', u'gridreq']
        if len( rql_by_name[name] ) == 1:
            self.mkRqlHtml01(rql_by_name[name][0], name )
        else:
            self.mkRqlHtmlGp(name)

    def mkRqlHtmlGp(self, name):
        """Index page linking to each definition sharing this label."""
        ee = {}
        ee['title'] = 'CMIP Request Link %s (with multiple definitions)' % name
        self.pageName = 'rql__%s.html' % name
        al = []
        for i in range( len( rql_by_name[name] ) ):
            this = ix_rql_uid[rql_by_name[name][i]]
            al.append( tmpl.item % {'item': '<a href="rql__%s__%s.html">[%s]</a>: %s' % (name, i, i, this.title)} )
        ee['items'] = '\n'.join(al)
        ee['introduction'] = ''
        ee['htmlBody'] = tmpl.indexWrapper % ee
        ee['htmlHead'] = '''<title>%(title)s</title>''' % ee
        self.pageHtml = tmpl.pageWrapper % ee
        self.write()
        for i in range( len( rql_by_name[name] ) ):
            self.mkRqlHtml01(rql_by_name[name][i], i)

    def mkRqlHtml01(self, id, tag):
        """Attribute listing page for a single request link record."""
        this = ix_rql_uid[id]
        ee = {}
        if this.label == tag:
            ee['title'] = 'CMIP Request Link %s' % tag
            self.pageName = 'rql__%s.html' % tag
        else:
            # disambiguated page name when the label is shared
            ee['title'] = 'CMIP Request Link %s[%s]' % (this.label, tag)
            self.pageName = 'rql__%s__%s.html' % (this.label, tag)
        al = []
        for a in sorted( this.__dict__.keys() ):
            if a not in ['defaults', 'globalDefault']:
                al.append( tmpl.item % {'item': '<b>%s</b>: %s' % (a, this.__dict__.get(a, '-- Not Set --'))} )
        ee['items'] = '\n'.join(al)
        ee['introduction'] = ''
        ee['htmlBody'] = tmpl.indexWrapper % ee
        ee['htmlHead'] = '''<title>%(title)s</title>''' % ee
        self.pageHtml = tmpl.pageWrapper % ee
        self.write()

    def mkVarHtml(self, name):
        """Page(s) for a variable label, mirroring mkRqlHtml."""
        if len( inx.var.label[name] ) == 1:
            self.mkVarHtml01(inx.var.label[name][0], name )
        else:
            self.mkVarHtmlGp(name)

    def mkVarHtmlGp(self, name):
        """Index page linking to each variable definition sharing this label."""
        ee = {}
        ee['title'] = 'CMIP Variable %s (with multiple definitions)' % name
        self.pageName = 'var__%s.html' % name
        al = []
        for i in range( len( inx.var.label[name] ) ):
            this = inx.var.uid[inx.var.label[name][i]]
            al.append( tmpl.item % {'item': '<a href="var__%s__%s.html">[%s]</a>: %s' % (name, i, i, this.title)} )
        ee['items'] = '\n'.join(al)
        ee['introduction'] = ''
        ee['htmlBody'] = tmpl.indexWrapper % ee
        ee['htmlHead'] = '''<title>%(title)s</title>''' % ee
        self.pageHtml = tmpl.pageWrapper % ee
        self.write()
        for i in range( len( inx.var.label[name] ) ):
            self.mkVarHtml01(inx.var.label[name][i], i)

    def mkVarHtml01(self, id, tag):
        """Attribute listing page for a single variable record, including a
        summary of the items that reference it (from varRefs)."""
        this = inx.var.uid[id]
        ee = {}
        if this.label == tag:
            ee['title'] = 'CMIP Variable %s' % tag
            self.pageName = 'var__%s.html' % tag
        else:
            ee['title'] = 'CMIP Variable %s[%s]' % (this.label, tag)
            self.pageName = 'var__%s__%s.html' % (this.label, tag)
        al = []
        for a in sorted( this.__dict__.keys() ):
            if a not in ['defaults', 'globalDefault']:
                al.append( tmpl.item % {'item': '<b>%s</b>: %s' % (a, this.__dict__.get(a, '-- Not Set --'))} )

        if this.uid in inx.iref_by_uid:
            assert this.uid in varRefs, 'Problem with collected references'
            ee1 = varRefs[this.uid]
            for k in sorted( ee1.keys() ):
                al.append( tmpl.item % {'item': '<b>%s</b>: %s' % (k, ' '.join(ee1[k]))} )
        ee['items'] = '\n'.join(al)
        ee['introduction'] = ''
        ee['htmlBody'] = tmpl.indexWrapper % ee
        ee['htmlHead'] = '''<title>%(title)s</title>''' % ee
        self.pageHtml = tmpl.pageWrapper % ee
        self.write()

    def varHtml(self):
        """Generate pages for every variable label."""
        for k in inx.var.label.keys():
            self.mkVarHtml(k)

    def rqlHtml(self):
        """Generate pages for every request link label."""
        for k in rql_by_name.keys():
            self.mkRqlHtml(k)

    def write(self):
        """Write the current page (closing the handle, which the original leaked)."""
        with open( '%s/%s' % (self.odir, self.pageName), 'w' ) as fh:
            fh.write( self.pageHtml )
---|
389 | |
---|
390 | |
---|
## Generate all variable and request-link html pages.
vh = rqHtml()
vh.varHtml()
vh.rqlHtml()

# nerr/nok here come from the most recent crossref scan above
if nerr == 0:
    print( 'CHECK 001: %s records checked, no missing references' % nok )
---|
397 | |
---|
398 | ##for k in xr_var_ovar.keys(): |
---|
399 | ##if len( xr_var_ovar[k] ) > 1: |
---|
400 | ##print inx.var.uid[k].label, map( lambda x: ix_ovar_uid[x].mipTable, xr_var_ovar[k] ) |
---|
401 | |
---|
## Census of structure shapes seen in the input (informational only).
shps = {'': 64, 'XYZKT': 13, '4-element vector': 2, 'XYT': 476, '2D vector field ': 2, 'KZT': 4, '2D vector field': 2, 'XYZ': 27, 'XYZT': 204, '2D': 83, 'scalar': 14, 'XY': 88, '?': 21, '2D ': 1, 'XYKT': 3, 'YZT': 16, 'ZST1': 15, 'XKT': 2, 'BasinYT': 1}
## Map level-count / level-description cell values to dimension names; keys are
## a mix of floats (numeric spreadsheet cells) and strings.
vshpchkMap = {'':'', u'all model levels above 400hPa':'alevStrat', u'all':'Xlev', 3.0:'plev3', '4.0':'plev4', \
              36.0:'plev36', u'soil levels':'sdepth', \
              1.0:'sfc?', \
              16.0:'plev16', 7.0:'plev7', 40.0:'plev40', u'all*':'Xlev', 14.0:'plev14', u'Model levels or 27Plevs':'alev|plev27', \
              u'17 (or 23 )':'plev17|plev23', u'17 (or 23)':'plev17|plev23', \
              27.0:'plev27', 17.0:'plev17', u'17 (or23)':'plev17|plev23', 8.0:'plev8', u'all model levels':'alev', 5.0:'plev5'}
## Mirror each float key as its string form so lookups work on either.
## list() snapshots the keys: the original iterated keys() while inserting,
## which raises RuntimeError on a py3 dict view.
for k in list(vshpchkMap.keys()):
    if isinstance(k, float):
        vshpchkMap[str(k)] = vshpchkMap[k]

print( vshpchkMap.keys() )

## Canonical time-sampling category -> list of phrasings found in the input.
tsmap = { 'mean':[u'daily mean', u'time mean', u'time: day',
                  u'Cumulative annual fraction', u'Time mean', u'weighted time mean', u'time: mean', u'mean', u'Mean'],
          '__unknown__':['','dummyAt'],
          'point':[ u'Instantaneous (end of year)', u'point', u'Synoptic', u'instantaneous', u'time: point', u'synoptic'] }
## Inverted index: specific phrasing -> canonical category.
tsmap2 = {}
for k in tsmap.keys():
    for i in tsmap[k]:
        tsmap2[i] = k
---|
424 | |
---|
if 'groupItem' in dq.coll:
    ## Pass 1: validate the shape/levels metadata of each groupItem.
    ee = collections.defaultdict( int )
    for i in dq.coll['groupItem'].items:
        tst = tsmap2[ i.tstyle ]
        dd = ''
        # NOTE(review): 'X'->'latitude' / 'Y'->'longitude' looks swapped
        # relative to the usual convention -- confirm intent before changing.
        if 'X' in i.shape:
            dd += 'latitude '
        if 'Y' in i.shape:
            dd += 'longitude '
        if 'Z' in i.shape:
            if i.levels == '':
                print( 'ERROR.001.0001: no levels specified %s %s' % (i.label, i.title) )
            else:
                zdim = vshpchkMap[i.levels]
                dd += zdim
    ## print '%s::%s::%s|%s' % (i.shape, i.levels, i.tstyle, dd)
    ## Pass 2: index groupItems by their group id.
    for i in dq.coll['groupItem'].items:
        list_gp_ovar[i.gpid].append( i.uid )

    ## Pass 3: cross-reference groupItems against the variable index.
    nok = 0
    nerr = 0
    for i in dq.coll['groupItem'].items:
        vid = i.vid
        ix_gpi_uid[i.uid] = i
        xr_var_gpi[vid].append( i.uid )
        if vid not in inx.var.uid:
            nerr += 1
        else:
            nok += 1
    print( 'groupItem to var crossref: nok = %s, nerr = %s' % (nok, nerr) )
---|
455 | |
---|
456 | |
---|
class tcmp(object):
    """Comparator object ordering records by their title attribute."""
    def __init__(self):
        pass
    def cmp(self, x, y):
        # three-way compare without the py2-only cmp() builtin:
        # -1 when x.title < y.title, 0 when equal, 1 when greater
        return (x.title > y.title) - (x.title < y.title)
---|
462 | |
---|
def atRepr(l, x):
    """Display form of attribute x of record l (or of l itself when x is None).

    '__unset__' becomes '', a list becomes a space-joined string of its items,
    anything else is returned unchanged.
    """
    if x is not None:
        v = l.__dict__[x]
    else:
        v = l
    if v == '__unset__':
        return ''
    elif isinstance(v, list):
        return ' '.join(str(i) for i in v)
    else:
        return v
---|
474 | |
---|
class xlsx(object):
    # Thin wrapper around an xlsxwriter Workbook.
    def __init__(self,fn):
        # fn: output .xlsx file path
        self.wb = xlsxwriter.Workbook(fn)

    def newSheet(self,name):
        # Create a worksheet, remember it as the current one, and return it.
        self.worksheet = self.wb.add_worksheet(name=name)
        return self.worksheet

    def close(self):
        # Finalise the workbook and write it to disk.
        self.wb.close()
---|
485 | |
---|
def dumpxlsx( fn, key, atl):
    """Dump attributes atl of all records in section key of the request to xlsx file fn.

    Records are sorted by title. For the 'var' section, when the refpix table
    is loaded an extra column is appended: the table's integer value for the
    record's uid, or 201 when absent.
    """
    wb = xlsx( fn )
    sht = wb.newSheet( key )
    j = 0
    # header row
    for i in range(len(atl)):
        sht.write( j, i, atl[i] )
    ll = dq.coll[key].items[:]
    # key-function sort replaces the py2-only cmp-function form ll.sort(tcmp().cmp)
    ll.sort( key=lambda r: r.title )
    for l in ll:
        uid = atRepr(l, 'uid')
        j += 1
        i = 0
        for x in atl:
            sht.write( j, i, atRepr(l, x) )
            i += 1
        if key == 'var' and refpix is not None:
            if uid in refpix:
                p = refpix[uid][2]
            else:
                p = 201   # default when the uid has no reference entry
            sht.write( j, i, p )
    wb.close()
---|
508 | |
---|
def dumpcsv( fn, key, atl ):
    """Dump attributes atl of all records in section key to tab-separated file fn,
    sorted by record title, with atl as the header row."""
    oo = open(fn, 'w' )
    ll = dq.coll[key].items[:]
    # key-function sort replaces the py2-only cmp-function form
    ll.sort( key=lambda r: r.title )
    oo.write( '\t'.join( atl ) + '\n' )
    for l in ll:
        try:
            oo.write( '\t'.join( [str(atRepr(l, x)) for x in atl] ) + '\n' )
        except:
            # deliberately broad: identify the failing record, then re-raise
            print( 'SEVERE.090.0001: print %s' % str(atl) )
            print( l )
            raise
    oo.close()
---|
522 | |
---|
def atlSort( ll ):
    """Order attribute names for output.

    'label' and 'title' (when present) come first, the remaining names follow
    alphabetically, 'uid' goes last; 'defaults' and 'globalDefault' are dropped.
    Unlike the original, the input sequence is not mutated, so dict key views
    can be passed directly.
    """
    l1 = ['label', 'title']
    l2 = ['uid', 'defaults', 'globalDefault']
    oo = [i for i in l1 if i in ll]
    oo += sorted( i for i in ll if i not in l1 and i not in l2 )
    if 'uid' in ll:
        oo.append( 'uid' )
    return oo
---|
537 | |
---|
## Dump every populated section to csv2/<section>.csv; the var section also
## goes to xlsx. 'parent' and private ('_'-prefixed) attributes are skipped.
for k in dq.coll.keys():
    if len( dq.coll[k].items ) > 0:
        expl = dq.coll[k].items[0]
        # pass a copy: keys() is a view in py3 and atlSort must not see it mutated
        atl = atlSort( list( expl.__dict__.keys() ) )
        atl1 = [a for a in atl if a != 'parent' and a[0] != '_']
        print( k, atl1 )
        dumpcsv( 'csv2/%s.csv' % k, k, atl1 )
        if k == 'var':
            dumpxlsx( 'csv2/var.xlsx', k, atl1 )
---|
547 | |
---|
## Write var1.csv: core attributes of each non-starred variable plus a
## space-joined summary of its ovar/groupItem/revisedTabItem references.
oo = open( 'var1.csv', 'w' )
ks = ['label', 'title', 'sn', 'units', 'description', 'prov', 'procnote', 'procComment', 'uid']
ks2 = [ 'ovar', 'groupItem', 'revisedTabItem']
oo.write( '\t'.join(ks + ks2) + '\n' )
for i in dq.coll['var'].items:
    if i.label[-2:] != '--':
        ee1 = varRefs.get( i.uid, {} )
        # atRepr already space-joins list values; the original wrapped it in a
        # second string.join, which re-joined the *characters* of that string
        # and spaced out every letter.
        r2 = [ atRepr( ee1.get(x, []), None ) for x in ks2 ]
        oo.write( '\t'.join( [atRepr(i, x) for x in ks] + r2 ) + '\n' )
oo.close()
---|
558 | |
---|
class annotate(object):
    """Load a data request xml document and apply a series of in-place fixes:
    rebuilt structure titles, canonical provenance, corrected cell_methods,
    removal of unused records, and remarks for missing references. The
    annotated document is written out by missingRefs(), which ends the run.
    """

    def __init__(self,src,dreq):
        # src: path of the xml document to annotate; dreq: loaded data request
        assert os.path.isfile( src), '%s not found' % src
        self.doc = xml.dom.minidom.parse( src )
        self.dreq = dreq
        self.removedUids = set()   # uids of items removed from the document
        # build uid -> (label, title) lookup from the 'var' section
        this = self.doc.getElementsByTagName('var')[0]
        dil = this.getElementsByTagName('item')
        self.vid = {}
        for item in dil:
            uid = item.getAttribute( 'uid' )
            title = item.getAttribute( 'title' )
            label = item.getAttribute( 'label' )
            self.vid[uid] = (label,title)

    def strTtl(self,dq):
        # Drop structure records with no incoming references, and rebuild the
        # titles of the rest from their time (tmid) and spatial (spid) records.
        this = self.doc.getElementsByTagName('structure')[0]
        dil = this.getElementsByTagName('item')

        filterStr = True   # switch: remove unused structures when True
        for item in dil:
            uid = item.getAttribute( 'uid' )
            lab = item.getAttribute( 'label' )
            if len( dq.inx.iref_by_uid[uid] ) == 0 and filterStr:
                print 'UNUSED STRUCTURE: %s, %s' % (lab,uid)
                this.removeChild( item )
            else:
                tmid = item.getAttribute( 'tmid' )
                spid = item.getAttribute( 'spid' )
                o = item.getAttribute( 'odims' )
                c = item.getAttribute( 'coords' )
                s = dq.inx.uid[spid]
                title = '%s, %s [%s]' % (dq.inx.uid[tmid].title, s.title, s.label)
                # append any extra dimension/coordinate qualifiers
                if string.strip( c ) != '' or string.strip( o ) != '':
                    title += ' {%s:%s}' % (o,c)
                    print 'STRUCT: ',title,o,c
                item.setAttribute( 'title', title )


    def rvgCheck(self,dq):
        """Remove request variable groups which have no requestLink"""
        this = self.doc.getElementsByTagName('requestVarGroup')[0]
        dil = this.getElementsByTagName('item')
        nn = 0
        for item in dil:
            uid = item.getAttribute( 'uid' )
            if 'requestLink' not in dq.inx.iref_by_sect[uid].a:
                this.removeChild(item)
                self.removedUids.add( uid )
                nn+=1
        print 'Unused variable groups removed: %s' % nn
        # now drop request variables whose group no longer survives
        this = self.doc.getElementsByTagName('requestVar')[0]
        dil = this.getElementsByTagName('item')
        nn = 0
        # s1: uids of the variable groups that remain after the removal above
        s1 = {i.uid for i in dq.coll['requestVarGroup'].items if i.uid not in self.removedUids}
        for item in dil:
            uid = item.getAttribute( 'uid' )
            vid = item.getAttribute( 'vid' )
            vgid = item.getAttribute( 'vgid' )
            if vgid not in s1:
                this.removeChild(item)
                self.removedUids.add( uid )
                nn+=1
        print 'Unused request variables removed: %s' % nn


    def cmvCheck(self,dq):
        # Replace placeholder CMORvar titles (starting '__from') with the
        # title of the linked variable collected in __init__.
        this = self.doc.getElementsByTagName('CMORvar')[0]
        dil = this.getElementsByTagName('item')
        kk = 0    # titles actually replaced
        kka = 0   # placeholder titles seen
        for item in dil:
            title = item.getAttribute( 'title' )
            if title[:6] == '__from':
                kka += 1
                vid = item.getAttribute( 'vid' )
                if vid in self.vid:
                    title2 = self.vid[vid][1]
                    item.setAttribute( 'title', title2 )
                    kk += 1
        print ('CMOR Var titles reset: %s [%s]' % (kk,kka))

    def mipProv(self,dq):
        # Derive a canonical 'provmip' attribute for every var record from
        # its free-text 'prov' attribute.
        s1 = re.compile( '\[([a-zA-Z0-9]*)\]' )
        cc = collections.defaultdict(int)   # prov string -> occurrence count
        dd = collections.defaultdict(int)   # canonical mip -> record count
        for i in dq.coll['var'].items:
            cc[i.prov] += 1

        ee = {}   # prov string -> canonical mip name
        for i in sorted( cc.keys() ):
            if i[:9] == 'CMIP6 end':
                # new-style provenance: mip name embedded in square brackets
                m = s1.findall( i )
                assert len( m ) == 1, 'FAILED TO PARSE: %s' % i
                this = m[0]
            else:
                # old-style: infer the mip from substring matches
                i5 = i.find( 'CMIP5' ) != -1
                io = i.find( 'OMIP' ) != -1
                icx = i.find( 'CORDEX' ) != -1
                ip = i.find( 'PMIP' ) != -1
                icc = i.find( 'CCMI' ) != -1
                isp = i.find( 'SPECS' ) != -1
                icf = i.find( 'CFMIP' ) != -1
                if i5 and io:
                    print 'WARNING .. unclear provenance: ',i,cc[i]
                    this = 'CMIP5/OMIP'
                elif i5:
                    this = 'CMIP5'
                elif io:
                    this = 'OMIP'
                elif icx:
                    this = 'CORDEX'
                elif ip:
                    this = 'PMIP'
                elif icc:
                    this = 'CCMI'
                elif isp:
                    this = 'SPECS'
                elif icf:
                    this = 'CFMIP'
                else:
                    print 'WARNING .. unclear provenance [2]: ',i,cc[i]
                    this = 'unknown'

            ee[i] = this
            dd[this] += cc[i]
        self.dd = dd   # kept for inspection: counts per canonical mip
        self.ee = ee   # kept for inspection: prov -> canonical mip
        # stamp the derived value onto every var item in the document
        this = self.doc.getElementsByTagName('var')[0]
        dil = this.getElementsByTagName('item')
        print 'FIXING var provmip attribute, %s items' % len(dil)
        kk = 0
        for item in dil:
            kk += 1
            p = item.getAttribute( 'prov' )
            assert ee.has_key(p), 'Unmatched key: %s' % p
            item.setAttribute( 'provmip', ee[p] )

    def fixCellMethods(self,dq):
        # Repair three known defects in structure cell_methods strings:
        # missing 'mean' after 'area:', missing space in 'time:mean', and the
        # 'weighted b ' typo. Only the first matching rule is applied per item.
        this = self.doc.getElementsByTagName('structure')[0]
        dil = this.getElementsByTagName('item')
        cmrep = collections.defaultdict( set )   # original -> set of replacements
        cmc = collections.defaultdict( int )     # replacement -> count
        nrep = 0
        for item in dil:
            cm = item.getAttribute( 'cell_methods' )
            if string.find( cm, "area: where" ) != -1:
                cm1 = string.replace( cm, "area: where", "area: mean where" )
                item.setAttribute( 'cell_methods', cm1 )
                cmrep[cm].add(cm1)
                cmc[cm1] += 1
                nrep += 1
            elif string.find( cm, "time:mean" ) != -1:
                cm1 = string.replace( cm, "time:mean", "time: mean" )
                item.setAttribute( 'cell_methods', cm1 )
                cmrep[cm].add(cm1)
                cmc[cm1] += 1
                nrep += 1
            elif string.find( cm, "weighted b " ) != -1:
                cm1 = string.replace( cm, "weighted b ", "weighted by " )
                item.setAttribute( 'cell_methods', cm1 )
                cmrep[cm].add(cm1)
                cmc[cm1] += 1
                nrep += 1
        print ('FIXED CELL METHODS .. %s' % nrep )
        for k in cmc:
            print ('%s: %s' % (k,cmc[k]) )


    def sectionCopy(self,dq):
        # Copy selected Oyr CMORvar records (OMIP bgc, rowIndex < 65) into the
        # Omon table with a fresh uid, and create a matching requestVar in the
        # OMIP-Omon group for each copy.
        this = self.doc.getElementsByTagName('CMORvar')[0]
        thisRqv = self.doc.getElementsByTagName('requestVar')[0]
        xx = [i for i in dq.coll['requestVarGroup'].items if i.label == 'OMIP-Omon']
        assert len(xx) == 1, 'OMIP-Omon request variable group not found'
        omipOmonUid = xx[0].uid
        dil = this.getElementsByTagName('item')
        for item in dil:
            mipt = item.getAttribute( 'mipTable' )
            prov = item.getAttribute( 'prov' )
            provn = item.getAttribute( 'provNote' )
            if mipt == 'Oyr' and prov[:12] == "CMIP6 [OMIP]" and provn == 'bgc':
                rowix = int( item.getAttribute( 'rowIndex' ) )
                if rowix < 65:
                    var = item.getAttribute( 'label' )
                    new = item.cloneNode(True)
                    new.setAttribute( 'defaultPriority', '2' )
                    new.setAttribute( 'mipTable', 'Omon' )
                    new.setAttribute( 'prov', 'Copy from Oyr' )
                    new.setAttribute( 'provNote', 'sdq.001' )
                    vid = str( uuid.uuid1() )
                    new.setAttribute( 'uid', vid )
                    this.appendChild( new )
##
## create request var
##
                    new2 = self.doc.createElement( 'item' )
                    uid = str( uuid.uuid1() )
                    new2.setAttribute( 'uid', uid )
                    new2.setAttribute( 'priority', '2' )
                    new2.setAttribute( 'vid', vid )
                    new2.setAttribute( 'vgid', omipOmonUid )
                    new2.setAttribute( 'mip', 'OMIP' )
                    new2.setAttribute( 'table', 'OMIP-Omon' )
                    thisRqv.appendChild(new2)


    def missingRefs(self,mrefs,clear=True):
        # Record each missing reference (mrefs: uid -> list of (section, attr,
        # referrer-uid) tuples) as an item in the 'remarks' section, then write
        # the whole annotated document out, one cleaned line at a time.
        this = self.doc.getElementsByTagName('remarks')[0]
        if clear:
            # discard any remarks left over from a previous run
            dil = this.getElementsByTagName('item')
            for d in dil:
                this.removeChild(d)
        for k in mrefs.keys():
            if len( mrefs[k] ) == 1:
                # single referrer: record its id and section.attribute directly
                tid = mrefs[k][0][2]
                tattr = mrefs[k][0][1]
                tn = None
            else:
                # multiple referrers: record the count, and the shared
                # section.attribute when there is exactly one
                tid = None
                ee = collections.defaultdict(int)
                tn = str( len( mrefs[k] ) )
                for t in mrefs[k]:
                    s = self.dreq.inx.uid[t[2]]._h.label
                    ee['%s.%s' % (s,t[1])] += 1
                if len( ee.keys() ) == 1:
                    tattr = ee.keys()[0]
                else:
                    tattr = '__multiple__'
            # no remark needed for uids we deliberately removed ourselves
            if k not in self.removedUids:
                item = self.doc.createElement( 'item' )
                item.setAttribute( 'uid', k )
                item.setAttribute( 'tattr', tattr )
                if tn != None:
                    item.setAttribute( 'techNote', tn )
                if tid != None:
                    item.setAttribute( 'tid', tid )
                item.setAttribute( 'class', 'missingLink' )
                item.setAttribute( 'description', 'Missing links detected and marked for fixing' )
                item.setAttribute( 'prov', 'scanDreq.py:annotate' )
                this.appendChild( item )

        # pretty-print, strip blank lines and clean unicode, then write out
        txt = self.doc.toprettyxml(indent='\t', newl='\n', encoding=None)
        oo = open( 'out/annotated_20150731.xml', 'w' )
        lines = string.split( txt, '\n' )
        for line in lines:
            l = utils_wb.uniCleanFunc( string.strip(line) )
            if empty.match(l):
                continue
            else:
                oo.write(l + '\n')
        oo.close()
---|
810 | |
---|
# Run the full annotation pipeline on the sample document; missingRefs (last
# step) writes the annotated xml to out/annotated_20150731.xml.
doAnno = True
if doAnno:
    an = annotate( dq.c.vsamp, dq )
    ###an.sectionCopy(dq)
    an.fixCellMethods(dq)
    an.mipProv(dq)
    an.cmvCheck(dq)
    an.rvgCheck(dq)
    an.strTtl(dq)
    an.missingRefs( dq.inx.missingIds )
---|