Changeset 375 for CMIP6dreqbuild
- Timestamp: 02/09/15 17:35:01 (7 years ago)
- Location: CMIP6dreqbuild/trunk
- Files: 6 edited
Legend:
- Unmodified
- Added
- Removed
CMIP6dreqbuild/trunk/docs/vocab.txt
r370 r375 1 1 ## vsf=v 2 2 vocab institute; Institute; cmip.drv.001; def; 0; 1; Yes 3 - url; isni 3 - url {Home page} 4 - isni {International Standard Name Identifier (ISO 27729)} 4 5 - description 5 6 - uuid -
CMIP6dreqbuild/trunk/src/framework/scanDreq.py
r373 r375 8 8 empty=re.compile('^$') 9 9 10 dq = dreq.loadDreq() 10 src1 = '../workbook/trial_20150831.xml' 11 12 dq = dreq.loadDreq(dreqXML=src1) 11 13 inx = dq.inx 12 14 ##inx.makeVarRefs() -
CMIP6dreqbuild/trunk/src/workbook/importShDreq.py
r372 r375 309 309 from scansh import rq 310 310 m = main(sampleXml, rq) 311 if len(m.err0010.keys()) > 0: 312 ks = m.err0010.keys() 313 ks.sort() 314 for k in ks: 315 print 'ERROR.001.0010: variable group not identified: %s [%s]' % (k,m.err0010[k]) -
CMIP6dreqbuild/trunk/srcMisc/dreqSX.py
r374 r375 43 43 ## dictionary to carry short names of old variables. 44 44 eeold = {} 45 for ee,shv in [(ee0,sh),(eenv,shnv),(eeold,shold)]: 45 ###for ee,shv in [(ee0,sh),(eenv,shnv),(eeold,shold)]: 46 for ee,shv in [(ee0,sh),(eenv,shnv)]: 46 47 for k in shv.keys(): 47 48 ee[k] = shv[k] 48 49 49 for ee,fn in [(ee_xref,'vlsc5_xref_v20150713.xls'),(ee_mip,'vlsc5_mip_v20150713.xls')]: 50 wb = workbook( fn ) 51 sh = wb.book.sheet_by_name( 'Sheet1' ) 52 for i in range(sh.nrows): 53 r = map( lambda x: string.strip(str(uniCleanFunc(x.value))), sh.row(i) ) 54 ee[r[0]] = r 50 rmod = 'csv' 51 eeoldl = collections.defaultdict( list ) 52 if rmod == 'xls': 53 for ee,fn in [(ee_xref,'vlsc5_xref_v20150826.xls'),(ee_mip,'vlsc5_mip_v20150826.xls'),(eeold,'vlsc5b_v20150826.xls')]: 54 wb = workbook( fn ) 55 sh = wb.book.sheet_by_name( 'Sheet1' ) 56 for i in range(sh.nrows): 57 r = map( lambda x: string.strip(str(uniCleanFunc(x.value))), sh.row(i) ) 58 ee[r[0]] = r 59 else: 60 for ee,fn,sep,xxx in [(ee_xref,'vlsc5_xref_v20150826.csv',',',False),(ee_mip,'vlsc5_mip_v20150826.csv', '\t',False),(eeoldl,'vlsc5b_v20150826.csv', '\t',True)]: 61 for i in open(fn).readlines(): 62 r = map( lambda x: string.strip(str(uniCleanFunc(x))), string.split( string.strip(i), sep ) ) 63 while len(r) < 9: 64 r.append( '' ) 65 if xxx: 66 ee[r[0]].append( r ) 67 else: 68 ee[r[0]] = r 69 70 71 for h in eeoldl.keys(): 72 if len( eeoldl[h] ) == 1: 73 eeold[h] = eeoldl[h][0] 74 else: 75 ll = [] 76 for k in range( len( eeoldl[h] ) ): 77 if eeoldl[h][k][8] == '': 78 ll.append(k) 79 if len(ll) == 0: 80 print 'ERROR.004.0001: no unforwarded record in vlsc5b for h=%s' % h 81 eeold[h] = eeoldl[h][0] 82 elif len(ll) > 1: 83 print 'ERROR.004.0002: multiple unforwarded records in vlsc5b for h=%s (%s)' % (h,eeoldl[h][ll[0]][1]) 84 eeold[h] = eeoldl[h][ll[0]] 85 else: 86 eeold[h] = eeoldl[h][ll[0]] 55 87 56 88 k2m = [] … … 70 102 ## had bug here due to extension of ee_mip to include dimensions. 
71 103 ## 104 ee_byvar = collections.defaultdict(list) 72 105 for k in ee_mip.keys(): 73 106 tid = ee_mip[k][-2] … … 75 108 ee_miptables_vars_old[tid].append( ee_mip[k][-3] ) 76 109 ee_ovars_old['%s.%s' % (tid,ee_mip[k][-3]) ].append( k ) 110 ee_byvar[ ee_mip[k][-3] ].append( k ) 77 111 78 112 for s in shlist: -
CMIP6dreqbuild/trunk/srcMisc/sumMip_pp.py
r374 r375 1 2 1 import extractMipInfo as e 3 import hashlib, collections, string, uuid 2 import hashlib, collections, string, uuid, sys 3 import dreq_cfg 4 4 5 5 from utils_wb import workbook … … 8 8 9 9 eeatKeys = ['comment', 'deflate_level', 'shuffle', 'ok_max_mean_abs', 'flag_meanings', 'type', 'ok_min_mean_abs', 'standard_name', 'deflate', 'long_name', 'valid_min', 'cell_methods', 'flag_values', 'cell_measures', 'out_name', 'modeling_realm', 'units', '#cell_methods', 'valid_max', 'positive'] 10 11 shKeys = ['priority', 'long name', 'units', 'comment', 'questions & notes', 'output variable name', 'CF Standard name', 'unconfirmed or proposed standard name', 'unformatted units', 'cell_methods', 'valid min', 'valid max', 'mean absolute min', 'mean absolute max', 'positive', 'type', 'CMOR dimensions', 'CMOR variable name', 'realm', 'frequency', 'cell_measures', 'flag_values', 'flag_meanings' ] 12 13 x = [('long name','long_name'),('output variable name','out_name'),('CF Standard name','standard_name'),('valid min','valid_min'),('valid max','valid_max'),('mean absolute min','ok_min_mean_abs'),('mean absolute max','ok_max_mean_abs'),('CMOR dimensions','dims'),('CMOR variable name','var'),('realm','modeling_realm')] 14 ### sheet columns which have no MIP table equiv. 15 y = ['priority', 'questions & notes', 'unconfirmed or proposed standard name','unformatted units','frequency'] 16 ## MIP table entries which have no workbook source. 
17 y2 = ['deflate_level', 'shuffle', 'deflate', '#cell_methods' ] 18 10 19 eeatKeysPlus = eeatKeys + ['var','mip','dims'] 20 ktr1 = {} 21 for k in eeatKeysPlus: 22 if k in shKeys: 23 ktr1[k] = k 24 25 for t in x: 26 ktr1[t[1]] = t[0] 27 28 dc = dreq_cfg.rqcfg() 29 eeomip = [] 30 for f in dc.ff['OMIP']: 31 wbx = workbook( dc.dir0 + 'OMIP/' + f ) 32 for s in wbx.sns: 33 if s != 'general': 34 sh = wbx.book.sheet_by_name( s ) 35 started = False 36 for i in range(sh.nrows): 37 r = map( lambda x: x.value, sh.row(i) ) 38 if started: 39 k2 = str( uuid.uuid1() ) 40 this = [k2,] 41 for k in eeatKeysPlus: 42 if k == 'mip': 43 this.append( 'OMIP.%s' % s ) 44 elif ktr1.has_key(k): 45 this.append( r[ shKeys.index( ktr1[k] ) ] ) 46 else: 47 this.append( '' ) 48 eeomip.append( this ) 49 elif r[0] == 'priority': 50 started = True 11 51 12 52 def mipr2l( dd1 ): … … 69 109 h = hashlib.md5(string.join(t) ).hexdigest() 70 110 ee[h] = t 71 eee[h].append( z ) 111 ### list of mip tables referencing this item 112 eee[h] += z 72 113 73 114 ## need to add v & m .... … … 83 124 eeh[h].append(k2) 84 125 126 #### this is the wrong use of eeh ---- should be key referencing another ...... 
127 #### h is a hash of var, ln, sn, comment, units, used as key of "ee" 128 ee_extra = {} 129 for r in eeomip: 130 v = r[-3] 131 k2 = r[0] 132 u = r[17] 133 if u == 1.: 134 u = '1' 135 if u == .001: 136 u = '0.001' 137 ## 138 ## need to deal with these rogue entries later as well 139 ## 140 if v not in ['',29]: 141 t = (v,r[10],r[8],r[1],u) 142 h = hashlib.md5(string.join(t) ).hexdigest() 143 eeh[h].append(k2) 144 ee_extra[h] = t 145 eee[h].append( r[-2] ) 146 85 147 def mjoin( ll ): 86 148 oo = '' … … 94 156 wb = workbook( 'vlsc3_ref_01-04.xls' ) 95 157 sh = wb.book.sheet_by_name( 'Sheet1' ) 96 vref = {}158 vref = collections.defaultdict( list ) 97 159 vrefh = collections.defaultdict( list ) 98 160 for i in range(sh.nrows): … … 112 174 print r 113 175 raise 114 vref[h] = s + r[5:]176 vref[h].append( s + r[5:] ) 115 177 vrefh[k].append( h ) 116 178 een = {} … … 124 186 icsh = [] 125 187 for h in ee: 126 this = list(ee[h]) + [str(eee[h][0])[1:-1],] 188 ######''' 189 this = list(ee[h]) + [string.join(eee[h],','), ] 127 190 k = this[0] 128 191 if not vrefh.has_key(k): 129 192 een[h] = ee[h] 130 193 print 'ERR.001.0001: ',str(this) 131 if this[-1] == " 'CCMI1_satdaily'":194 if this[-1] == "CCMI1_satdaily": 132 195 icsh.append(h) 133 196 ics += 1 … … 138 201 print '###############################' 139 202 for h2 in vrefh[k]: 140 print vref[h2][ :5]203 print vref[h2][0][:5] 141 204 print '+++++++++++++++++++++++++++++++' 142 205 … … 176 239 if h not in icsh: 177 240 idk += 1 178 this = [h,] + list(ee[h]) + [string.join(eee[h] [0],','), 'mv.001.%4.4i' % idk]241 this = [h,] + list(ee[h]) + [string.join(eee[h],','), 'mv.001.%4.4i' % idk] 179 242 assert vref.has_key(h), 'key %s not found' % h 180 ll = vref[h][:]181 ll[:5] = ee[h][:5]182 this = '%s\t' % h183 for c in ll:184 try:185 this += '%s\t' % str(c)186 except:187 print c188 print ll189 raise243 for ll in vref[h]: 244 ll[:5] = ee[h][:5] 245 this = '%s\t' % h 246 for c in ll: 247 try: 248 this += '%s\t' % str(c) 249 
except: 250 print c 251 print ll 252 raise 190 253 ## need to append trailing columns 191 254 #oo.write( "%s\t%s\t%s\t%s\t'%s'\t%s\t%s\t%s\t\n" % tuple(this) ) 192 oo.write( "%s\n" % this ) 193 255 oo.write( "%s\n" % this ) 256 257 eevr = {} 258 eevr2 = {} 194 259 for h,k in vrefmissing: 195 260 assert vref.has_key(h), 'key %s not found [2]' % h 196 this = '%s\t' % h 197 for c in vref[h]: 198 this += '%s\t' % c 199 kmv = vref[h][6] 200 assert kmv[:3] == 'mv.', 'ERROR.001.0011: unexpected mv identifier: %s, %s ' % (kmv, str(this)) 201 if kmv[:3] == 'mv.': 202 if ee2.has_key(kmv): 203 r = map( str, ee2[kmv] ) 204 r2 = map( str, vref[h] ) 205 if r[1:5] != r2[:4] or (r[6:9] != r2[5:8]): 206 print str(vref[h]) 207 print str(r) 208 print map( lambda x,y: x == y, r[1:9], r2[:8] ) 209 print '------------------------------------------------' 210 oo.write( "%s\n" % this ) 261 for ll in vref[h]: 262 this = '%s\t' % h 263 for c in ll: 264 this += '%s\t' % c 265 kmv = ll[6] 266 assert kmv[:3] == 'mv.', 'ERROR.001.0011: unexpected mv identifier: %s, %s ' % (kmv, str(this)) 267 if kmv[:3] == 'mv.': 268 if ee2.has_key(kmv): 269 r = map( str, ee2[kmv] ) 270 r2 = map( str, ll ) 271 if r[1:5] != r2[:4] or (r[6:9] != r2[5:8]): 272 print str(ll) 273 print str(r) 274 print map( lambda x,y: x == y, r[1:9], r2[:8] ) 275 print '------------------------------------------------' 276 hp = '%s.%s' % (h,kmv) 277 if not eevr2.has_key(hp): 278 eevr2[hp] = k 279 eevr[h] = k 280 oo.write( "%s\n" % this ) 211 281 ## need to output additional rows. 
282 idkp = 0 283 for h in ee_extra.keys(): 284 if not (ee.has_key(h) or eevr.has_key(h)): 285 idkp += 1 286 this0 = list(ee_extra[h]) + [string.join(eee[h],','), 'mv.007.%4.4i' % idkp] 287 this = '%s\t' % h 288 for c in this0: 289 try: 290 this += '%s\t' % str(c) 291 except: 292 print c 293 print ll 294 raise 295 oo.write( "%s\n" % this ) 296 212 297 oo.close() 213 298 oo=open( 'vlsc5_xref_v%s.csv' % yyy,'w') … … 220 305 this = string.join( eel[u], '\t' ) 221 306 oo.write( '%s\t%s\n' % (u,this) ) 307 for r in eeomip: 308 this = string.join( map( str, r), '\t' ) 309 oo.write( '%s\n' % (this) ) 222 310 oo.close() 223 311 print eeat.keys() -
CMIP6dreqbuild/trunk/srcMisc/sx2.py
r374 r375 253 253 nnf = 0 254 254 nnm = 0 255 nno = 0 255 256 for k in sx.ee_mip.keys(): 256 257 t = map( lambda x: sx.ee_mip[k][x], [15,10,8,1,17] ) 257 258 h = hashlib.md5(string.join(t) ).hexdigest() 258 259 if not sx.ee_xref_bck.has_key(k): 259 print 'ERROR.002.0010: missing back reference',sx.ee_mip[k] 260 nnf += 1 260 r1 = sx.ee_mip[k][1] 261 if r1 in ['','29'] or r1[:23] == 'Reference to literature' or r1[:15] == '(copied from 3D': 262 print 'WARN.002.0010: dud record',sx.ee_mip[k], sx.ee_mip[k][-2][:4] 263 else: 264 print 'ERROR.002.0010: missing back reference',sx.ee_mip[k], sx.ee_mip[k][-2][:4] 265 nnf += 1 261 266 elif len(sx.ee_xref_bck[k]) > 1: 262 267 nnm += 1 … … 266 271 print sx.eeold[k2] 267 272 268 print '#### Number of missing back references: ',nnf273 print '#### Number of missing back references: %s (%s)' % (nnf,nno) 269 274 270 275 … … 660 665 sh[k] = r[:] 661 666 sh.close() 662 663 667 664 668 sh = shelve.open( '%s/requestLinks' % odir, 'n' ) … … 683 687 ## link to group items is now via mip + group id, using stuff from kmapk2 .... 
684 688 685 dorest = False 689 dorest = True 690 691 class psort(object): 692 693 def __init__(self): 694 pass 695 696 def p(self,z): 697 return tabpri.get(z,9) 698 699 def cmp(self,x,y): 700 return cmp( self.p(x), self.p(y) ) 701 686 702 if dorest: 703 tabpri = { 'SPECS':2, 'CMIP5':0, 'CORDEX':3, 'PMIP3':4 } 687 704 688 705 gg = {} … … 700 717 vdefex = {} 701 718 for k in sx.ee_mip.keys(): 719 omit = False 702 720 if not sx.ee_xref_bck.has_key( k ): 721 r1 = sx.ee_mip[k][1] 722 if r1 in ['','29'] or r1[:23] == 'Reference to literature' or r1[:15] == '(copied from 3D': 723 print 'WARN.002.0003: dud record',sx.ee_mip[k], sx.ee_mip[k][-2][:4] 724 omit = True 725 elif sx.ee_mip[k][-2][:4] == 'OMIP': 726 print 'ERROR.002.0004: no back references %s' % str(sx.ee_mip[k]) 727 else: 728 print 'ERROR.002.0003: no back references %s' % str(sx.ee_mip[k]) 703 729 kl = ['__none__',] 704 print 'ERROR.0002.003: no back references %s' % str(sx.ee_mip[k])705 730 vnam0 = '' 706 731 else: 707 732 kl = sx.ee_xref_bck[ k ] 708 733 if len(kl) != 1: 709 print 'ERROR.00 02.001: multiple back references %s' % str(kl)734 print 'ERROR.002.0001: multiple back references %s' % str(kl) 710 735 print sx.ee_mip[k] 711 736 kv = kl[0] … … 713 738 vnam0 = r[1] 714 739 kit = 0 715 while r[8] not in [ '','null'] and r[8][0] != '*': 740 itl = [] 741 while r[8] not in [ '','null','Depricated'] and r[8][0] != '*': 742 itl.append(r) 716 743 if ee_mv0.has_key(r[8]): 717 744 kv = ee_mv0[ r[8] ] 718 745 r = sx.eeold[kv] 719 746 else: 720 print 'ERROR.00 02.002: ambiguos back reference: %s' % str(r)747 print 'ERROR.002.0002: ambiguous back reference: %s' % str(r) 721 748 kit += 1 722 assert kit < 5, 'Too many iterations: %s ' % str(r)749 assert kit < 5, 'Too many iterations: %s,\n%s' % (str(r),str(itl)) 723 750 if kit > 1: 724 print 'WARNING.00 02.001: chained reference: %s' % str(r)751 print 'WARNING.002.0001: chained reference: %s' % str(r) 725 752 kl = [kv,] 726 if r[1] != vnam0: 727 print 'INFO.0001.001: 
name change:',r,vnam0 728 vdefex[k] = list(r) + [vnam0,] 729 sh[k] = sx.ee_mip[k][:] + [kl[0],] 753 if not omit: 754 if r[1] != vnam0: 755 print 'INFO.0001.001: name change:',r,vnam0 756 vdefex[k] = list(r) + [vnam0,] 757 sh[k] = sx.ee_mip[k][:] + [kl[0],] 730 758 sh.close() 731 759 … … 748 776 749 777 for m,t in k0: 778 print 'INFO.003.0001: ',m,t 750 779 vars = shb[ '%s.%s' % (m,t) ] 751 780 ee = {} 752 781 ff = {} 753 rk = 'CMIP5_%s' % t 782 if m == "OMIP": 783 rk = 'OMIP.%s' % t 784 else: 785 rk = 'CMIP5_%s' % t 754 786 if not sx.ee_miptables_old.has_key(rk): 755 print ' ####################key %s not found ' % rk787 print 'ERROR.003.0001: key %s not found ' % rk 756 788 else: 757 789 for u in sx.ee_miptables_old[rk]: … … 774 806 thisu = '__new__' 775 807 else: 776 print 'ERROR.003.0001 var not found: %s (%s, %s)' % (v,m,t) 808 thisu = None 809 if sx.ee_byvar.has_key( v ): 810 tabs = map( lambda x: sx.ee_mip[x][-2], sx.ee_byvar[v] ) 811 tbs = map( lambda x: string.split(x, '_' )[-1], tabs ) 812 cls = map( lambda x: string.split(x, '_' )[0], tabs ) 813 if t in tbs: 814 cl0 = [] 815 for k in range(len(tbs)): 816 if tbs[k] == t: 817 cl0.append( cls[k] ) 818 819 cl0.sort( psort().cmp ) 820 ix = tabs.index( '%s_%s' % (cl0[0],t) ) 821 print 'WARN.003.0001 var not found -- alternative table used [%s]: %s (%s, %s)' % (cl0[0],v,m,t) 822 thisu = sx.ee_byvar[v][ix] 823 print sx.ee_mip[thisu] 824 825 ##print map( lambda x: sx.ee_mip[x][-2], sx.ee_byvar[v] ) 777 826 827 if thisu == None: 828 print 'ERROR.003.0001 var not found: %s (%s, %s)' % (v,m,t) 778 829 if m == 'OMIP': 779 830 kr += 1 … … 782 833 dop.a['omip2'].write( string.join( map(lambda x: sr[x],[4,0,5,2,7]) + ['%s_%s' % (m,t),'mv.005.%4.4i' % kr], '\t' ) + '\n' ) 783 834 784 ## not finding OMIP Oyr variables ... even where they are in vlsc5 and hencd in shold. Problem is table ref.785 ## need to look at new names as well ... as renamed in, e.g. OMIP786 thisu = None787 835 else: 788 836 thisu = ee[v]
Note: See TracChangeset for help on using the changeset viewer.