Changeset 375


Ignore:
Timestamp:
02/09/15 17:35:01 (4 years ago)
Author:
mjuckes
Message:

various

Location:
CMIP6dreqbuild/trunk
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • CMIP6dreqbuild/trunk/docs/vocab.txt

    r370 r375  
    11## vsf=v 
    22vocab institute; Institute; cmip.drv.001; def; 0; 1; Yes 
    3  - url; isni 
     3 - url {Home page} 
     4 - isni {International Standard Name Identifier (ISO 27729)} 
    45 - description 
    56 - uuid 
  • CMIP6dreqbuild/trunk/src/framework/scanDreq.py

    r373 r375  
    88empty=re.compile('^$') 
    99 
    10 dq = dreq.loadDreq() 
     10src1 = '../workbook/trial_20150831.xml' 
     11 
     12dq = dreq.loadDreq(dreqXML=src1) 
    1113inx = dq.inx 
    1214##inx.makeVarRefs() 
  • CMIP6dreqbuild/trunk/src/workbook/importShDreq.py

    r372 r375  
    309309from scansh import rq 
    310310m = main(sampleXml, rq) 
     311if len(m.err0010.keys()) > 0: 
     312  ks = m.err0010.keys() 
     313  ks.sort() 
     314  for k in ks: 
     315    print 'ERROR.001.0010: variable group not identified: %s [%s]' % (k,m.err0010[k]) 
  • CMIP6dreqbuild/trunk/srcMisc/dreqSX.py

    r374 r375  
    4343## dictionary to carry short names of old variables. 
    4444eeold = {} 
    45 for ee,shv in [(ee0,sh),(eenv,shnv),(eeold,shold)]: 
     45###for ee,shv in [(ee0,sh),(eenv,shnv),(eeold,shold)]: 
     46for ee,shv in [(ee0,sh),(eenv,shnv)]: 
    4647  for k in shv.keys(): 
    4748    ee[k] = shv[k] 
    4849 
    49 for ee,fn in [(ee_xref,'vlsc5_xref_v20150713.xls'),(ee_mip,'vlsc5_mip_v20150713.xls')]: 
    50   wb = workbook( fn ) 
    51   sh = wb.book.sheet_by_name( 'Sheet1' ) 
    52   for i in range(sh.nrows): 
    53     r = map( lambda x: string.strip(str(uniCleanFunc(x.value))), sh.row(i) ) 
    54     ee[r[0]] = r 
     50rmod = 'csv' 
     51eeoldl = collections.defaultdict( list ) 
     52if rmod == 'xls': 
     53  for ee,fn in [(ee_xref,'vlsc5_xref_v20150826.xls'),(ee_mip,'vlsc5_mip_v20150826.xls'),(eeold,'vlsc5b_v20150826.xls')]: 
     54    wb = workbook( fn ) 
     55    sh = wb.book.sheet_by_name( 'Sheet1' ) 
     56    for i in range(sh.nrows): 
     57      r = map( lambda x: string.strip(str(uniCleanFunc(x.value))), sh.row(i) ) 
     58      ee[r[0]] = r 
     59else: 
     60  for ee,fn,sep,xxx in [(ee_xref,'vlsc5_xref_v20150826.csv',',',False),(ee_mip,'vlsc5_mip_v20150826.csv', '\t',False),(eeoldl,'vlsc5b_v20150826.csv', '\t',True)]: 
     61    for i in open(fn).readlines(): 
     62      r = map( lambda x: string.strip(str(uniCleanFunc(x))), string.split( string.strip(i), sep ) ) 
     63      while len(r) < 9: 
     64        r.append( '' ) 
     65      if xxx: 
     66        ee[r[0]].append( r ) 
     67      else: 
     68        ee[r[0]] = r 
     69 
     70 
     71for h in eeoldl.keys(): 
     72  if len( eeoldl[h] ) == 1: 
     73    eeold[h] = eeoldl[h][0] 
     74  else: 
     75    ll = [] 
     76    for k in range( len( eeoldl[h] ) ): 
     77      if eeoldl[h][k][8] == '': 
     78        ll.append(k) 
     79    if len(ll) == 0: 
     80      print 'ERROR.004.0001: no unforwarded record in vlsc5b for h=%s' % h 
     81      eeold[h] = eeoldl[h][0] 
     82    elif len(ll) > 1: 
     83      print 'ERROR.004.0002: multiple unforwarded records in vlsc5b for h=%s (%s)' % (h,eeoldl[h][ll[0]][1]) 
     84      eeold[h] = eeoldl[h][ll[0]] 
     85    else: 
     86      eeold[h] = eeoldl[h][ll[0]] 
    5587 
    5688k2m = [] 
     
    70102## had bug here due to extension of ee_mip to include dimensions. 
    71103## 
     104ee_byvar = collections.defaultdict(list) 
    72105for k in ee_mip.keys(): 
    73106  tid = ee_mip[k][-2] 
     
    75108  ee_miptables_vars_old[tid].append(  ee_mip[k][-3] ) 
    76109  ee_ovars_old['%s.%s' % (tid,ee_mip[k][-3]) ].append(  k ) 
     110  ee_byvar[ ee_mip[k][-3] ].append( k ) 
    77111 
    78112for s in shlist: 
  • CMIP6dreqbuild/trunk/srcMisc/sumMip_pp.py

    r374 r375  
    1  
    21import extractMipInfo as e 
    3 import hashlib, collections, string, uuid 
     2import hashlib, collections, string, uuid, sys 
     3import dreq_cfg 
    44 
    55from utils_wb import workbook 
     
    88 
    99eeatKeys = ['comment', 'deflate_level', 'shuffle', 'ok_max_mean_abs', 'flag_meanings', 'type', 'ok_min_mean_abs', 'standard_name', 'deflate', 'long_name', 'valid_min', 'cell_methods', 'flag_values', 'cell_measures', 'out_name', 'modeling_realm', 'units', '#cell_methods', 'valid_max', 'positive'] 
     10 
     11shKeys = ['priority', 'long name', 'units', 'comment', 'questions & notes', 'output variable name', 'CF Standard name', 'unconfirmed or proposed standard name', 'unformatted units', 'cell_methods', 'valid min', 'valid max', 'mean absolute min', 'mean absolute max', 'positive', 'type', 'CMOR dimensions', 'CMOR variable name', 'realm', 'frequency', 'cell_measures', 'flag_values', 'flag_meanings' ] 
     12 
     13x = [('long name','long_name'),('output variable name','out_name'),('CF Standard name','standard_name'),('valid min','valid_min'),('valid max','valid_max'),('mean absolute min','ok_min_mean_abs'),('mean absolute max','ok_max_mean_abs'),('CMOR dimensions','dims'),('CMOR variable name','var'),('realm','modeling_realm')] 
     14### sheet columns which have no MIP table equiv. 
     15y = ['priority', 'questions & notes', 'unconfirmed or proposed standard name','unformatted units','frequency'] 
     16## MIP table entries which have no workbook source. 
     17y2 = ['deflate_level', 'shuffle', 'deflate', '#cell_methods' ] 
     18 
    1019eeatKeysPlus = eeatKeys + ['var','mip','dims'] 
     20ktr1 = {} 
     21for k in eeatKeysPlus: 
     22  if k in shKeys: 
     23    ktr1[k] = k 
     24 
     25for t in x: 
     26  ktr1[t[1]] = t[0] 
     27 
     28dc = dreq_cfg.rqcfg() 
     29eeomip = [] 
     30for f in dc.ff['OMIP']: 
     31  wbx = workbook( dc.dir0 + 'OMIP/' + f ) 
     32  for s in wbx.sns: 
     33    if s != 'general': 
     34      sh = wbx.book.sheet_by_name( s ) 
     35      started = False 
     36      for i in range(sh.nrows): 
     37        r = map( lambda x: x.value, sh.row(i) ) 
     38        if started: 
     39          k2 = str( uuid.uuid1() ) 
     40          this = [k2,] 
     41          for k in eeatKeysPlus: 
     42            if k == 'mip': 
     43              this.append( 'OMIP.%s' % s ) 
     44            elif ktr1.has_key(k): 
     45              this.append( r[ shKeys.index( ktr1[k] ) ] ) 
     46            else: 
     47              this.append( '' ) 
     48          eeomip.append( this ) 
     49        elif r[0] == 'priority': 
     50          started = True 
    1151 
    1252def mipr2l( dd1 ): 
     
    69109  h = hashlib.md5(string.join(t) ).hexdigest() 
    70110  ee[h] = t 
    71   eee[h].append( z ) 
     111### list of mip tables referencing this item 
     112  eee[h] += z  
    72113 
    73114## need to add v & m .... 
     
    83124    eeh[h].append(k2) 
    84125 
     126#### this is the wrong use of eeh ---- should be key referencing another ...... 
     127#### h is a hash of var, ln, sn, comment, units, used as key of "ee" 
     128ee_extra = {} 
     129for r in eeomip: 
     130  v = r[-3] 
     131  k2 = r[0] 
     132  u = r[17] 
     133  if u == 1.: 
     134    u = '1' 
     135  if u == .001: 
     136    u = '0.001' 
     137## 
     138## need to deal with these rogue entries later as well 
     139## 
     140  if v not in ['',29]: 
     141    t = (v,r[10],r[8],r[1],u) 
     142    h = hashlib.md5(string.join(t) ).hexdigest() 
     143    eeh[h].append(k2) 
     144    ee_extra[h] = t 
     145    eee[h].append( r[-2] ) 
     146 
    85147def mjoin( ll ): 
    86148  oo = '' 
     
    94156wb = workbook( 'vlsc3_ref_01-04.xls' ) 
    95157sh = wb.book.sheet_by_name( 'Sheet1' ) 
    96 vref = {} 
     158vref = collections.defaultdict( list ) 
    97159vrefh = collections.defaultdict( list ) 
    98160for i in range(sh.nrows): 
     
    112174      print r 
    113175      raise 
    114     vref[h] = s + r[5:]  
     176    vref[h].append( s + r[5:]  ) 
    115177    vrefh[k].append( h ) 
    116178een = {} 
     
    124186icsh = [] 
    125187for h in ee: 
    126   this = list(ee[h])  + [str(eee[h][0])[1:-1],] 
     188######''' 
     189  this = list(ee[h])  + [string.join(eee[h],','), ] 
    127190  k = this[0] 
    128191  if not vrefh.has_key(k): 
    129192    een[h] = ee[h] 
    130193    print 'ERR.001.0001: ',str(this) 
    131     if this[-1] == "'CCMI1_satdaily'": 
     194    if this[-1] == "CCMI1_satdaily": 
    132195      icsh.append(h) 
    133196      ics += 1 
     
    138201    print '###############################' 
    139202    for h2 in vrefh[k]: 
    140       print vref[h2][:5] 
     203      print vref[h2][0][:5] 
    141204    print '+++++++++++++++++++++++++++++++' 
    142205    
     
    176239 if h not in icsh: 
    177240  idk += 1 
    178   this = [h,] + list(ee[h])  + [string.join(eee[h][0],','), 'mv.001.%4.4i' % idk] 
     241  this = [h,] + list(ee[h])  + [string.join(eee[h],','), 'mv.001.%4.4i' % idk] 
    179242  assert vref.has_key(h), 'key %s not found' % h 
    180   ll = vref[h][:] 
    181   ll[:5] = ee[h][:5] 
    182   this = '%s\t' % h 
    183   for c in ll: 
    184     try: 
    185       this += '%s\t' % str(c) 
    186     except: 
    187       print c 
    188       print ll 
    189       raise 
     243  for ll in vref[h]: 
     244    ll[:5] = ee[h][:5] 
     245    this = '%s\t' % h 
     246    for c in ll: 
     247      try: 
     248        this += '%s\t' % str(c) 
     249      except: 
     250        print c 
     251        print ll 
     252        raise 
    190253## need to append trailing columns 
    191254  #oo.write( "%s\t%s\t%s\t%s\t'%s'\t%s\t%s\t%s\t\n" % tuple(this) ) 
    192   oo.write( "%s\n" % this ) 
    193  
     255    oo.write( "%s\n" % this ) 
     256 
     257eevr = {} 
     258eevr2 = {} 
    194259for h,k in vrefmissing: 
    195260  assert vref.has_key(h), 'key %s not found [2]' % h 
    196   this = '%s\t' % h 
    197   for c in vref[h]: 
    198     this += '%s\t' % c 
    199   kmv = vref[h][6] 
    200   assert kmv[:3] == 'mv.', 'ERROR.001.0011: unexpected mv identifier: %s, %s ' % (kmv, str(this)) 
    201   if kmv[:3] == 'mv.': 
    202     if ee2.has_key(kmv): 
    203       r = map( str, ee2[kmv] ) 
    204       r2 = map( str, vref[h] ) 
    205       if r[1:5] != r2[:4] or (r[6:9] != r2[5:8]): 
    206         print str(vref[h]) 
    207         print str(r) 
    208         print map( lambda x,y: x == y, r[1:9], r2[:8] ) 
    209         print '------------------------------------------------' 
    210   oo.write( "%s\n" % this ) 
     261  for ll in vref[h]: 
     262    this = '%s\t' % h 
     263    for c in ll: 
     264      this += '%s\t' % c 
     265    kmv = ll[6] 
     266    assert kmv[:3] == 'mv.', 'ERROR.001.0011: unexpected mv identifier: %s, %s ' % (kmv, str(this)) 
     267    if kmv[:3] == 'mv.': 
     268      if ee2.has_key(kmv): 
     269        r = map( str, ee2[kmv] ) 
     270        r2 = map( str, ll ) 
     271        if r[1:5] != r2[:4] or (r[6:9] != r2[5:8]): 
     272          print str(ll) 
     273          print str(r) 
     274          print map( lambda x,y: x == y, r[1:9], r2[:8] ) 
     275          print '------------------------------------------------' 
     276    hp = '%s.%s' % (h,kmv) 
     277    if not eevr2.has_key(hp): 
     278      eevr2[hp] = k 
     279      eevr[h] = k 
     280      oo.write( "%s\n" % this ) 
    211281## need to output additional rows. 
     282idkp = 0 
     283for h in ee_extra.keys(): 
     284  if not (ee.has_key(h) or eevr.has_key(h)): 
     285    idkp += 1 
     286    this0 = list(ee_extra[h])  + [string.join(eee[h],','), 'mv.007.%4.4i' % idkp] 
     287    this = '%s\t' % h 
     288    for c in this0: 
     289      try: 
     290        this += '%s\t' % str(c) 
     291      except: 
     292        print c 
     293        print ll 
     294        raise 
     295    oo.write( "%s\n" % this ) 
     296 
    212297oo.close() 
    213298oo=open( 'vlsc5_xref_v%s.csv' % yyy,'w') 
     
    220305  this = string.join( eel[u], '\t' ) 
    221306  oo.write( '%s\t%s\n' % (u,this) ) 
     307for r in eeomip: 
     308  this = string.join( map( str, r), '\t' ) 
     309  oo.write( '%s\n' % (this) ) 
    222310oo.close() 
    223311print eeat.keys() 
  • CMIP6dreqbuild/trunk/srcMisc/sx2.py

    r374 r375  
    253253nnf = 0 
    254254nnm = 0 
     255nno = 0 
    255256for k in sx.ee_mip.keys(): 
    256257   t = map( lambda x: sx.ee_mip[k][x], [15,10,8,1,17] ) 
    257258   h = hashlib.md5(string.join(t) ).hexdigest() 
    258259   if not sx.ee_xref_bck.has_key(k): 
    259      print 'ERROR.002.0010: missing back reference',sx.ee_mip[k] 
    260      nnf += 1 
     260     r1 = sx.ee_mip[k][1] 
     261     if r1 in ['','29'] or r1[:23] == 'Reference to literature' or r1[:15] == '(copied from 3D': 
     262       print 'WARN.002.0010: dud record',sx.ee_mip[k], sx.ee_mip[k][-2][:4]  
     263     else: 
     264       print 'ERROR.002.0010: missing back reference',sx.ee_mip[k], sx.ee_mip[k][-2][:4]  
     265       nnf += 1 
    261266   elif len(sx.ee_xref_bck[k]) > 1: 
    262267     nnm += 1 
     
    266271       print sx.eeold[k2] 
    267272 
    268 print '#### Number of missing back references: ',nnf 
     273print '#### Number of missing back references: %s (%s)' % (nnf,nno) 
    269274    
    270275 
     
    660665    sh[k] = r[:] 
    661666sh.close() 
    662    
    663667 
    664668sh = shelve.open( '%s/requestLinks' % odir, 'n' ) 
     
    683687## link to group items is now via mip + group id, using stuff from kmapk2 .... 
    684688 
    685 dorest = False 
     689dorest = True 
     690 
     691class psort(object): 
     692 
     693   def __init__(self): 
     694     pass 
     695 
     696   def p(self,z): 
     697     return tabpri.get(z,9) 
     698 
     699   def cmp(self,x,y): 
     700     return cmp( self.p(x), self.p(y) ) 
     701 
    686702if dorest: 
     703 tabpri = { 'SPECS':2, 'CMIP5':0, 'CORDEX':3, 'PMIP3':4 } 
    687704 
    688705 gg = {} 
     
    700717 vdefex = {} 
    701718 for k in sx.ee_mip.keys(): 
     719  omit = False 
    702720  if not sx.ee_xref_bck.has_key( k ): 
     721    r1 = sx.ee_mip[k][1] 
     722    if r1 in ['','29'] or r1[:23] == 'Reference to literature' or r1[:15] == '(copied from 3D': 
     723       print 'WARN.002.0003: dud record',sx.ee_mip[k], sx.ee_mip[k][-2][:4]  
     724       omit = True 
     725    elif sx.ee_mip[k][-2][:4] == 'OMIP': 
     726       print 'ERROR.002.0004: no back references %s' % str(sx.ee_mip[k]) 
     727    else: 
     728      print 'ERROR.002.0003: no back references %s' % str(sx.ee_mip[k]) 
    703729    kl = ['__none__',] 
    704     print 'ERROR.0002.003: no back references %s' % str(sx.ee_mip[k]) 
    705730    vnam0 = '' 
    706731  else: 
    707732    kl = sx.ee_xref_bck[ k ] 
    708733    if len(kl) != 1: 
    709       print 'ERROR.0002.001: multiple back references %s' % str(kl) 
     734      print 'ERROR.002.0001: multiple back references %s' % str(kl) 
    710735      print sx.ee_mip[k] 
    711736    kv = kl[0] 
     
    713738    vnam0 = r[1] 
    714739    kit = 0 
    715     while r[8] not in [ '','null'] and r[8][0] != '*': 
     740    itl = [] 
     741    while r[8] not in [ '','null','Depricated'] and r[8][0] != '*': 
     742      itl.append(r) 
    716743      if ee_mv0.has_key(r[8]): 
    717744        kv = ee_mv0[ r[8] ] 
    718745        r = sx.eeold[kv] 
    719746      else: 
    720         print 'ERROR.0002.002: ambiguos back reference: %s' % str(r) 
     747        print 'ERROR.002.0002: ambiguous back reference: %s' % str(r) 
    721748      kit += 1 
    722       assert kit < 5, 'Too many iterations: %s' % str(r) 
     749      assert kit < 5, 'Too many iterations: %s,\n%s' % (str(r),str(itl)) 
    723750      if kit > 1: 
    724         print 'WARNING.0002.001: chained reference: %s' % str(r) 
     751        print 'WARNING.002.0001: chained reference: %s' % str(r) 
    725752    kl = [kv,] 
    726   if r[1] != vnam0: 
    727     print 'INFO.0001.001: name change:',r,vnam0 
    728   vdefex[k] = list(r) + [vnam0,]   
    729   sh[k] = sx.ee_mip[k][:]  + [kl[0],] 
     753  if not omit: 
     754    if r[1] != vnam0: 
     755      print 'INFO.0001.001: name change:',r,vnam0 
     756    vdefex[k] = list(r) + [vnam0,]   
     757    sh[k] = sx.ee_mip[k][:]  + [kl[0],] 
    730758 sh.close() 
    731759 
     
    748776 
    749777 for m,t in k0: 
     778  print 'INFO.003.0001: ',m,t 
    750779  vars = shb[ '%s.%s' % (m,t) ] 
    751780  ee = {} 
    752781  ff = {} 
    753   rk = 'CMIP5_%s' % t 
     782  if m == "OMIP": 
     783    rk = 'OMIP.%s' % t 
     784  else: 
     785    rk = 'CMIP5_%s' % t 
    754786  if not sx.ee_miptables_old.has_key(rk): 
    755      print '#################### key %s not found ' % rk 
     787     print 'ERROR.003.0001: key %s not found ' % rk 
    756788  else: 
    757789    for u in sx.ee_miptables_old[rk]: 
     
    774806          thisu = '__new__' 
    775807        else: 
    776           print 'ERROR.003.0001 var not found: %s (%s, %s)' % (v,m,t) 
     808          thisu = None 
     809          if sx.ee_byvar.has_key( v ): 
     810             tabs = map( lambda x: sx.ee_mip[x][-2], sx.ee_byvar[v] ) 
     811             tbs = map( lambda x: string.split(x, '_' )[-1], tabs ) 
     812             cls = map( lambda x: string.split(x, '_' )[0], tabs ) 
     813             if t in tbs: 
     814               cl0 = [] 
     815               for k in range(len(tbs)): 
     816                 if tbs[k] == t: 
     817                    cl0.append( cls[k] ) 
     818 
     819               cl0.sort( psort().cmp ) 
     820               ix = tabs.index( '%s_%s' % (cl0[0],t) ) 
     821               print 'WARN.003.0001 var not found -- alternative table used [%s]: %s (%s, %s)' % (cl0[0],v,m,t) 
     822               thisu = sx.ee_byvar[v][ix] 
     823               print sx.ee_mip[thisu] 
     824                
     825             ##print map( lambda x: sx.ee_mip[x][-2], sx.ee_byvar[v] ) 
    777826           
     827          if thisu == None: 
     828            print 'ERROR.003.0001 var not found: %s (%s, %s)' % (v,m,t) 
    778829          if m == 'OMIP': 
    779830            kr += 1 
     
    782833            dop.a['omip2'].write( string.join( map(lambda x: sr[x],[4,0,5,2,7]) + ['%s_%s' % (m,t),'mv.005.%4.4i' % kr], '\t' ) + '\n' ) 
    783834           
    784 ## not finding OMIP Oyr variables ... even where they are in vlsc5 and hence in shold. Problem is table ref. 
    785 ## need to look at new names as well ... as renamed in, e.g. OMIP 
    786           thisu = None 
    787835      else: 
    788836        thisu = ee[v] 
Note: See TracChangeset for help on using the changeset viewer.