Changeset 780
- Timestamp: 15/10/16 22:19:33 (4 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
CMIP6dreqbuild/trunk/src/framework/ingest/expts.py
r779 r780 2 2 import os 3 3 import json, re, collections, string 4 from dreqPy import dreq 4 5 5 6 fmip = re.compile( '([a-zA-Z0-9]{1,10}MIP|DCPP)' ) 7 EXPTFILE='/data/tmp/svn3/exarch/CMIP6dreqbuild/trunk/src/framework/ingest/CMIP6_Experiments_v50.xlsx' 6 8 7 9 class ktEx(object): … … 17 19 self.mip[k] = m.groups()[0] 18 20 19 EXPTFILE='/data/tmp/svn3/exarch/CMIP6dreqbuild/trunk/src/framework/ingest/CMIP6_Experiments_v50.xlsx'20 21 class esdocEx(object): 21 22 def __init__(self,file=EXPTFILE ): … … 23 24 self.wb = utils_wb.workbook( file ) 24 25 self.load() 26 27 def parseTempCon( self, tcon ): 28 if tcon[-14:] == 'yrsAfterSpinUp': 29 return int( tcon[:-14] ) 30 if tcon[-3:] != 'yrs': 31 if tcon == '1790-1858': 32 return 68 33 else: 34 print 'SEVERE: badly formatted time constraint: ',tcon 35 return 0 36 elif tcon == '2014-present N yrs': 37 return 4 38 else: 39 print tcon 40 t1 = tcon[:-3].strip() 41 if t1.find( ' ' ) != -1: 42 t1 = t1.split( ' ' )[-1] 43 return int (t1 ) 25 44 26 45 def load(self): … … 35 54 en = r[2] 36 55 if en != 'N/A': 56 tcon = r[25] 57 nyrs = self.parseTempCon( tcon ) 37 58 m = fmip.match( r[0] ) 38 59 if m != None: … … 69 90 u'ssp37-ssp126Lu':u'ssp370-ssp126Lu'} 70 91 71 def xcheck(): 72 es = esdocEx() 73 kt = ktEx() 74 nf = [] 75 matchByOther = {} 76 lcMatch = {} 77 pMatch = {} 78 mMatch = {} 79 s0 = set() 80 for k in es.expts: 81 if k in kt.expts: 82 s0.add(k) 83 else: 84 done = False 85 if k in es.alias: 86 s1 = [o for o in es.alias[k] if o in kt.expts] 87 assert len(s1) <= 1, 'Multiple aliases found for %s: %s' % (k, str(s1) ) 88 if len(s1) != 0: 89 matchByOther[k] = s1[0] 90 done = True 91 if not done: 92 if k[:4] == 'dcpp': 93 k1 = string.split(k,'-',maxsplit=1)[-1] 94 if k1 in kt.expts: 95 pMatch[k] = k1 92 class request(object): 93 def __init__(self,xc): 94 dq = dreq.loadDreq() 95 self.mapped = set() 96 self.missing = set() 97 self.ok = set() 98 for i in dq.coll['experiment'].items: 99 if i.label not in xc.mapToKt: 100 print 'NOT 
FOUND: %s: %s -- %s' % (i.mip,i.label, i.title) 101 self.missing.add( i.label ) 102 elif xc.mapToKt[i.label] == i.label: 103 self.mapped.add(i.label) 104 else: 105 self.ok.add( i.label ) 106 print '%s experiments: %s ok, %s mapped, %s unknown: ' % (len( dq.coll['experiment'].items ), len(self.ok), len( self.mapped), len( self.missing) ) 107 108 class checker(object): 109 def __init__(self): 110 self.es = esdocEx() 111 self.kt = ktEx() 112 def xcheck(self): 113 es = esdocEx() 114 kt = ktEx() 115 nf = [] 116 matchByOther = {} 117 lcMatch = {} 118 pMatch = {} 119 mMatch = {} 120 s0 = set() 121 for k in es.expts: 122 if k in kt.expts: 123 s0.add(k) 124 else: 125 done = False 126 if k in es.alias: 127 s1 = [o for o in es.alias[k] if o in kt.expts] 128 assert len(s1) <= 1, 'Multiple aliases found for %s: %s' % (k, str(s1) ) 129 if len(s1) != 0: 130 matchByOther[k] = s1[0] 96 131 done = True 97 if k in lMaps: 98 if lMaps[k] in kt.expts: 99 mMatch[k] = lMaps[k] 100 done = True 101 if not done: 102 nf.append( k ) 103 print '%s experiments not found in PCMDI experiment CV' % len(nf) 104 print sorted( nf ) 105 106 nf2 = [] 107 for k in kt.expts: 108 if k not in es.expts: 109 nf2.append( k ) 110 print '%s experiments not found in ES-DOC experiment list' % len(nf2) 111 print sorted( nf2 ) 112 113 ee = {} 114 ee2 = {} 115 for x in nf: 116 ee[x.lower()] = x 117 for x in nf2: 118 ee2[x.lower()] = x 119 120 s = set() 121 for x in ee: 122 if x in ee2: 123 lcMatch[ee[x]] = ee2[x] 124 s.add( ee2[x] ) 125 126 rf = [x for x in nf if x not in lcMatch] 127 print '%s ESDOC experiments not found in PCMDI experiment CV after lc' % len(rf) 128 print '%s straight matches [%s,%s]' % (len(s0),len(es.expts),len(kt.expts)) 129 indMatch = set() 130 for x in matchByOther, lcMatch, pMatch, mMatch: 131 for k in x: 132 indMatch.add( x[k] ) 133 rf2 = [x for x in nf2 if x not in indMatch] 134 print '%s PCMDI experiments not found in ES-DOC experiment list indirect matches' % len(rf2) 135 cc = 
collections.defaultdict( set ) 136 for k in rf: 137 cc[ es.mip[k] ].add( k ) 138 cc2 = collections.defaultdict( set ) 139 for k in rf2: 140 cc2[ kt.mip[k] ].add( k ) 141 for m in sorted( list (cc.keys() ) ): 142 print m, '[ES-DOC]', sorted( list( cc[m] ) ) 143 if m in cc2: 144 print m, '[PCMDI]', sorted( list( cc2[m] ) ) 145 return es, kt, matchByOther, lcMatch, pMatch, mMatch, rf , s0 146 147 es,kt, mo, lcm, pm, mm, rf, s0 = xcheck() 132 if not done: 133 if k[:4] == 'dcpp': 134 k1 = string.split(k,'-',maxsplit=1)[-1] 135 if k1 in kt.expts: 136 pMatch[k] = k1 137 done = True 138 if k in lMaps: 139 if lMaps[k] in kt.expts: 140 mMatch[k] = lMaps[k] 141 done = True 142 if not done: 143 nf.append( k ) 144 print '%s experiments not found in PCMDI experiment CV' % len(nf) 145 print sorted( nf ) 146 147 nf2 = [] 148 for k in kt.expts: 149 if k not in es.expts: 150 nf2.append( k ) 151 print '%s experiments not found in ES-DOC experiment list' % len(nf2) 152 print sorted( nf2 ) 153 154 ee = {} 155 ee2 = {} 156 for x in nf: 157 ee[x.lower()] = x 158 for x in nf2: 159 ee2[x.lower()] = x 160 161 s = set() 162 for x in ee: 163 if x in ee2: 164 lcMatch[ee[x]] = ee2[x] 165 s.add( ee2[x] ) 166 167 rf = [x for x in nf if x not in lcMatch] 168 print '%s ESDOC experiments not found in PCMDI experiment CV after lc' % len(rf) 169 print '%s straight matches [%s,%s]' % (len(s0),len(es.expts),len(kt.expts)) 170 indMatch = set() 171 for x in matchByOther, lcMatch, pMatch, mMatch: 172 for k in x: 173 indMatch.add( x[k] ) 174 rf2 = [x for x in nf2 if x not in indMatch] 175 print '%s PCMDI experiments not found in ES-DOC experiment list indirect matches' % len(rf2) 176 cc = collections.defaultdict( set ) 177 for k in rf: 178 cc[ es.mip[k] ].add( k ) 179 cc2 = collections.defaultdict( set ) 180 for k in rf2: 181 cc2[ kt.mip[k] ].add( k ) 182 for m in sorted( list (cc.keys() ) ): 183 print m, '[ES-DOC]', sorted( list( cc[m] ) ) 184 if m in cc2: 185 print m, '[PCMDI]', sorted( list( cc2[m] ) ) 
186 187 self.mapToKt = {} 188 for x in (matchByOther, lcMatch, pMatch, mMatch): 189 for k in x: 190 self.mapToKt[k] = x[k] 191 for k in s0: 192 self.mapToKt[k] = k 193 for k in es.alias: 194 if k in self.mapToKt: 195 for x in es.alias[k]: 196 self.mapToKt[x] = self.mapToKt[k] 197 198 return es, kt, matchByOther, lcMatch, pMatch, mMatch, rf , s0 199 200 xc = checker() 201 202 es,kt, mo, lcm, pm, mm, rf, s0 = xc.xcheck() 203 rq = request( xc ) 148 204 sa = set() 149 205 for x in mo, lcm, pm, mm, rf, s0:
Note: See TracChangeset for help on using the changeset viewer.