Changeset 1011
- Timestamp:
- 23/05/17 17:43:15 (5 years ago)
- Location:
- CMIP6dreqbuild/trunk/src/framework
- Files:
-
- 1 added
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
CMIP6dreqbuild/trunk/src/framework/ingest/expts.py
r780 r1011 4 4 from dreqPy import dreq 5 5 6 7 import exptMaps as em 8 6 9 fmip = re.compile( '([a-zA-Z0-9]{1,10}MIP|DCPP)' ) 7 10 EXPTFILE='/data/tmp/svn3/exarch/CMIP6dreqbuild/trunk/src/framework/ingest/CMIP6_Experiments_v50.xlsx' 11 EXPTFILE='/data/work/tmp/git/esdoc-docs/cmip6/experiments/spreadsheet/experiments.xlsx' 12 ## experiments sheet copied and pasted to resolve internal references. 13 EXPTFILE='CMIP6_experiments_esdoc_190517.xlsx' 8 14 9 15 class ktEx(object): 10 def __init__(self, file='/data/work/tmp/git/ cmip6-cmor-tables/Tables/CMIP6_CV.json' ):16 def __init__(self, file='/data/work/tmp/git/CMIP6_CVs/CMIP6_experiment_id.json' ): 11 17 assert os.path.isfile( file ),'File %s not found' % file 12 18 ex = json.load( open( file ) ) 13 self.expts = ex['CV']['experiment_ids'] 19 ##self.expts = ex['CV']['experiment_ids'] 20 self.expts = ex['experiment_id'] 14 21 self.mip = {} 15 22 for k in self.expts: 16 m0 = self.expts[k]['activity_id'] 23 m0 = self.expts[k]['activity_id'][0] 17 24 m = fmip.match( m0 ) 18 25 assert m != None, 'No activity for experiment %s' % k … … 32 39 return 68 33 40 else: 34 print 'SEVERE: badly formatted time constraint: ',tcon 41 print 'SEVERE: badly formatted time constraint: ',tcon,self.rown,self.experiment 35 42 return 0 36 43 elif tcon == '2014-present N yrs': 37 44 return 4 38 45 else: 39 print tcon40 46 t1 = tcon[:-3].strip() 41 47 if t1.find( ' ' ) != -1: 42 48 t1 = t1.split( ' ' )[-1] 49 if t1.find( '.' ) != -1: 50 return float( t1 ) 43 51 return int (t1 ) 44 52 … … 50 58 self.mip = {} 51 59 for i in range(2,sht.nrows): 60 self.rown = i 52 61 r = [x.value for x in sht.row(i)] 53 62 if r[0] != '': 54 63 en = r[2] 55 if en != 'N/A': 56 tcon = r[25] 64 self.experiment = en 65 if en.strip() not in [ 'N/A', 'n/a', '', ' ']: 66 tcon = r[30] 57 67 nyrs = self.parseTempCon( tcon ) 58 68 m = fmip.match( r[0] ) … … 62 72 mip = '---' 63 73 if en in cc: 64 print 'SEVERE: DUPLICATE EXPERIMENT NAME : %s::\n %s\n ----- \n%s ' % (en,str(cc[en]),str(r))74 print 'SEVERE: DUPLICATE EXPERIMENT NAME (ES-DOC): %s::\n %s\n ----- \n%s ' % (en,str(cc[en]),str(r)) 65 75 self.dn.append(en) 66 76 cc[en] = r … … 78 88 print '%s records scanned' % len(cc.keys() ) 79 89 print self.dn 90 91 esDcppMap1 = [u'dcppC-amv-extrop-minus', u'dcppC-amv-extrop-plus', u'dcppC-amv-minus', u'dcppC-amv-plus', u'dcppC-amv-trop-minus', u'dcppC-amv-trop-plus', u'dcppC-ipv-minus', u'dcppC-ipv-plus', u'dcppC-pac'] 92 ktDcppMap1 = [u'dcppC-amv-ExTrop-neg', u'dcppC-amv-ExTrop-pos', u'dcppC-amv-neg', u'dcppC-amv-pos', u'dcppC-amv-Trop-neg', u'dcppC-amv-Trop-pos', u'dcppC-ipv-neg', u'dcppC-ipv-pos', u'dcppC-pac-control'] 80 93 81 94 lMaps = { … … 90 103 u'ssp37-ssp126Lu':u'ssp370-ssp126Lu'} 91 104 105 for i in range( len( esDcppMap1 ) ): 106 lMaps[esDcppMap1[i]] = ktDcppMap1[i] 107 108 rqMaps = {} 109 for k in em.exptMap: 110 for i in em.exptMap[k]: 111 assert i not in rqMaps 112 rqMaps[i] = em.exptMap[k][i] 113 92 114 class request(object): 93 115 def __init__(self,xc): 94 116 dq = dreq.loadDreq() 95 117 self.mapped = set() 118 self.mappedbymip = collections.defaultdict( set ) 96 119 self.missing = set() 120 self.incomplete = set() 121 self.used = set() 97 122 self.ok = set() 123 cc = collections.defaultdict( list ) 124 for k in rqMaps: 125 assert k not in xc.mapToKt 126 xc.mapToKt[k] = rqMaps[k] 98 127 for i in dq.coll['experiment'].items: 99 128 if i.label not in xc.mapToKt: 100 print 'NOT FOUND: %s: %s -- %s' % (i.mip,i.label, i.title) 101 self.missing.add( i.label ) 102 elif xc.mapToKt[i.label] == i.label: 103 self.mapped.add(i.label) 129 if i.label in xc.kt.expts: 130 self.incomplete.add( i.label ) 131 self.used.add( i.label ) 132 else: 133 self.missing.add( i.label ) 134 cc[i.mip].append( i.label ) 104 135 else: 105 self.ok.add( i.label ) 136 self.used.add( xc.mapToKt[i.label] ) 137 if xc.mapToKt[i.label] != i.label: 138 self.mapped.add(i.label) 139 self.mappedbymip[i.mip].add((i.label,xc.mapToKt[i.label])) 140 else: 141 self.ok.add( i.label ) 106 142 print '%s experiments: %s ok, %s mapped, %s unknown: ' % (len( dq.coll['experiment'].items ), len(self.ok), len( self.mapped), len( self.missing) ) 143 print 'Request experiments not found: %s' % (len(self.missing) ) 144 ccu = collections.defaultdict( list ) 145 for k in xc.kt.expts: 146 if k not in self.used: 147 ccu[xc.kt.mip[k]].append( k ) 148 for m in sorted( cc.keys() ): 149 print 'UNMATCHED: %s: %s' % (m, sorted( cc[m] ) ) 150 if m in ccu: 151 print '--- UNUSED: %s' % (sorted( ccu[m] ) ) 152 for m in sorted( self.mappedbymip.keys() ): 153 print 'MAPPED: %s:: %s' % (m,sorted( list(self.mappedbymip[m]))) 154 107 155 108 156 class checker(object): … … 111 159 self.kt = ktEx() 112 160 def xcheck(self): 113 es = esdocEx()114 kt = ktEx()161 es = self.es 162 kt = self.kt 115 163 nf = [] 116 164 matchByOther = {} … … 142 190 if not done: 143 191 nf.append( k ) 144 print '%s experiments not found in PCMDI experiment CV' % len(nf)192 print '%s ES-DOC experiments not found in PCMDI experiment CV' % len(nf) 145 193 print sorted( nf ) 194 self.nf = nf 146 195 147 196 nf2 = [] … … 151 200 print '%s experiments not found in ES-DOC experiment list' % len(nf2) 152 201 print sorted( nf2 ) 202 self.nf2 = nf2 153 203 154 204 ee = {} … … 166 216 167 217 rf = [x for x in nf if x not in lcMatch] 218 print 'Lower case matches: %s' % str( lcMatch ) 219 print 'Mapped matches (PCMDI->ES-DOC) [%s]: %s' % (len(mMatch.keys()),str( mMatch )) 168 220 print '%s ESDOC experiments not found in PCMDI experiment CV after lc' % len(rf) 169 221 print '%s straight matches [%s,%s]' % (len(s0),len(es.expts),len(kt.expts)) … … 173 225 indMatch.add( x[k] ) 174 226 rf2 = [x for x in nf2 if x not in indMatch] 175 print '%s PCMDI experiments not found in ES-DOC experiment list indirect matches ' % len(rf2)227 print '%s PCMDI experiments not found in ES-DOC experiment list indirect matches [%s]' % (len(rf2), len(set(rf2))) 176 228 cc = collections.defaultdict( set ) 229 print ' ----- IN ES-DOC ONLY: ' 177 230 for k in rf: 178 231 cc[ es.mip[k] ].add( k ) … … 183 236 print m, '[ES-DOC]', sorted( list( cc[m] ) ) 184 237 if m in cc2: 238 print m, '[PCMDI]', sorted( list( cc2[m] ) ) 239 for m in cc2: 240 if m not in cc: 185 241 print m, '[PCMDI]', sorted( list( cc2[m] ) ) 186 242 … … 223 279 print '%s mapped matches' % len( mm.keys()) 224 280 print mm 281 282 ee = {} 283 for k in rq.mapped: 284 ##if k not in xc.kt.expts: 285 ee[k] = xc.mapToKt[k] 286 for k in mm: 287 ee[k] = mm[k] 288 for k in lcm: 289 ee[k] = lcm[k] 290 oo = open( 'exptNameMaps.json', 'w' ) 291 json.dump( {'info':{'title':'Updating experiment names in data request'}, 'maps':ee}, oo, indent=4, sort_keys=True ) 292 oo.close() 293 294 225 295 ## from PCMDI github ...
Note: See TracChangeset
for help on using the changeset viewer.