Changeset 430


Ignore:
Timestamp:
22/10/15 23:37:17 (4 years ago)
Author:
mjuckes
Message:

cleaning of duplicate pruning workflow

Location:
CMIP6dreqbuild/trunk/src/framework
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • CMIP6dreqbuild/trunk/src/framework/dreqPy/scope.py

    r429 r430  
    55import dreq 
    66from utilities import cmvFilter 
    7 import collections, string 
     7import collections, string, operator 
    88import sys 
    99 
     
    2020    return self.msg 
    2121 
     22class cmpd(object): 
     23  def __init__(self,dct): 
     24    self.d = dct 
     25  def cmp(self,x,y,): 
     26    return cmp( self.d[x], self.d[y] ) 
     27 
    2228nt_mcfg = collections.namedtuple( 'mcfg', ['nho','nlo','nha','nla','nlas','nls','nh1'] ) 
     29    self.default_mcfg = nt_mcfg._make( [259200,60,64800,40,20,5,100] ) 
    2330 
    2431def filter1( a, b ): 
     
    7784    for i in self.dq.coll['spatialShape'].items: 
    7885      type = 'a' 
    79       if i.levelFlag == 'false': 
     86      if i.levelFlag == False: 
    8087        ds =  i.dimensions.split( '|' ) 
    8188        if ds[-1] in ['site', 'basin']: 
     
    144151    return self.ntot 
    145152 
    146   def volByExpt( self, l1, ex, exptList, pmax=2, cc=None, retainRedundantRank=False ): 
     153  def volByExpt( self, l1, ex, exptList, pmax=2, cc=None, retainRedundantRank=False, intersection=False ): 
    147154    """volByExpt: calculates the total data volume associated with an experiment/experiment group and a list of request items. 
    148155          The calculation has some approximations concerning the number of years in each experiment group.""" 
     
    151158## 
    152159    inx = self.dq.inx 
     160    imips = {i.mip for i in l1} 
    153161## 
    154162## rql is the set of all request links which are associated with a request item for this experiment set 
     
    161169 
    162170## The complete set of variables associated with these requests: 
    163     rqvg = list({inx.uid[i].refid for i in rql}) 
     171    tm = 1 
     172    if tm == 0: 
     173      rqvg = list({inx.uid[i].refid for i in rql}) 
     174    else: 
     175      cc1 = collections.defaultdict( set ) 
     176      for i in rql: 
     177        cc1[inx.uid[i].mip].add( inx.uid[i].refid ) 
     178 
     179      if intersection: 
     180        ccv = {} 
     181# 
     182# set of request variables for each MIP 
     183## 
     184        for k in cc1: 
     185          thisc = reduce( operator.or_, [set( inx.iref_by_sect[vg].a['requestVar'] ) for vg in cc1[k] ] ) 
     186          ccv[k] = {inx.uid[l].vid for l in list(thisc) if inx.uid[l].priority <= pmax} 
     187 
     188        if len( ccv.keys() ) < len( list(imips) ): 
     189          vars = set() 
     190        else: 
     191          vars =  reduce( operator.and_, [ccv[k] for k in ccv] ) 
     192      else: 
     193        rqvg = reduce( operator.or_, [cc1[k] for k in cc1] ) 
    164194 
    165195###To obtain a set of variables associated with this collection of variable groups: 
    166     col1 = set() 
    167     x = {tuple( {col1.add(i) for i in inx.iref_by_sect[vg].a['requestVar']} ) for vg in rqvg} 
     196 
     197        col1 = reduce( operator.or_, [set( inx.iref_by_sect[vg].a['requestVar'] ) for vg in rqvg ] ) 
     198 
    168199###The collector col1 here accumulates all the record uids, resulting in a single collection. These are request variables, to get a set of CMOR variables at priority <= pmax: 
    169     vars = {inx.uid[l].vid for l in list(col1) if inx.uid[l].priority <= pmax} 
     200        vars = {inx.uid[l].vid for l in list(col1) if inx.uid[l].priority <= pmax} 
     201## 
     202## if looking for the union, would have to do a filter here ... after looking up which vars are requested by each MIP ... 
     203## 
     204## possibly some code re-arrangement would help. 
     205## e.g. create a set for each MIP a couple of lines back .... 
    170206 
    171207### filter out cases where the request does not point to a CMOR variable. 
     
    205241      ov.append( self.dq.inx.uid[v] ) 
    206242    ee = self.listIndexDual( ov, 'frequency', 'label', acount=None, alist=None, cdict=szv, cc=cc ) 
    207     self.ngptot = sum( [  self.sz[inx.uid[v].stid]* npy[inx.uid[v].frequency] *nym[v]  for v in vars] ) 
    208     return (self.ngptot, ee ) 
     243    ff = {} 
     244    for v in vars: 
     245      ff[v] = self.sz[ inx.uid[v].stid ] * npy[inx.uid[v].frequency] * nym[v] 
     246    self.ngptot = sum( [  ff[v]  for v in vars] ) 
     247    return (self.ngptot, ee, ff ) 
    209248 
    210249  def esid_to_exptList(self,esid,deref=False): 
     
    362401      -t <tier> maximum tier; 
    363402      -p <priority>  maximum priority; 
     403      --printLinesMax <n>: Maximum number of lines to be printed 
     404      --printVars  : If present, a summary of the variables fitting the selection options will be printed 
    364405""" 
    365406  def __init__(self,args): 
    366407    self.adict = {} 
    367     self.knownargs = {'-m':('m',True), '-p':('p',True), '-t':('t',True), '-h':('h',False)}  
     408    self.knownargs = {'-m':('m',True), '-p':('p',True), '-t':('t',True), '-h':('h',False), '--printLinesMax':('plm',True), '--printVars':('vars',False)}  
    368409    aa = args[:] 
    369410    while len(aa) > 0: 
     
    380421      self.adict['m'] = set(self.adict['m'].split(',') ) 
    381422 
    382     integerArgs = {'p','t'} 
     423    integerArgs = {'p','t','plm'} 
    383424    for i in integerArgs.intersection( self.adict ): 
    384425      self.adict[i] = int( self.adict[i] ) 
     
    400441 
    401442    sc = dreqQuery( dq=self.dq ) 
     443 
     444    ok = True 
     445    for i in self.adict['m']: 
     446        if i not in sc.mips: 
     447          ok = False 
     448          print 'NOT FOUND: ',i 
     449    assert ok,'Available MIPs: %s' % str(sc.mips) 
     450 
    402451    tierMax = self.adict.get( 't', 2 ) 
    403452    sc.setTierMax(  tierMax ) 
    404453    pmax = self.adict.get( 'p', 2 ) 
    405     v = sc.volByMip( self.adict['m'], pmax=pmax ) 
    406     print '%7.2fTb' % (v*2.*1.e-12) 
     454    v0 = sc.volByMip( self.adict['m'], pmax=pmax ) 
     455    print '%7.2fTb' % (v0*2.*1.e-12) 
     456    cc = collections.defaultdict( int ) 
     457    for e in sc.volByE: 
     458      for v in sc.volByE[e][2]: 
     459          cc[v] += sc.volByE[e][2][v] 
     460    x = 0 
     461    for v in cc: 
     462      x += cc[v] 
     463     
     464    vl = sorted( cc.keys(), cmp=cmpd(cc).cmp, reverse=True ) 
     465    if self.adict.get( 'vars', False ): 
     466      printLinesMax = self.adict.get( 'plm', 20 ) 
     467      if printLinesMax > 0: 
     468        mx = min( [printLinesMax,len(vl)] ) 
     469      else: 
     470        mx = len(vl) 
     471 
     472      for v in vl[:mx]: 
     473        print self.dq.inx.uid[v].label, '%7.2fTb' % (cc[v]*2.*1.e-12) 
  • CMIP6dreqbuild/trunk/src/framework/scanDreq.py

    r429 r430  
    115115### the results of the 2nd pass go back to ../workbook to generate a new set of inputs. 
    116116### 
    117 up = updates('varDup_20150928.csv', 'varMult_20150725.csv') 
     117up = updates('varDup.csv', 'varMult.csv', idir='rev2') 
    118118up.scandup() 
    119119up.scanmult() 
Note: See TracChangeset for help on using the changeset viewer.