## Ingest helpers: read the spatial-shape and structure reference workbooks
## and write the structure lookup tables to shelve files.
##
## The print function is taken from __future__ so that the diagnostics below
## produce the same output under Python 2 and the module also parses under
## Python 3.
from __future__ import print_function

import collections
import glob
import shelve

from utils_wb import workbook


def _is_data_row(r):
    """Return True unless row *r* is the 'Label' header or starts with '#'.

    A row is skipped when its second cell is the literal 'Label' or when its
    first cell is a non-empty value whose first character is '#'.
    """
    return r[1] != 'Label' and not (len(r[0]) > 0 and r[0][0] == '#')


class icol(object):
    """Holder for a single ``defaultdict(list)`` stored in ``self.cc``."""

    def __init__(self):
        self.cc = collections.defaultdict(list)


class loadSpatial(object):
    """Load sheet 'ss_ref' of spatialShape_ref.xls.

    Attributes set by the constructor:
      ss        stripped third column (the "dimension set" of the assertion
                message) -> (col0, col1, col3, str(int(col4)), uid), where
                col0, col1 and col3 are stripped;
      labByUid  uid (str of the seventh column) -> first column as read;
      uidByLab  first column as read -> uid.

    Raises AssertionError if a dimension set or a uid occurs twice.
    """

    def __init__(self):
        wb = workbook('../workbook/inputs/spatialShape_ref.xls')
        s1 = wb.book.sheet_by_name(u'ss_ref')
        self.ss = {}
        self.labByUid = {}
        self.uidByLab = {}
        uu = set()
        for i in range(s1.nrows):
            rr = [x.value for x in s1.row(i)]
            if rr[0] == 'label':
                # Header row.
                continue
            a, b, k, lf = [x.strip() for x in rr[:4]]
            # Test the stripped value: that is the key actually stored.
            assert k not in self.ss, 'Duplicate dimension set in spatial shape sheet: %s' % rr[2]
            li = str(int(rr[4]))
            u = str(rr[6])
            assert u not in uu, 'Duplicate uid in spatialShape_ref.xls: %s' % u
            # Record the uid; without this the check above could never fail.
            uu.add(u)
            self.labByUid[u] = rr[0]
            self.uidByLab[rr[0]] = u
            self.ss[k] = (a, b, lf, li, u)


class struct(object):
    """Build a lookup of structure records from structRef.xls.

    The constructor loads the label -> uid table (``uByLab``), the spatial
    shape reference (``ldsp``) and the per-uid summaries from strRef2.txt
    (``summary``, ``labs``).  ``read`` fills ``self.map`` and ``save`` writes
    it to two shelve files.
    """

    def __init__(self):
        # Labels that are reported as errors rather than ignored in read()
        # when they have no summary record.
        self.force = ('str-a02',)
        self.f = '../workbook/inputs/structRef.xls'
        # Column headings; not referenced elsewhere in this file.
        self.ih = ['Label', 'Title', 'Spatial shape', 'Other dims', 'coords', 'cell methods', 'cell measures', 'flag meanings', 'flag values', 'Spatial shape (title)', 'Spatial dimensions']
        old = False
        if old:
            # Disabled path: take the label -> uid table from the text file only.
            with open('../workbook/inputs/strUidMap.txt') as ii:
                self.uByLab = {}
                for l in ii:
                    l, u = l.strip().split()
                    self.uByLab[l] = u
        else:
            self.refStr()

        self.ldsp = loadSpatial()
        self.summary = {}
        with open('strRef2.txt') as ii:
            self.labs = set()
            for l in ii:
                bits = l.strip().split('\t')
                if len(bits) != 8:
                    print('SEVERE: can not parse ', l)
                    # An explicit exception: a bare ``raise`` here has no
                    # active exception to re-raise.
                    raise ValueError('strRef2.txt: expected 8 tab-separated fields, found %s' % len(bits))
                u, lab, spid, tmid, title, description, flag_values, tlab = bits
                # Drop the first and last character of each value
                # (presumably enclosing quotes -- confirm against the file).
                description = description[1:-1]
                flag_values = flag_values[1:-1]
                self.summary[u] = (lab, tmid, spid, title, description, flag_values, tlab)
                self.labs.add(lab)

    ## (i.tmid,i.spid,i.odims,i.coords,i.cell_methods,i.cell_measures,i.flag_meanings)
    ## Label Title Spatial shape Other dims coords cell methods cell measures flag meanings flag values Spatial shape (title) Spatial dimensions

    def refStr(self):
        """Populate ``self.uByLab`` (label -> uid).

        Rows of sheet 's1' in refStructure.xlsx (first row skipped) supply
        the third column as label and the last column as uid, unless the
        first cell starts with '#'.  Entries from strUidMap.txt are then
        added for labels that are not already present.
        """
        wb = workbook('../workbook/inputs/refStructure.xlsx')
        sht = wb.book.sheet_by_name('s1')
        self.uByLab = {}
        for i in range(1, sht.nrows):
            r = [x.value for x in sht.row(i)]
            if len(r[0]) == 0 or r[0][0] != '#':
                self.uByLab[r[2]] = str(r[-1])

        with open('../workbook/inputs/strUidMap.txt') as ii:
            for l in ii:
                l, u = l.strip().split()
                if l not in self.uByLab:
                    self.uByLab[l] = u

    def read(self):
        """Scan every sheet of ``self.f`` except '__mods__' into ``self.map``.

        Each sheet is processed in two passes: the first prints every row and
        asserts that all data-row labels are present in ``uByLab``; the second
        builds the map.  Keys are 7-tuples taken from the summary (tmid, spid)
        or the sheet name and spatial uid, followed by row columns 4..8;
        values are (uid, label, title, description, flag values, time label
        or sheet name, complete-flag).

        Raises AssertionError if any label of a sheet is missing from
        ``uByLab``.
        """
        self.map = dict()
        wb = workbook(self.f)
        print(wb.sns)
        for sn in sorted(wb.sns):
            if sn == '__mods__':
                continue
            sht = wb.book.sheet_by_name(sn)
            rows = [[x.value for x in sht.row(i)] for i in range(sht.nrows)]

            # Pass 1: every label must be known before anything is stored.
            nf = 0
            for r in rows:
                print('>>> ', r)
                if _is_data_row(r) and r[1] not in self.uByLab:
                    nf += 1
                    print('LABEL NOT FOUND: ', r)
            assert nf == 0, 'nf= %s [%s]' % (nf, sn)

            # Pass 2: build the lookup.
            for r in rows:
                if not _is_data_row(r):
                    continue
                print('>>> ', r)
                u = self.uByLab[r[1]]
                if u in self.summary:
                    lab, tmid, spid, title, description, fv, tlab = self.summary[u]
                    if title != r[2]:
                        print('SEVERE: title mismatch :', title, r[2], r[1], lab, u)
                    t = (tmid, spid, r[4], r[5], r[6], r[7], r[8])
                    if t in self.map:
                        print('Duplicate lookup ... %s' % str(t))
                        print('--------', (u, lab, title, description, fv))
                        print('--------', self.map[t])
                    self.map[t] = (u, lab, title, description, fv, tlab, True)
                elif len(r[0]) > 8 and r[0][0] == '*':
                    # Starred row without a summary: store an incomplete
                    # record keyed on the sheet name if the shape is known.
                    if r[3] in self.ldsp.uidByLab:
                        spid = self.ldsp.uidByLab[r[3]]
                        t = (sn, spid, r[4], r[5], r[6], r[7], r[8])
                        if t in self.map:
                            print('Duplicate lookup [2] ... %s' % str(t))
                            # Show the leading fields of the record stored
                            # below; the summary fields do not exist here.
                            print('--------', (u, r[1], '', '', ''))
                            print('--------', self.map[t])
                        self.map[t] = (u, r[1], '', '', '', sn, False)
                        print('INFO.str.0042: generating incomplete record %s: %s' % (r[1], r[2]))
                    else:
                        print('INFO.str.0041: ignoring %s: %s' % (r[1], r[2]))
                elif r[1] not in self.labs and r[1] not in self.force:
                    print('INFO.str.0040: ignoring %s: %s' % (r[1], r[2]))
                else:
                    print('ERROR: no uid found for ..:', sn, r)

    def save(self):
        """Write ``self.map`` and the remaining label -> uid pairs to shelves.

        'inSh/structRef' receives one record per uid (the 7 key fields
        followed by the 7 value fields); 'inSh/structRef02' receives a
        label -> uid entry for each label in ``uByLab`` that did not appear
        in the first shelf.  Both shelves are created afresh (flag 'n').
        """
        ubl = dict()
        sh = shelve.open('inSh/structRef', 'n')
        try:
            sh['__info__'] = 'Created by ifiles.struct .. information about structures keyed on tmid, spid, odims, coords, cell_methods, cell_measures, flag_meanings'
            sh['__cols__'] = ['tmid', 'spid', 'odims', 'coords', 'cell_methods', 'cell_measures', 'flag_meanings', 'uid', 'label', 'title', 'description', 'flag_values', 'time_label']
            s1 = set()
            for k in self.map:
                val = self.map[k]
                u = val[0]
                rec = list(k) + list(val)
                sh[u] = rec
                # rec[8] is the label (second field of the value tuple).
                ubl[rec[8]] = u
                # Prospective lookup: the key with its first element replaced
                # by the last field of the value tuple.
                tt = tuple([val[-1]] + rec[1:7])
                if tt in s1:
                    print('SEVERE: duplicate of prospective lookup .... ', tt)
                s1.add(tt)
                if tt[5] == '@OPT':
                    print(tt, u, rec)
        finally:
            sh.close()

        sh = shelve.open('inSh/structRef02', 'n')
        try:
            sh['__info__'] = 'Created by ifiles.struct .. label to uid'
            sh['__cols__'] = ['uid', 'label']
            for l in self.uByLab:
                if l not in ubl:
                    sh[str(l)] = str(self.uByLab[l])
                    print('INFO.structure.50080: adding uid-by-lab record: %s %s' % (self.uByLab[l], l))
        finally:
            sh.close()


class probDup(object):
    """Collect and review directives from the cmv_probDuplication workbooks.

    ``cc`` groups [directive, uid] pairs under the key of the preceding
    'NEXT' row, ``records`` maps each uid to its full row, and ``cch`` is
    filled by ``review`` with the change instructions.
    """

    def __init__(self):
        self.fl = sorted(glob.glob('ingest_files/cmv_probDuplication*.xls'))
        self.cc = collections.defaultdict(list)
        self.cch = dict()
        self.records = dict()

    def read(self):
        """Read 'Sheet1' of every file in ``self.fl``.

        A row whose first cell is 'NEXT' starts a new group keyed on the rest
        of that row; every other row is stored under the current key (None
        until the first 'NEXT' row of a file).

        Raises AssertionError if the second cell of a row repeats.
        """
        for f in self.fl:
            wb = workbook(f)
            sht = wb.book.sheet_by_name('Sheet1')
            key = None
            for i in range(sht.nrows):
                rr = [x.value for x in sht.row(i)]
                if rr[0] == 'NEXT':
                    key = tuple(rr[1:])
                    continue
                u = rr[1]
                assert u not in self.records, 'Duplicate mappings for %s' % u
                self.records[u] = rr
                self.cc[key].append(rr[:2])

    def review(self):
        """Classify each group in ``self.cc`` and print a summary.

        A group containing a directive other than '+' or '-' is reported;
        a group with no '-' entries counts as "no action"; a group with
        exactly one '+' entry is stored in ``self.cch`` as
        (the '+' uid, sorted '-' uids); anything else is reported.
        """
        print('INFO:number of records: %s' % len(self.cc))
        nsk = 0
        nch = 0
        for k in self.cc:
            c1 = collections.defaultdict(set)
            kb = set()
            for r in self.cc[k]:
                c1[r[0]].add(r[1])
                if r[0] not in ['+', '-']:
                    kb.add(r[0])
            if len(kb) > 0:
                print('WARN: unrecognised directives: %s: %s' % (str(k), str(kb)))
            elif len(c1['-']) == 0:
                nsk += 1
            elif len(c1['+']) == 1:
                nch += 1
                # k may be a tuple, so it is converted before formatting.
                assert k not in self.cch, 'Duplicate change record key: %s' % str(k)
                self.cch[k] = (list(c1['+'])[0], sorted(c1['-']))
            else:
                print('WARN: cannot scan directives: %s: n+: %s; n-:%s' % (str(k), len(c1['+']), len(c1['-'])))
        print('INFO: records with no actions: %s' % nsk)
        print('INFO: records with change instruction: %s' % nch)


if __name__ == '__main__':
    #pd = probDup()
    #pd.read()
    #pd.review()
    s = struct()
    s.read()
    s.save()