1 | import glob, collections, shelve |
---|
2 | from utils_wb import workbook |
---|
3 | |
---|
class icol(object):
    """Small holder object exposing one attribute, ``cc``.

    ``cc`` is a ``collections.defaultdict`` whose missing keys start out as
    an empty list.
    """

    def __init__(self):
        # Missing keys are created on first access with a fresh list.
        self.cc = collections.defaultdict(list)
7 | |
---|
class loadSpatial(object):
    """Load the spatial-shape reference sheet into look-up dictionaries.

    The constructor reads sheet ``ss_ref`` of
    ``../workbook/inputs/spatialShape_ref.xls`` and fills three attributes:

    * ``ss``       -- stripped column 2 (the "dimension set" of the error
      message) -> ``(col 0, col 1, col 3, col 4, uid)``, where columns 0, 1
      and 3 are stripped and column 4 is rendered as an integer string.
    * ``labByUid`` -- uid (``str`` of column 6) -> column 0 as stored.
    * ``uidByLab`` -- column 0 as stored -> uid.

    Raises:
        AssertionError: if a dimension set or a uid occurs more than once.
    """

    def __init__(self):
        wb = workbook('../workbook/inputs/spatialShape_ref.xls')
        sheet = wb.book.sheet_by_name(u'ss_ref')
        self.ss = {}
        self.labByUid = {}
        self.uidByLab = {}
        self._loadRows([cell.value for cell in sheet.row(i)]
                       for i in range(sheet.nrows))

    def _loadRows(self, rows):
        """Add every non-header row of *rows* to the look-up dictionaries.

        *rows* is an iterable of lists of cell values.  A row whose first
        cell equals ``'label'`` is treated as the heading and skipped.
        """
        for rr in rows:
            if rr[0] == 'label':
                continue
            a, b, k, lf = [x.strip() for x in rr[:4]]
            # Test the stripped key: it is the one actually stored in ``ss``.
            assert k not in self.ss, 'Duplicate dimension set in spatial shape sheet: %s' % rr[2]
            li = str(int(rr[4]))
            u = str(rr[6])
            # ``labByUid`` already holds every uid seen so far; the original
            # tested a separate set that was never filled, so this check
            # could not fail.
            assert u not in self.labByUid, 'Duplicate uid in spatialShape_ref.xls: %s' % u
            self.labByUid[u] = rr[0]
            self.uidByLab[rr[0]] = u
            self.ss[k] = (a, b, lf, li, u)
27 | |
---|
class struct(object):
    """Assemble the structure look-up tables and store them in shelve files.

    Construction loads the label -> uid map (``uByLab``), the spatial-shape
    reference (``ldsp``, a ``loadSpatial`` instance) and the tab-separated
    file ``strRef2.txt`` (``summary`` keyed by uid, plus ``labs``, the set of
    labels found there).  ``read()`` then builds ``map`` from the workbook
    named by ``f`` and ``save()`` writes the result to ``inSh/structRef`` and
    ``inSh/structRef02``.

    All diagnostics use single-argument ``print(...)`` so the class runs
    under both Python 2 and Python 3.
    """

    def __init__(self):
        # A label listed here is never reported by the INFO.str.0040
        # "ignoring" branch of read(); it reaches the ERROR branch instead.
        self.force = ('str-a02',)
        self.f = '../workbook/inputs/structRef.xls'
        # Column headings; not referenced by the other methods of this class.
        self.ih = ['Label', 'Title', 'Spatial shape', 'Other dims', 'coords', 'cell methods', 'cell measures', 'flag meanings', 'flag values', 'Spatial shape (title)', 'Spatial dimensions']

        # Developer switch: True takes the label -> uid map from
        # strUidMap.txt alone instead of going through refStr().
        old = False
        if old:
            self.uByLab = {}
            for lab, uid in self._uidMapEntries():
                self.uByLab[lab] = uid
        else:
            self.refStr()
        self.ldsp = loadSpatial()
        self.summary = {}
        self.labs = set()
        with open('strRef2.txt') as ii:
            for l in ii:
                u, rec = self._parseSummaryLine(l)
                self.summary[u] = rec
                self.labs.add(rec[0])

    ## (i.tmid,i.spid,i.odims,i.coords,i.cell_methods,i.cell_measures,i.flag_meanings)
    ## Label Title Spatial shape Other dims coords cell methods cell measures flag meanings flag values Spatial shape (title) Spatial dimensions

    def _uidMapEntries(self):
        """Return the ``(label, uid)`` pairs listed in ``strUidMap.txt``.

        Every line must consist of exactly two whitespace-separated tokens;
        anything else raises ``ValueError`` from the unpacking.
        """
        pairs = []
        with open('../workbook/inputs/strUidMap.txt') as ii:
            for line in ii:
                lab, uid = line.strip().split()
                pairs.append((lab, uid))
        return pairs

    def _parseSummaryLine(self, l):
        """Split one ``strRef2.txt`` line into ``(uid, summary record)``.

        The line must hold eight tab-separated fields: uid, label, spid,
        tmid, title, description, flag values and time label.  The first and
        last character of the description and of the flag values are dropped
        (presumably enclosing quotes -- confirm against the file).

        Returns:
            ``(u, (lab, tmid, spid, title, description, flag_values, tlab))``

        Raises:
            ValueError: if the line does not have exactly eight fields.
        """
        bits = l.strip().split('\t')
        if len(bits) != 8:
            print('SEVERE: can not parse  %s' % l)
            # The original used a bare ``raise`` here, which has no active
            # exception to re-raise and so failed with an unrelated error.
            raise ValueError('can not parse strRef2.txt record: %r' % l)
        u, lab, spid, tmid, title, description, flag_values, tlab = bits
        return u, (lab, tmid, spid, title, description[1:-1], flag_values[1:-1], tlab)

    def refStr(self):
        """Build ``uByLab`` (label -> uid) from the reference workbook.

        Rows of sheet ``s1`` in ``refStructure.xlsx`` after the first supply
        the label (column 2) and the uid (last column), unless the first cell
        starts with ``#``.  Entries from ``strUidMap.txt`` are then added for
        labels the workbook did not provide.
        """
        wb = workbook('../workbook/inputs/refStructure.xlsx')
        sht = wb.book.sheet_by_name('s1')
        self.uByLab = {}
        for i in range(1, sht.nrows):
            r = [x.value for x in sht.row(i)]
            if len(r[0]) == 0 or r[0][0] != '#':
                self.uByLab[r[2]] = str(r[-1])
        for lab, uid in self._uidMapEntries():
            # Workbook entries take precedence over the text file.
            if lab not in self.uByLab:
                self.uByLab[lab] = uid

    def _isDataRow(self, r):
        """Return True unless *r* is a heading row or is commented out with ``#``."""
        return r[1] != 'Label' and not (len(r[0]) > 0 and r[0][0] == '#')

    def read(self):
        """Populate ``self.map`` from every sheet of ``self.f`` except ``__mods__``.

        Each sheet is scanned twice: first to verify that every data row's
        label (column 1) is present in ``uByLab``, then to add the rows to
        ``map``.

        Raises:
            AssertionError: if a sheet contains labels missing from ``uByLab``.
        """
        self.map = dict()
        wb = workbook(self.f)
        print(wb.sns)
        for sn in sorted(wb.sns):
            if sn == '__mods__':
                continue
            sht = wb.book.sheet_by_name(sn)
            rows = [[x.value for x in sht.row(i)] for i in range(sht.nrows)]

            # Pass 1: report every unknown label before giving up on the sheet.
            nf = 0
            for r in rows:
                print('>>>  %s' % (r,))
                if self._isDataRow(r) and r[1] not in self.uByLab:
                    nf += 1
                    print('LABEL NOT FOUND:  %s' % (r,))
            assert nf == 0, 'nf= %s [%s]' % (nf, sn)

            # Pass 2: all labels resolve, so build the look-up entries.
            for r in rows:
                if self._isDataRow(r):
                    print('>>>  %s' % (r,))
                    self._mapRow(sn, r)

    def _mapRow(self, sn, r):
        """Add the look-up entry for data row *r* of sheet *sn* to ``self.map``.

        Keys are 7-tuples ``(tmid or sheet name, spid, r[4], ..., r[8])``.
        Values are ``(uid, label, title, description, flag_values,
        time label or sheet name, complete)``, where ``complete`` is False
        for records generated without a ``summary`` entry.
        """
        u = self.uByLab[r[1]]
        if u in self.summary:
            lab, tmid, spid, title, description, fv, tlab = self.summary[u]
            if title != r[2]:
                print('SEVERE: title mismatch : %s %s %s %s %s' % (title, r[2], r[1], lab, u))
            t = (tmid, spid, r[4], r[5], r[6], r[7], r[8])
            if t in self.map:
                print('Duplicate lookup ... %s' % str(t))
                print('-------- %s' % ((u, lab, title, description, fv),))
                print('-------- %s' % (self.map[t],))
            self.map[t] = (u, lab, title, description, fv, tlab, True)
        elif len(r[0]) > 8 and r[0][0] == '*':
            if r[3] in self.ldsp.uidByLab:
                spid = self.ldsp.uidByLab[r[3]]
                t = (sn, spid, r[4], r[5], r[6], r[7], r[8])
                rec = (u, r[1], '', '', '', sn, False)
                if t in self.map:
                    print('Duplicate lookup [2] ... %s' % str(t))
                    # Show the record being stored; the original printed
                    # lab/title/description/fv, which are not set in this
                    # branch (NameError, or values left from an earlier row).
                    print('-------- %s' % (rec[:5],))
                    print('-------- %s' % (self.map[t],))
                self.map[t] = rec
                print('INFO.str.0042: generating incomplete record %s: %s' % (r[1], r[2]))
            else:
                print('INFO.str.0041: ignoring %s: %s' % (r[1], r[2]))
        elif r[1] not in self.labs and r[1] not in self.force:
            print('INFO.str.0040: ignoring %s: %s' % (r[1], r[2]))
        else:
            print('ERROR: no uid found for ..: %s %s' % (sn, r))

    def save(self):
        """Write ``self.map`` and the unused part of ``uByLab`` to shelve files.

        ``inSh/structRef`` receives one record per map entry, keyed by uid
        and holding the seven key fields followed by the map value.
        ``inSh/structRef02`` receives label -> uid for every label in
        ``uByLab`` that did not appear in the map.  Both are opened with flag
        ``'n'``, so any existing content is replaced.
        """
        sh = shelve.open('inSh/structRef', 'n')
        try:
            sh['__info__'] = 'Created by ifiles.struct .. information about structures keyed on tmid, spid, odims, coords, cell_methods, cell_measures, flag_meanings'
            # NOTE(review): 13 names are listed here but each record below
            # has 14 items (the trailing boolean is unnamed) -- confirm.
            sh['__cols__'] = ['tmid', 'spid', 'odims', 'coords', 'cell_methods', 'cell_measures', 'flag_meanings', 'uid', 'label', 'title', 'description', 'flag_values', 'time_label']
            s1 = set()
            ubl = dict()
            for k in self.map:
                val = self.map[k]
                u = val[0]
                # Build the record once rather than reading it back from the
                # shelf (and unpickling it) several times.
                rec = list(k) + list(val)
                sh[u] = rec
                ubl[val[1]] = u
                rr = rec[:7]
                # NOTE(review): val[-1] is the boolean "complete" flag, so
                # the prospective key starts with True/False; the time label
                # is val[-2] -- confirm which one was intended.
                rr[0] = val[-1]
                tt = tuple(rr)
                if tt in s1:
                    print('SEVERE: duplicate of prospective lookup ....  %s' % (tt,))
                s1.add(tt)
                if tt[5] == '@OPT':
                    print('%s %s %s' % (tt, u, rec))
        finally:
            sh.close()

        sh = shelve.open('inSh/structRef02', 'n')
        try:
            sh['__info__'] = 'Created by ifiles.struct .. label to uid'
            sh['__cols__'] = ['uid', 'label']
            for l in self.uByLab:
                if l not in ubl:
                    sh[str(l)] = str(self.uByLab[l])
                    print('INFO.structure.50080: adding uid-by-lab record: %s %s' % (self.uByLab[l], l))
        finally:
            sh.close()
163 | |
---|
164 | |
---|
class probDup(object):
    """Collect and review the directives in the ``cmv_probDuplication*.xls`` files.

    Attributes:
        fl: sorted list of matching workbook paths under ``ingest_files/``.
        cc: key -> list of ``[directive, uid]`` pairs, filled by ``read()``.
        cch: key -> ``(uid to keep, sorted uids to drop)``, filled by ``review()``.
        records: uid -> full sheet row, filled by ``read()``.
    """

    def __init__(self):
        self.fl = sorted(glob.glob('ingest_files/cmv_probDuplication*.xls'))
        self.cc = collections.defaultdict(list)
        self.cch = dict()
        self.records = dict()

    def read(self):
        """Load ``Sheet1`` of every workbook in ``self.fl``.

        A row whose first cell is ``NEXT`` starts a new group keyed by the
        rest of that row; every other row is stored in ``records`` under its
        second cell and its first two cells are appended to the current
        group.  Rows preceding the first ``NEXT`` of a file are grouped
        under the key ``None``.

        Raises:
            AssertionError: if the same uid occurs in more than one row.
        """
        for f in self.fl:
            wb = workbook(f)
            sht = wb.book.sheet_by_name('Sheet1')
            key = None
            for i in range(sht.nrows):
                rr = [x.value for x in sht.row(i)]
                if rr[0] == 'NEXT':
                    key = tuple(rr[1:])
                    continue
                u = rr[1]
                assert u not in self.records, 'Duplicate mappings for %s' % u
                self.records[u] = rr
                self.cc[key].append(rr[:2])

    def review(self):
        """Classify each group in ``cc`` and record the usable changes in ``cch``.

        A group with directives other than ``+``/``-`` is reported and
        skipped; a group with no ``-`` entries counts as "no action"; a group
        with exactly one ``+`` entry (and at least one ``-``) becomes a
        change record; anything else is reported as unscannable.

        Raises:
            AssertionError: if a change record for a key already exists, for
                example when ``review()`` is run a second time.
        """
        print('INFO:number of records: %s' % len(self.cc))
        nsk = 0
        nch = 0
        for k in self.cc:
            c1 = collections.defaultdict(set)
            kb = set()
            for r in self.cc[k]:
                c1[r[0]].add(r[1])
                if r[0] not in ('+', '-'):
                    kb.add(r[0])
            if len(kb) > 0:
                print('WARN: unrecognised directives: %s: %s' % (str(k), str(kb)))
            elif len(c1['-']) == 0:
                nsk += 1
            elif len(c1['+']) == 1:
                nch += 1
                # ``k`` is normally a tuple, so it has to go through str():
                # ``'%s' % k`` would treat it as the argument list and raise
                # TypeError instead of showing this message.
                assert k not in self.cch, 'Duplicate change record key: %s' % str(k)
                self.cch[k] = (list(c1['+'])[0], sorted(c1['-']))
            else:
                print('WARN: cannot scan directives: %s: n+: %s; n-:%s' % (str(k), len(c1['+']), len(c1['-'])))
        print('INFO: records with no actions: %s' % nsk)
        print('INFO: records with change instruction: %s' % nch)
210 | |
---|
if __name__ == '__main__':
    # Duplication review, currently disabled:
    #pd = probDup()
    #pd.read()
    #pd.review()

    # Load the inputs, build the structure map and write the shelve files.
    builder = struct()
    builder.read()
    builder.save()