"""This module provides a basic python API to the Data Request.
After ingesting the XML documents (configuration and request) the module generates two python objects:
1. A collection of records
2. Index
"""
import collections
import re
import shelve
import string
import xml
import xml.dom
import xml.dom.minidom

from __init__ import DOC_DIR

# HTML <head> fragment pulling in the jQuery / jQuery-UI assets; it is
# interpolated into the page template when item pages are written.
jsh='''<link type="text/css" href="/css/jquery-ui-1.8.16.custom.css" rel="Stylesheet" />
<script src="/js/2013/jquery.min.js" type="text/javascript"></script>
<script src="/js/2013/jquery-ui.min.js" type="text/javascript"></script>
<script src="/js/2013/jquery.cookie.js" type="text/javascript"></script>
'''

# Location of the block schema file (read with tab separators by loadBS).
blockSchemaFile = '%s/%s' % (DOC_DIR, 'BlockSchema.csv' )

def loadBS(bsfile):
    """Read in the 'BlockSchema' definitions of the attributes defining attributes.

    The file is tab separated.  The third line holds the column names and
    every following line describes one attribute; the first cell of a row is
    used as the key of that row.

    bsfile: path of the block schema file.

    Returns a collections.defaultdict(dict) mapping row key to a dictionary
    of {column name: cell value}.  Cells missing at the end of a row (lost
    when trailing white space is stripped) are returned as empty strings.
    Blank lines are skipped.
    """
    ## the context manager guarantees that the file handle is released.
    with open( bsfile, 'r' ) as fh:
        ll = [ line.strip().split('\t') for line in fh ]

    cc = collections.defaultdict( dict )
    if len(ll) < 4:
        ## no data rows present
        return cc

    colNames = ll[2]
    for row in ll[3:]:
        if row == ['']:
            ## blank line: nothing to record
            continue
        ## pad short rows to the full width, so that every column is defined.
        if len(row) < len(colNames):
            row.extend( [''] * (len(colNames) - len(row)) )
        for name, val in zip( colNames, row ):
            cc[row[0]][name] = val
    return cc

class rechecks(object):
    """Checks to be applied to strings.

    After each check the "reason" attribute holds an explanation of the
    failure, or an empty string if the check passed.
    """
    def __init__(self):
        ## The pattern is anchored at the end (trailing white space is
        ## tolerated, as int() accepts it) so that strings such as "12abc",
        ## which int() cannot convert, are rejected.
        self.__isInt = re.compile( r'-{0,1}[0-9]+\s*\Z' )
        self.reason = ''

    def isIntStr( self, tv ):
        """Check whether a string is a valid representation of an integer.

        tv: value to be tested.
        Returns True if tv is a string made of an optional minus sign followed
        by digits only, False otherwise (including when tv is not a string).
        """
        if not isinstance( tv, (type(''),type(u'')) ):
            self.reason = 'NOT STRING'
            return False
        ok = self.__isInt.match( tv ) is not None
        if not ok:
            self.reason = 'Failed to match regular expression for integers'
        else:
            self.reason = ''
        return ok

class dreqItemBase(object):
    """A base class used in the definition of records. Designed to be used via a class factory which sets "itemLabelMode" and "attributes" before the class is instantiated: attempting to instantiate the class before setting these will trigger an exception."""
    ## The attributes below are shared by all record classes: the index
    ## (_inx) and the styling dictionaries are attached after the records
    ## have been loaded.
    _indexInitialised = False
    _inx = None
    _urlBase = ''
    _htmlStyle = {}
    _linkAttrStyle = {}

    def __init__(self,idict=None,xmlMiniDom=None,id='defaultId',etree=False):
        """Create a record from a dictionary (idict) or from an XML element (xmlMiniDom).

        Exactly one of idict and xmlMiniDom must be given.
        etree: if True, xmlMiniDom is handled through the ElementTree element
               interface (keys/get) rather than the minidom interface.
        """
        dictMode = idict is not None
        mdMode = xmlMiniDom is not None
        self._htmlTtl = None
        assert not( dictMode and mdMode), 'Mode must be either dictionary of minidom: both assigned'
        assert dictMode or mdMode, 'Mode must be either dictionary of minidom: neither assigned'
        self._contentInitialised = False
        self._greenIcon = '<img height="12pt" src="/images/154g.png" alt="[i]"/>'
        if dictMode:
            self.dictInit( idict )
        elif mdMode:
            self.mdInit( xmlMiniDom, etree=etree )

    def __repr__(self):
        """Provide a one line summary of identifying the object."""
        if self._contentInitialised:
            return 'Item <%s>: [%s] %s' % (self._h.title,self.label,self.title)
        else:
            return 'Item <%s>: uninitialised' % self._h.title

    def __info__(self,full=False):
        """Print a summary of the data held in the object as a list of key/value pairs.

        full: if True, attributes with names starting with an underscore are
              included.
        """
        if self._contentInitialised:
            print ( 'Item <%s>: [%s] %s' % (self._h.title,self.label,self.title) )
            for a in self.__dict__.keys():
                if a[0] != '_' or full:
                    ## private attributes have no entry in self._a: use "get"
                    ## so that the "full" listing does not fail on them.
                    adef = self._a.get( a )
                    if hasattr( adef, 'useClass') and adef.useClass == 'internalLink' and self._base._indexInitialised:
                        if self.__dict__[a] in self._base._inx.uid:
                            targ = self._base._inx.uid[ self.__dict__[a] ]
                            print ( '   %s: [%s]%s [%s]' % ( a, targ._h.label, targ.label, self.__dict__[a] ) )
                        else:
                            print ( '   %s: [ERROR: key not found] [%s]' % ( a, self.__dict__[a] ) )
                    else:
                        print ( '    %s: %s' % ( a, self.__dict__[a] ) )
        else:
            print ( 'Item <%s>: uninitialised' % self._h.title )

    def __href__(self,odir="",label=None):
        """Generate html text for a link to this item.

        odir: prefix placed in front of "<uid>.html" in the link target;
        label: link text (defaults to the uid of the item).
        The tool-tip is taken from the description, title or label (first one
        which is set) and is cached after the first call.
        """
        igns = ['','__unset__']
        if self._htmlTtl is None:
            if 'description' in self.__dict__ and self.description != None and self.description.strip() not in igns:
                ttl = self.description
            elif 'title' in self.__dict__ and self.title != None and self.title.strip() not in igns:
                ttl = self.title
            else:
                ttl = self.label
            ## escape characters which would break the title="..." attribute.
            ttl = ttl.replace( '"', '&quot;' )
            ttl = ttl.replace( '<', '&lt;' )
            self._htmlTtl = ttl.replace( '>', '&gt;' )
        if label is None:
            label = self.uid

        return '<span title="%s"><a href="%s%s.html">%s</a></span>' % (self._htmlTtl,odir,self.uid,label)

    def getHtmlLinkAttrStyle(self,a):
        """Return a function which generates a html fragment for a link to an attribute.

        A function registered in the class level "_linkAttrStyle" dictionary is
        used if there is one for attribute "a", otherwise a default is returned.
        """
        if a in self.__class__._linkAttrStyle:
            return self.__class__._linkAttrStyle[a]
        else:
            return lambda a,targ, frm='': '<li>%s: [%s] %s [%s]</li>' % ( a, targ._h.label, targ.label, targ.__href__() )

    def __html__(self,ghis=None):
        """Create html view.

        ghis: optional function which, given a section label, returns the
              function used to format one line of the list of inward links.
        Returns a list of strings (html fragments).
        """
        msg = []
        if not self._contentInitialised:
            msg.append( '<b>Item %s: uninitialised</b>' % self._h.title )
            return msg

        sect = self._h.label
        msg.append( '<h1>%s: [%s] %s</h1>' % (self._h.title,self.label,self.title) )
        msg.append( '<a href="../index.html">Home</a> &rarr; <a href="../index/%s.html">%s section index</a><br/>\n' % (sect, self._h.title) )
        msg.append( '<ul>' )
        for a in self.__dict__.keys():
            if a[0] != '_':
                ## attribute name followed by an icon linking to the attribute definition
                app = '%s%s' % (a, self.__class__.__dict__[a].__href__(label=self._greenIcon) )
                if hasattr( self._a[a], 'useClass') and self._a[a].useClass == 'internalLink' and self._base._indexInitialised:
                    if self.__dict__[a] == '__unset__':
                        m = '<li>%s: %s [missing link]</li>' % ( app, self.__dict__[a] )
                    else:
                        try:
                            targ = self._base._inx.uid[ self.__dict__[a] ]
                            lst = self.getHtmlLinkAttrStyle(a)
                            m = lst( app, targ, frm=sect )
                        except Exception:
                            ## best effort: report the broken link in the page and carry on
                            print ( a, self.__dict__[a], sect )
                            m = '<li>%s: %s .... broken link</li>' % ( app, self.__dict__[a] )
                elif hasattr( self._a[a], 'useClass') and self._a[a].useClass == 'externalUrl':
                    m = '<li>%s: <a href="%s" title="%s">%s</a></li>' % ( app, self.__dict__[a], self._a[a].description, self._a[a].title )
                else:
                    m = '<li>%s: %s</li>' % ( app, self.__dict__[a] )
                msg.append( m )
        msg.append( '</ul>' )
        ##
        ## add list of inward references
        ##
        if self._base._indexInitialised:
            tl = self._htmlStyle.get( sect, {} ).get( 'getIrefs', None )
            if tl is not None:
                irefs = self._inx.iref_by_sect[self.uid].a
                if '__all__' in tl:
                    tl = irefs.keys()
                ## retain only the sections which have links to this item
                tl1 = [t for t in tl if t in irefs and len( irefs[t] ) > 0]
                if len(tl1) > 0:
                    am = []
                    ## NOTE(review): two <div> elements are opened here but only one is closed at the end of the block.
                    am.append( '''<div class="demo">
<div id="tabs">
<ul>''' )
                    for t in tl1:
                        u0 = irefs[t][0]
                        am.append( '<li><a href="#tabs-%s">%s</a></li>' % (t,self._inx.uid[u0]._h.title ) )
                    am.append( '</ul>' )
                    for t in tl1:
                        u0 = irefs[t][0]
                        am.append( '<div id="tabs-%s">' % t )
                        am.append( '<h3>%s</h3>' % self._inx.uid[u0]._h.title )
                        am.append( '<ul>' )
                        items = [self._inx.uid[u] for u in irefs[t] ]
                        items.sort( key=lambda x: x.__dict__['label'] )
                        for targ in items:
                            if ghis is None:
                                m = '<li>%s:%s [%s]</li>' % ( targ._h.label, targ.label, targ.__href__() )
                            else:
                                lst = ghis( targ._h.label )
                                m = lst( targ, frm=sect )
                            am.append( m )
                        am.append( '</ul>' )
                        am.append( '</div>' )
                    am.append( '</div>' )
                    msg.append( '<h2>Links from other sections</h2>' )
                    msg.append( ''' <script>
$(function() {
$( "#tabs" ).tabs({cookie: { expires: 1 } });
});
</script>
<!-- how to make tab selection stick: http://stackoverflow.com/questions/5066581/jquery-ui-tabs-wont-save-selected-tab-index-upon-page-reload expiry time in days-->''' )
                    msg.extend( am )
        return msg

    def dictInit( self, idict ):
        """Initialise from a dictionary.

        Attributes listed in self._a which are not in idict are set to the
        section default, or to the global default value.
        """
        for a in self._a.keys():
            if a in idict:
                val = idict[a]
            else:
                val = self._d.defaults.get( a, self._d.glob )
            setattr( self, a, val )
        self._contentInitialised = True

    def mdInit( self, el, etree=False ):
        """Initialisation from a mindom XML element. The list of attributes must be set by the class factory before the class is initialised.

        el: the XML element holding the record attributes;
        etree: set True if el is an ElementTree element rather than a minidom element.
        Values are converted to float, integer or boolean when the attribute
        type is xs:float, xs:integer or xs:boolean.
        """
        deferredHandling=False
        nw1 = 0
        ## tvtl: list of (attribute name, present in element, string value)
        tvtl = []
        if etree:
            ks = set( el.keys() )
            for a in self._a.keys():
                if a in ks:
                    tvtl.append( (a,True, str( el.get( a ) ) ) )
                else:
                    tvtl.append( (a,False,None) )
        else:
            for a in self._a.keys():
                if el.hasAttribute( a ):
                    tvtl.append( (a,True, str( el.getAttribute( a ) ) ) )
                else:
                    tvtl.append( (a,False,None) )

        for a,tv,v in tvtl:
            if tv:
                if self._a[a].type == u'xs:float':
                    try:
                        v = float(v)
                    except ValueError:
                        print ( 'Failed to convert real number: %s' % v )
                        raise
                elif self._a[a].type == u'xs:integer':
                    if self._rc.isIntStr( v ):
                        v = int(v)
                    else:
                        v = v.strip()
                        thissect = '%s [%s]' % (self._h.title,self._h.label)
                        if v == '':
                            ## blank value: treated as zero, with a limited number of warnings
                            if nw1 < 20:
                                print ( 'WARN.050.0001: input integer non-compliant: %s: %s: "%s" -- set to zero' % (thissect,a,v) )
                                nw1 += 1
                            v = 0
                        else:
                            try:
                                v = int(float(v))
                                print ( 'WARN: input integer non-compliant: %s: %s: %s' % (thissect,a,v) )
                            except (ValueError, OverflowError):
                                ## the value is left as a string; the message is printed once all attributes are loaded
                                msg = 'ERROR: failed to convert integer: %s: %s: %s' % (thissect,a,v)
                                deferredHandling=True
                elif self._a[a].type == u'xs:boolean':
                    v = v in ['true','1']
                self.__dict__[a] = v
            else:
                if a in ['uid',]:
                    thissect = '%s [%s]' % (self._h.title,self._h.tag)
                    print ( 'ERROR.020.0001: missing uid: %s' % thissect )
                    if etree:
                        print ( ks )
                        ## NOTE(review): the process is terminated here with exit
                        ## status 0 although this is an error condition -- confirm intent.
                        import sys
                        sys.exit(0)
                self.__dict__[a] = self._d.defaults.get( a, self._d.glob )

        if deferredHandling:
            print ( msg )

        self._contentInitialised = True


class config(object):
    """Read in a vocabulary collection configuration document and a vocabulary document.

    configdoc: path of the XML document defining the sections and their attributes;
    thisdoc: path of the XML document holding the records;
    useShelve: accepted for compatibility, not used by this class.

    After initialisation, "coll" is a dictionary of named tuples
    (header, attDefn, items), one per section, with additional entries
    "__core__", "__main__" and "__sect__" describing the schema itself.
    """

    def __init__(self, configdoc='out/dreqDefn.xml', thisdoc='../workbook/trial_20150724.xml', useShelve=False):
        self.rc = rechecks()
        self.silent = True
        self.vdef = configdoc
        self.vsamp = thisdoc

        self.nts = collections.namedtuple( 'sectdef', ['tag','label','title','id','itemLabelMode','level','maxOccurs','labUnique','uid'] )
        self.nti = collections.namedtuple( 'itemdef', ['tag','label','title','type','useClass','techNote'] )
        self.ntt = collections.namedtuple( 'sectinit', ['header','attributes','defaults'] )
        self.nt__default = collections.namedtuple( 'deflt', ['defaults','glob'] )
        self.ntf = collections.namedtuple( 'sect', ['header','attDefn','items'] )
        self.bscc = loadBS(blockSchemaFile)

        self.coll = {}
        doc = xml.dom.minidom.parse( self.vdef )
        ##
        ## elementTree parsing implemented for main document
        ##
        self.etree = True
        if self.etree:
            ## cElementTree is not available in recent python versions: fall back to ElementTree.
            try:
                import xml.etree.cElementTree as cel
            except ImportError:
                import xml.etree.ElementTree as cel

            self.contentDoc = cel.parse( self.vsamp )
            root = self.contentDoc.getroot()
            bs = root.tag.split( '}' )
            if len( bs ) > 1:
                self.ns = bs[0] + '}'
            else:
                self.ns = None
        else:
            self.contentDoc = xml.dom.minidom.parse( self.vsamp )
            self.ns = None
        ## prefix used when searching for elements: must be an empty string
        ## (not the text "None") when the document has no namespace.
        nsp = self.ns or ''

        sectDefs = doc.getElementsByTagName( 'table' )
        self.tables = {}
        tables = {}
        self.tableClasses = {}
        self.tableItems = collections.defaultdict( list )
        ##
        ## this loads in some metadata, but not yet in a useful way.
        ##
        self._t0 = self.parsevcfg(None)
        self._tableClass0 = self.itemClassFact( self._t0, ns=self.ns )
        ##
        ## define a class for the section heading records.
        ##
        self._t1 = self.parsevcfg('__sect__')
        self._t2 = self.parsevcfg('__main__')
        self._sectClass0 = self.itemClassFact( self._t1, ns=self.ns )

        ## records describing the attributes which are used to define attributes
        self.tt0 = {}
        for k in self.bscc:
            self.tt0[k] = self._tableClass0(idict=self.bscc[k])
            if k in self._t0.attributes:
                setattr( self._tableClass0, '%s' % k, self.tt0[k] )
            if k in self._t1.attributes:
                setattr( self._sectClass0, '%s' % k, self.tt0[k] )

        ##
        ## save header information, as for recordAttributeDefn below
        ##
        self._recAtDef = {'__core__':self._t0, '__sect__':self._t1}
        ##
        ## experimental addition of __core__ to coll dictionary ..
        ##
        self.coll['__core__'] = self.ntf( self._t0.header, self._t0.attributes, [self.tt0[k] for k in self.tt0] )

        ## tt1: section header records; ttl2: attribute definitions from all sections
        self.tt1 = {}
        self.ttl2 = []
        for v in sectDefs:
            t = self.parsevcfg(v)
            lab = t[0].label
            tables[lab] = t
            self.tableClasses[lab] = self.itemClassFact( t, ns=self.ns )
            thisc = self.tableClasses[lab]
            self.tt1[lab] = self._sectClass0( idict=t.header._asdict() )
            self.tt1[lab].maxOccurs = t.header.maxOccurs
            self.tt1[lab].labUnique = t.header.labUnique
            self.tt1[lab].level = t.header.level
            self.tt1[lab].itemLabelMode = t.header.itemLabelMode
            self.ttl2 += [thisc.__dict__[a] for a in t.attributes]
        self.coll['__main__'] = self.ntf( self._t2.header, self._t2.attributes, self.ttl2 )

        self.coll['__sect__'] = self.ntf( self._t1.header, self._t1.attributes, [self.tt1[k] for k in self.tt1] )

        self.recordAttributeDefn = tables
        for k in tables.keys():
            if self.etree:
                vl = root.findall( './/%s%s' % (nsp,k) )
                if len(vl) == 1:
                    v = vl[0]
                    t = v.get( 'title' )
                    i = v.get( 'id' )
                    uid = v.get( 'uid' )
                    useclass = v.get( 'useClass' )

                    ## the section header is updated with values from the content document
                    self.tt1[k].label = k
                    self.tt1[k].title = t
                    self.tt1[k].id = i
                    self.tt1[k].uid = uid
                    self.tt1[k].useClass = useclass
                    self.tableClasses[k]._h = self.tt1[k]
                    il = v.findall( '%sitem' % nsp )
                    self.info( '%s, %s, %s, %s' % ( k, t, i, len(il) ) )

                    self.tables[k] = (i,t,len(il))

                    for el in il:
                        ii = self.tableClasses[k](xmlMiniDom=el, etree=True)
                        self.tableItems[k].append( ii )
                elif len(vl) > 1:
                    assert False, 'not able to handle repeat sections with etree yet'
            else:
                vl = self.contentDoc.getElementsByTagName( k )
                if len(vl) == 1:
                    v = vl[0]
                    t = v.getAttribute( 'title' )
                    i = v.getAttribute( 'id' )
                    il = v.getElementsByTagName( 'item' )
                    self.info( '%s, %s, %s, %s' % ( k, t, i, len(il) ) )

                    self.tables[k] = (i,t,len(il))

                    for el in il:
                        ii = self.tableClasses[k](xmlMiniDom=el)
                        self.tableItems[k].append( ii )
                elif len(vl) > 1:
                    ## repeated section: tables[k] and tableItems[k] hold one entry per occurrence
                    l1 = []
                    l2 = []
                    for v in vl:
                        t = v.getAttribute( 'title' )
                        i = v.getAttribute( 'id' )
                        il = v.getElementsByTagName( 'item' )
                        self.info( '%s, %s, %s, %s' % ( k, t, i, len(il) ) )
                        l1.append( (i,t,len(il)) )

                        l2i = []
                        for el in il:
                            ii = self.tableClasses[k](xmlMiniDom=el)
                            l2i.append( ii )
                        l2.append( l2i )
                    self.tables[k] = l1
                    self.tableItems[k] = l2
            self.coll[k] = self.ntf( self.recordAttributeDefn[k].header, self.recordAttributeDefn[k].attributes, self.tableItems[k] )

    def info(self,ss):
        """Switchable print function ... switch off by setting self.silent=True"""
        if not self.silent:
            print ( ss )

    def itemClassFact(self, sectionInfo,ns=None):
        """Create and return a record class for a section.

        sectionInfo: named tuple (header, attributes, defaults) as returned by parsevcfg;
        ns: XML namespace prefix, stored on the class as "ns".
        """
        class dreqItem(dreqItemBase):
            """Inherits all methods from dreqItemBase.

            USAGE
            -----
            The instanstiated object contains a single data record. The "_h" attribute links to information about the record and the section it belongs to.

            object._a: a python dictionary defining the attributes in each record. The keys in the dictionary correspond to the attribute names and the values are python "named tuples" (from the "collections" module). E.g. object._a['priority'].type contains the type of the 'priority' attribute. Type is expressed using XSD schema language, so "xs:integer" implies integer. The "useClass" attribute carries information about usage. If object._a['xxx'].useClass = u'internalLink' then the record attribute provides a link to another element and object.xxx is the unique identifier of that element.

            object._h: a python named tuple describing the section. E.g. object._h.title is the section title (E.g. "CMOR Variables")
            """
            _base=dreqItemBase

        dreqItem.__name__ = 'dreqItem_%s' % str( sectionInfo.header.label )
        dreqItem._h = sectionInfo.header
        dreqItem._a = sectionInfo.attributes
        dreqItem._d = sectionInfo.defaults
        if sectionInfo.attributes is not None:
            self.addAttributes(dreqItem, sectionInfo.attributes )
        dreqItem._rc = self.rc
        dreqItem.ns = ns
        return dreqItem

    def addAttributes( self, thisClass, attrDict ):
        """Add a set of attributes, from a dictionary, to a class"""
        for k in attrDict:
            setattr( thisClass, '%s' % k , attrDict[k] )

    def parsevcfg(self,v):
        """Parse a section definition element, including all the record attributes. The results are returned as a namedtuple of attributes for the section and a dictionary of record attribute specifications.

        v: a minidom section definition element, or one of None, '__main__'
           and '__sect__', for which built-in definitions are returned.
        """
        if v is None or v == '__main__':
            idict = {'description':'An extended description of the object', 'title':'Record Description',
               'techNote':'', 'useClass':'__core__', 'superclass':'rdf:property',
               'type':'xs:string', 'uid':'__core__:description', 'label':'label' }
            if v is None:
                vtt = self.nts( '__core__', 'CoreAttributes', 'X.1 Core Attributes', '00000000', 'def', '0', '0', 'false', '__core__' )
            else:
                vtt = self.nts( '__main__', 'DataRequestAttributes', 'X.2 Data Request Attributes', '00000001', 'def', '0', '0', 'false', '__main__' )
        elif v == '__sect__':
            idict = {'title':'Record Description',
               'uid':'__core__:description', 'label':'label', 'useClass':'text', 'id':'id', 'maxOccurs':'', 'itemLabelMode':'', 'level':'', 'labUnique':'' }
            vtt = self.nts( '__sect__', 'sectionAttributes', 'X.3 Section Attributes', '00000000', 'def', '0', '0', 'false', '__sect__' )
        else:
            l = v.getAttribute( 'label' )
            t = v.getAttribute( 'title' )
            i = v.getAttribute( 'id' )
            ilm = v.getAttribute( 'itemLabelMode' )
            lev = v.getAttribute( 'level' )
            maxo = v.getAttribute( 'maxOccurs' )
            labu = v.getAttribute( 'labUnique' )
            il = v.getElementsByTagName( 'rowAttribute' )
            vtt = self.nts( v.nodeName, l,t,i,ilm,lev, maxo, labu, 's__%s' % v.nodeName )
            idict = {}
            for el in il:
                tt = self.parseicfg(el)
                idict[tt.label] = tt
        deflt = self.nt__default( {}, '__unset__' )
        return self.ntt( vtt, idict, deflt )

    def parseicfg(self,i):
        """Parse a record attribute specification.

        i: minidom element specifying the attribute.
        Returns a record of the core attribute class; properties not given in
        the element are set to None, apart from "type" which defaults to "xs:string".
        """
        defs = {'type':"xs:string"}
        ee = {}
        for k in ['label','title','type','useClass','techNote','description','uid']:
            if i.hasAttribute( k ):
                ee[k] = i.getAttribute( k )
            else:
                ee[k] = defs.get( k, None )
        self.lastTitle = ee['title']
        return self._tableClass0( idict=ee )

class container(object):
    """Simple container class, to hold a set of dictionaries of lists.

    atl: names of the attributes to create; each one is set to an empty
         dictionary of lists.  A plain dictionary "uid" is always created.
    """
    def __init__(self, atl ):
        self.uid = dict()
        for name in atl:
            setattr( self, name, collections.defaultdict( list ) )

class c1(object):
    """Holder for a single dictionary of lists, in attribute "a"."""
    def __init__(self):
        emptyLists = collections.defaultdict( list )
        self.a = emptyLists

class index(object):
    """Create an index of the document. Cross-references are generated from attributes with class 'internalLink'.
    This version assumes that each record is identified by an "uid" attribute and that there is a "var" section.
    Invalid internal links are recorded in the "missingIds" dictionary.
    For any record, with identifier u, iref_by_uid[u] gives a list of the attribute name and identifier of records linking to that record.
    Records with an identifier which has already been used are recorded in the "uid2" dictionary.
    """

    def __init__(self, dreq):
        """dreq: dictionary of sections, each having "attDefn" (attribute definitions) and "items" (list of records)."""
        self.silent = True
        self.uid = {}
        self.uid2 = collections.defaultdict( list )
        ## section names are used as attribute names of the index, so must not clash with these:
        nativeAtts = ['uid','iref_by_uid','iref_by_sect','missingIds']
        naok = [x not in dreq for x in nativeAtts]
        assert all(naok), 'This version cannot index collections containing sections with names: %s' % str( nativeAtts )
        self.var_uid = {}
        self.var_by_name = collections.defaultdict( list )
        self.var_by_sn = collections.defaultdict( list )
        self.iref_by_uid = collections.defaultdict( list )
        irefdict = collections.defaultdict( list )
        for k in dreq.keys():
            if 'sn' in dreq[k].attDefn:
                self.__dict__[k] = container( ['label','sn'] )
            else:
                self.__dict__[k] = container( ['label'] )
            ##
            ## collected names of attributes which carry internal links
            ##
            for ka in dreq[k].attDefn.keys():
                if hasattr( dreq[k].attDefn[ka], 'useClass') and dreq[k].attDefn[ka].useClass == 'internalLink':
                    irefdict[k].append( ka )

        for k in dreq.keys():
            for i in dreq[k].items:
                assert 'uid' in i.__dict__, 'uid not found::\n%s\n%s' % (str(i._h),str(i.__dict__) )
                ## test the identifier of the record (not the literal string 'uid') against the index.
                if i.uid in self.uid:
                    print ( 'ERROR.100.0001: Duplicate uid: %s [%s]' % (i.uid,i._h.title) )
                    self.uid2[i.uid].append( (k,i) )
                else:
                    ### create index by uid.
                    self.uid[i.uid] = i

        self.missingIds = collections.defaultdict( list )
        self.iref_by_sect = collections.defaultdict( c1 )
        for k in dreq.keys():
            for k2 in irefdict.get( k, [] ):
                ## n1, n2: number of valid and broken links in attribute k2 of section k
                n1 = 0
                n2 = 0
                for i in dreq[k].items:
                    id2 = i.__dict__.get( k2 )
                    if id2 != '__unset__':
                        sect = i._h.label
                        ## append attribute name and target -- item i.uid, attribute k2 reference item id2
                        self.iref_by_uid[ id2 ].append( (k2,i.uid) )
                        self.iref_by_sect[ id2 ].a[sect].append( i.uid )
                        if id2 in self.uid:
                            n1 += 1
                        else:
                            n2 += 1
                            self.missingIds[id2].append( (k,k2,i.uid) )
                self.info( 'INFO:: %s, %s: %s (%s)' % (k,k2,n1,n2) )

        ## indexes for each section, by uid, label and (where defined) sn
        for k in dreq.keys():
            for i in dreq[k].items:
                self.__dict__[k].uid[i.uid] = i
                self.__dict__[k].label[i.label].append( i.uid )
                if 'sn' in dreq[k].attDefn:
                    self.__dict__[k].sn[i.sn].append( i.uid )

    def info(self,ss):
        """Switchable print function ... switch off by setting self.silent=True"""
        if not self.silent:
            print ( ss )

class ds(object):
    """Comparison object to assist sorting of lists of class instances.

    k: name of the instance attribute to sort on.
    Use as "alist.sort( ds('label').cmp )" (python 2 only) or
    "alist.sort( key=ds('label').key )".
    """
    def __init__(self,k):
        self.k = k

    def cmp(self,x,y):
        """Return -1, 0 or 1 if attribute k of x is less than, equal to or greater than that of y."""
        ## written without the "cmp" builtin, which is not available in python 3.
        a = x.__dict__[self.k]
        b = y.__dict__[self.k]
        return (a > b) - (a < b)

    def key(self,x):
        """Return the value of attribute k of x, for use as a sort key."""
        return x.__dict__[self.k]

class kscl(object):
    """Comparison object to assist sorting of dictionaries of class instances.

    idict: dictionary of class instances;
    k: name of the instance attribute to sort on.
    The keys of idict are compared through attribute k of the corresponding
    instances: use as "keys.sort( kscl(idict,'title').cmp )" (python 2 only)
    or "keys.sort( key=kscl(idict,'title').key )".
    """
    def __init__(self,idict,k):
        self.k = k
        self.idict = idict

    def cmp(self,x,y):
        """Return -1, 0 or 1 comparing attribute k of idict[x] and idict[y]."""
        ## written without the "cmp" builtin, which is not available in python 3.
        a = self.idict[x].__dict__[self.k]
        b = self.idict[y].__dict__[self.k]
        return (a > b) - (a < b)

    def key(self,x):
        """Return attribute k of idict[x], for use as a sort key."""
        return self.idict[x].__dict__[self.k]

# Relative path of a trial workbook document.
src1 = '../workbook/trial_20150831.xml'

#DEFAULT LOCATION -- changed automatically when building distribution
defaultDreq = 'annotated_20150731.xml'
#DEFAULT CONFIG
defaultConfig = 'dreq2Defn.xml'

# Full paths, under DOC_DIR, used as the default arguments of loadDreq.
defaultDreqPath = '%s/%s' % (DOC_DIR, defaultDreq )
defaultConfigPath = '%s/%s' % (DOC_DIR, defaultConfig )

class loadDreq(object):
    """Load in a vocabulary document.
    dreqXML: full path to the XML document
    configdoc: full path to associated configuration document
    useShelve: flag to specify whether to retrieve data from cache (not implemented)
    htmlStyles: dictionary of styling directives which influence structure of html page generates by the "makeHtml" method
    """

    def __init__(self,dreqXML=defaultDreqPath, configdoc=defaultConfigPath, useShelve=False, htmlStyles=None ):
        self.c = config( thisdoc=dreqXML, configdoc=configdoc, useShelve=useShelve)
        self.coll = self.c.coll
        self.inx = index(self.coll)
        self.itemStyles = {}
        self.defaultItemLineStyle = lambda i, frm='', ann='': '<li>%s: %s</li>' % ( i.label, i.__href__(odir='../u/') )
        ##
        ## add index to Item base class .. so that it can be accessed by item instances
        ##
        dreqItemBase._inx = self.inx
        dreqItemBase._indexInitialised = True
        ##
        ## load in additional styling directives
        ##
        if htmlStyles is not None:
            for k in htmlStyles:
                dreqItemBase._htmlStyle[k] = htmlStyles[k]

        ## page template: arguments are title, additional header content,
        ## relative path to the top directory and the page body.
        self.pageTmpl = """<html><head><title>%s</title>
%s
<link rel="stylesheet" type="text/css" href="%scss/dreq.css">
</head><body>
<div id="top">CMIP6 Data Request</div>
%s</body></html>"""

    def getHtmlItemStyle(self, sect):
        """Get the styling method associated with a given section."""
        if sect in self.itemStyles:
            return self.itemStyles[sect]
        return self.defaultItemLineStyle

    def _sectionSortHelper(self,title):
        """Return a sort key for a section title such as "1.2 Some title".

        The first word of the title is split on "."; when there are two parts
        the key is a tuple in which parts made of digits are converted to
        integers.
        """
        words = title.split()
        ## a blank title is treated as having an empty first word.
        if words:
            first = words[0]
        else:
            first = ''
        ab = first.split( '.' )
        if len( ab ) == 2:
            a,b = ab
            if self.c.rc.isIntStr(a):
                a = int(a)
            if self.c.rc.isIntStr(b):
                b = int(b)
            rv = (a,b)
        elif len(ab) == 1:
            rv = (ab[0],0)
        else:
            rv = ab
        return rv

    def makeHtml(self,odir='./html', ttl0 = 'Data Request Index', annotations=None):
        """Generate a html view of the vocabularies, using the "__html__" method of the vocabulary item class to generate a
        page for each item and also generating index pages.
        odir: directory for html files;
        ttl0: Title for main index (in odir/index.html)
        annotations: optional dictionary, keyed on section name, of dictionaries keyed on item label: the value
                     found for an item is passed to the line styling function as "ann".

        The directories odir, odir/u and odir/index must exist."""

        ##
        ## one page for each record
        ##
        ks = sorted( self.inx.uid.keys(), key=lambda u: self.inx.uid[u].__dict__['title'] )
        for k in ks:
            i = self.inx.uid[k]
            ttl = 'Data Request Record: [%s]%s' % (i._h.label,i.label)
            bdy = '\n'.join( i.__html__( ghis=self.getHtmlItemStyle ) )
            with open( '%s/u/%s.html' % (odir,i.uid), 'w' ) as oo:
                oo.write( self.pageTmpl % (ttl, jsh, '../', bdy ) )

        ##
        ## one index page for each section, and a main index
        ##
        msg0 = ['<h1>%s</h1>' % ttl0, '<ul>',]
        ks = sorted( self.coll.keys() )
        ee = {}
        for k in ks:
            ee[self.coll[k].header.title] = k
        kks = sorted( ee.keys(), key = self._sectionSortHelper )
        for kt in kks:
            k = ee[kt]
            if annotations is not None and k in annotations:
                ann = annotations[k]
            else:
                ann = {}

            ##
            ## sort on item label
            ##
            self.coll[k].items.sort( key=lambda x: x.__dict__['label'] )
            ttl = 'Data Request Section: %s' % k
            msg0.append( '<li><a href="index/%s.html">%s [%s]</a></li>\n' % (k,self.coll[k].header.title,k) )
            msg = ['<h1>%s</h1>\n' % ttl, '<ul>',]
            msg.append( '<a href="../index.html">Home</a><br/>\n' )
            lst = self.getHtmlItemStyle(k)

            for i in self.coll[k].items:
                m = lst( i, ann=ann.get( i.label ) )
                msg.append( m )
            msg.append( '</ul>' )
            bdy = '\n'.join( msg )
            with open( '%s/index/%s.html' % (odir,k), 'w' ) as oo:
                oo.write( self.pageTmpl % (ttl, '', '../', bdy ) )
        msg0.append( '</ul>' )
        bdy = '\n'.join( msg0 )
        with open( '%s/index.html' % odir, 'w' ) as oo:
            oo.write( self.pageTmpl % (ttl0, '', '', bdy ) )

# When run as a script: load the default request document with the default configuration.
if __name__ == '__main__':
    dreq = loadDreq( )