1 | """This module provides a basic python API to the Data Request. |
---|
2 | After ingesting the XML documents (configuration and request) the module generates two python objects: |
---|
3 | 1. A collection of records |
---|
4 | 2. Index |
---|
5 | """ |
---|
6 | import xml, string, collections |
---|
7 | import xml.dom |
---|
8 | import xml.dom.minidom |
---|
9 | |
---|
class dreqItemBase(object):
    """A base class used in the definition of records.

    Designed to be used via a class factory which sets "itemLabelMode" and
    "attributes" on a subclass before it is instantiated. The initialisers
    iterate over ``self.attributes``, so instantiating a class on which
    "attributes" has not been set raises AttributeError.

    Each name listed in ``attributes`` becomes an instance attribute; names
    missing from the input take ``self.defaults[name]`` if present, otherwise
    ``self.globalDefault`` (the string '__unset__').
    """

    def __init__(self, dict=None, xmlMiniDom=None, id='defaultId'):
        """Initialise a record from exactly one of a dictionary or a minidom element.

        dict -- mapping of attribute names to values (the parameter name shadows
                the builtin, but is kept because callers may pass it by keyword).
        xmlMiniDom -- XML element providing hasAttribute/getAttribute.
        id -- accepted for interface compatibility; not used by this class.

        Raises AssertionError if both or neither of dict/xmlMiniDom are given.
        """
        # Identity tests: an empty dictionary is still a valid "dict mode" input.
        dictMode = dict is not None
        mdMode = xmlMiniDom is not None
        assert not( dictMode and mdMode), 'Mode must be either dictionary of minidom: both assigned'
        assert dictMode or mdMode, 'Mode must be either dictionary of minidom: neither assigned'
        self.defaults = {}
        self.globalDefault = '__unset__'
        if dictMode:
            self.dictInit(dict)
        else:
            self.mdInit(xmlMiniDom)

    def dictInit(self, dict):
        """Initialise from a dictionary.

        Keys of the dictionary which are not listed in self.attributes are ignored.
        """
        for a in self.attributes:
            # "in" replaces dict.has_key, which no longer exists in Python 3.
            if a in dict:
                self.__dict__[a] = dict[a]
            else:
                self.__dict__[a] = self.defaults.get(a, self.globalDefault)

    def mdInit(self, el):
        """Initialise from a minidom XML element.

        The list of attributes must be set by the class factory before the
        class is instantiated.
        """
        for a in self.attributes:
            if el.hasAttribute(a):
                self.__dict__[a] = el.getAttribute(a)
            else:
                self.__dict__[a] = self.defaults.get(a, self.globalDefault)
40 | |
---|
41 | |
---|
class config(object):
    """Read in a vocabulary collection configuration document and a vocabulary document.

    The configuration document supplies "table" elements, each defining one
    section and its record attributes ("rowAttribute" elements). The content
    document is then searched for elements whose tag name matches each section
    label, and every "item" element found inside is turned into a record.

    Attributes set by the constructor:
      slist               -- list of section definitions, in document order.
      recordAttributeDefn -- section definitions keyed by section label.
      tableClasses        -- record class for each section label.
      tables              -- (id, title, number of items) for each section found
                             once in the content document; a list of such
                             tuples if the section tag occurs more than once.
      tableItems          -- list of records for each section (a list of lists
                             if the section tag occurs more than once).
      coll                -- 'sect' namedtuples (header, attDefn, items) keyed
                             by section label; this is what get() returns.
    """

    def __init__(self, configdoc='out/dreqDefn.xml', thisdoc='../workbook/trial_20150724.xml', silent=True):
        """Parse both documents and build the collection.

        configdoc -- section definition document, passed to xml.dom.minidom.parse.
        thisdoc   -- content document, passed to xml.dom.minidom.parse.
        silent    -- if False, a summary line is printed for each section element.
        """
        self.silent = silent
        self.vdef = configdoc
        self.vsamp = thisdoc
        self.nts = collections.namedtuple( 'sectdef', ['tag','label','title','id','itemLabelMode','level'] )
        self.nti = collections.namedtuple( 'itemdef', ['tag','label','title','type','rClass','techNote'] )
        self.ntt = collections.namedtuple( 'sectinit', ['header','attributes'] )
        self.ntf = collections.namedtuple( 'sect', ['header','attDefn','items'] )

        self.coll = {}
        doc = xml.dom.minidom.parse(self.vdef)
        self.contentDoc = xml.dom.minidom.parse(self.vsamp)
        self.slist = []
        self.tables = {}
        tables = {}
        self.tableClasses = {}
        self.tableItems = collections.defaultdict(list)
        for v in doc.getElementsByTagName('table'):
            t = self.parsevcfg(v)
            label = t.header.label
            tables[label] = t
            # list(): hand the class a plain list rather than a live dict view (Python 3).
            self.tableClasses[label] = self.itemClassFact(t.header.itemLabelMode, list(t.attributes.keys()))
            self.slist.append(t)

        self.recordAttributeDefn = tables
        for k in tables:
            vl = self.contentDoc.getElementsByTagName(k)
            if len(vl) == 1:
                self.tables[k], items = self._loadSection(k, vl[0])
                self.tableItems[k].extend(items)
            elif len(vl) > 1:
                # The section tag occurs several times: keep one entry per occurrence.
                loaded = [self._loadSection(k, v) for v in vl]
                self.tables[k] = [summary for summary, _ in loaded]
                self.tableItems[k] = [items for _, items in loaded]
            # Sections absent from the content document get an empty item list
            # (supplied by the defaultdict) and no entry in self.tables.
            self.coll[k] = self.ntf(tables[k].header, tables[k].attributes, self.tableItems[k])

    def _loadSection(self, k, v):
        """Return ((id, title, item count), [records]) for section element v with label k."""
        title = v.getAttribute('title')
        ident = v.getAttribute('id')
        il = v.getElementsByTagName('item')
        self.info( '%s, %s, %s, %s' % ( k, title, ident, len(il) ) )
        itemClass = self.tableClasses[k]
        return (ident, title, len(il)), [itemClass(xmlMiniDom=el) for el in il]

    def info(self, ss):
        """Print ss unless the instance was created with silent=True."""
        if not self.silent:
            # Parenthesised single argument: same output on Python 2 and Python 3.
            print(ss)

    def get(self):
        """Return the dictionary of sections, keyed by section label."""
        return self.coll

    def itemClassFact(self, itemLabelMode, attributes):
        """Return a new subclass of dreqItemBase with itemLabelMode and attributes set."""
        class dreqItem(dreqItemBase):
            """Inherits all methods from dreqItemBase"""

        dreqItem.itemLabelMode = itemLabelMode
        dreqItem.attributes = attributes
        return dreqItem

    def parsevcfg(self, v):
        """Parse a section definition element, including all the record attributes.

        The results are returned as a 'sectinit' namedtuple: header is a
        'sectdef' namedtuple of attributes for the section and attributes is a
        dictionary of record attribute specifications keyed by their label.
        """
        header = self.nts(
            v.nodeName,
            v.getAttribute('label'),
            v.getAttribute('title'),
            v.getAttribute('id'),
            v.getAttribute('itemLabelMode'),
            v.getAttribute('level'),
        )
        idict = {}
        for el in v.getElementsByTagName('rowAttribute'):
            tt = self.parseicfg(el)
            idict[tt.label] = tt
        return self.ntt(header, idict)

    def parseicfg(self, i):
        """Parse a record attribute specification.

        Returns an 'itemdef' namedtuple. A missing "type" defaults to
        "xs:string"; any other missing attribute is recorded as None. The XML
        attribute "class" is stored in the field "rClass". As a side effect the
        title is stored in self.lastTitle.
        """
        defs = {'type': "xs:string"}
        ll = []
        for k in ('label', 'title', 'type', 'class', 'techNote'):
            ll.append(i.getAttribute(k) if i.hasAttribute(k) else defs.get(k, None))
        l, t, ty, cls, tn = ll
        self.lastTitle = t
        return self.nti(i.nodeName, l, t, ty, cls, tn)
145 | |
---|
class container(object):
    """Simple container class, to hold a set of dictionaries of lists.

    An empty "uuid" dictionary is always created; in addition, one
    defaultdict(list) attribute is created for every name given in atl.
    """

    def __init__(self, atl):
        self.uuid = {}
        # One independent list-valued dictionary per requested name.
        self.__dict__.update((name, collections.defaultdict(list)) for name in atl)
152 | |
---|
class c1(object):
    """Holder for one attribute, "a": a dictionary whose missing keys start as empty lists."""

    def __init__(self):
        listDict = collections.defaultdict(list)
        self.a = listDict
class index(object):
    """Create an index of the document. Cross-references are generated from attributes with class 'internalLink'.
    This version assumes that each record is identified by an "uuid" attribute and has a "label" attribute.
    Invalid internal links are recorded in the "missingIds" dictionary.
    For any record, with identifier u, iref_by_uuid[u] gives a list of (link attribute name, identifier) pairs for the records linking to that record.

    For every section k of the collection an attribute named k is created, holding a container with
    dictionaries "uuid" (identifier -> record), "label" (label -> list of identifiers) and, if the
    section defines an "sn" attribute, "sn" (sn value -> list of identifiers).
    """

    def __init__(self, dreq):
        """Build the index from dreq, a dictionary of sections keyed by section name.

        Each section must provide "attDefn" (attribute definitions keyed by name,
        each with an "rClass" field) and "items" (an iterable of records).

        Raises AssertionError if a section name clashes with one of the index's own attributes.
        """
        self.silent = True
        self.uuid = {}
        nativeAtts = ['uuid','iref_by_uuid','iref_by_sect','missingIds']
        # Section names become attributes of the index, so they must not collide with these.
        assert all(x not in dreq for x in nativeAtts), 'This version cannot index collections containing sections with names: %s' % str( nativeAtts )
        self.var_uuid = {}
        self.var_by_name = collections.defaultdict(list)
        self.var_by_sn = collections.defaultdict(list)
        self.iref_by_uuid = collections.defaultdict(list)
        irefdict = collections.defaultdict(list)
        for k in dreq:
            attDefn = dreq[k].attDefn
            if 'sn' in attDefn:
                self.__dict__[k] = container( ['label','sn'] )
            else:
                self.__dict__[k] = container( ['label'] )
            ##
            ## collect names of attributes which carry internal links
            ##
            for ka in attDefn:
                if attDefn[ka].rClass == 'internalLink':
                    irefdict[k].append(ka)

        # Global lookup: identifier -> (section name, record).
        for k in dreq:
            for i in dreq[k].items:
                self.uuid[i.uuid] = (k, i)

        self.missingIds = collections.defaultdict(list)
        self.iref_by_sect = collections.defaultdict(c1)
        for k in dreq:
            for k2 in irefdict.get(k, []):
                n1 = 0   # links whose target identifier exists
                n2 = 0   # links whose target identifier is unknown
                for i in dreq[k].items:
                    id2 = i.__dict__.get(k2)
                    if id2 != '__unset__':
                        self.iref_by_uuid[id2].append( (k2, i.uuid) )
                        self.iref_by_sect[id2].a[k2].append(i.uuid)
                        if id2 in self.uuid:
                            n1 += 1
                        else:
                            n2 += 1
                            self.missingIds[id2].append( (k, k2, i.uuid) )
                self.info( 'INFO:: %s, %s: %s (%s)' % (k,k2,n1,n2) )

        # Per-section lookups by identifier, label and (where defined) sn.
        for k in dreq:
            hasSn = 'sn' in dreq[k].attDefn
            sect = self.__dict__[k]
            for i in dreq[k].items:
                sect.uuid[i.uuid] = i
                sect.label[i.label].append(i.uuid)
                if hasSn:
                    sect.sn[i.sn].append(i.uuid)

    def info(self, ss):
        """Print ss unless self.silent is set (the constructor sets it to True)."""
        if not self.silent:
            # Parenthesised single argument: same output on Python 2 and Python 3.
            print(ss)
218 | |
---|
219 | |
---|
class loadDreq(object):
    """Load a data request and index it.

    After construction: "c" is the config object built from the two documents
    (created with silent=False), "coll" is the collection returned by its
    get() method and "inx" is the index built from that collection.
    """

    def __init__(self, dreqXML='../docs/dreq.xml', configdoc='../docs/dreqDefn.xml'):
        parsed = config(configdoc=configdoc, thisdoc=dreqXML, silent=False)
        self.c = parsed
        self.coll = parsed.get()
        self.inx = index(self.coll)
225 | |
---|
if __name__ == '__main__':
    # Run as a script: load and index the request from the default document locations.
    dreq = loadDreq()
228 | |
---|