source: CMIP6dreqbuild/trunk/src/framework/dreq.py @ 370

Subversion URL: http://proj.badc.rl.ac.uk/svn/exarch/CMIP6dreqbuild/trunk/src/framework/dreq.py@370
Revision 370, 8.5 KB checked in by mjuckes, 5 years ago (diff)

working namespace implementation for data request

Line 
1"""This module provides a basic python API to the Data Request.
2After ingesting the XML documents (configuration and request) the module generates two python objects:
31. A collection of records
42. Index
5"""
6import xml, string, collections
7import xml.dom
8import xml.dom.minidom
9
class dreqItemBase(object):
  """A base class used in the definition of records.

  Designed to be used via a class factory which sets "itemLabelMode" and
  "attributes" on the class before it is instantiated: attempting to
  instantiate the class before "attributes" is set will trigger an exception.

  Every name listed in "attributes" becomes an instance attribute. Names
  missing from the input are filled from "self.defaults" or, failing that,
  with "self.globalDefault" (the string '__unset__').
  """

  def __init__(self,dict=None,xmlMiniDom=None,id='defaultId'):
    """Initialise a record from exactly one of a dictionary or a minidom element.

    dict: mapping of attribute name to value.
    xmlMiniDom: XML element whose XML attributes hold the values.
    id: accepted for backward compatibility; not used by this class.

    Raises AssertionError if both, or neither, of "dict" and "xmlMiniDom"
    are supplied.
    """
    # Identity test: an empty dictionary is still a valid input.
    dictMode = dict is not None
    mdMode = xmlMiniDom is not None
    assert not( dictMode and mdMode), 'Mode must be either dictionary or minidom: both assigned'
    assert dictMode or mdMode, 'Mode must be either dictionary or minidom: neither assigned'
    self.defaults = { }
    self.globalDefault = '__unset__'
    if dictMode:
      self.dictInit( dict )
    else:
      self.mdInit( xmlMiniDom )

  def dictInit( self, dict ):
    """Initialise from a dictionary."""
    for a in self.attributes:
      if a in dict:
        self.__dict__[a] = dict[a]
      else:
        self.__dict__[a] = self.defaults.get( a, self.globalDefault )

  def mdInit( self, el ):
    """Initialise from a minidom XML element.

    The list of attributes must be set by the class factory before the
    class is initialised.
    """
    for a in self.attributes:
      if el.hasAttribute( a ):
        self.__dict__[a] = el.getAttribute( a )
      else:
        self.__dict__[a] = self.defaults.get( a, self.globalDefault )
40
41   
class config(object):
  """Read in a vocabulary collection configuration document and a vocabulary document.

  The configuration document supplies the section definitions ("table"
  elements) and their record attributes ("rowAttribute" elements). The
  content document supplies, for each section label, elements of that name
  containing the records ("item" elements).

  Attributes set by the constructor:
    recordAttributeDefn -- section label -> "sectinit" namedtuple (header, attributes).
    tableClasses -- section label -> record class built by itemClassFact.
    tables -- section label -> (id, title, number of items); a list of such
      tuples when the content document has several elements for the section;
      no entry when it has none.
    tableItems -- section label -> list of records; a list of lists when the
      content document has several elements for the section.
    coll -- section label -> "sect" namedtuple (header, attDefn, items).
    slist -- the parsed section definitions, in the order they were found.
  """

  def __init__(self, configdoc='out/dreqDefn.xml', thisdoc='../workbook/trial_20150724.xml'):
    """Parse both documents and build the collection.

    configdoc: configuration document, passed to xml.dom.minidom.parse.
    thisdoc: content document, passed to xml.dom.minidom.parse.
    """
    self.silent = True
    self.vdef = configdoc
    self.vsamp = thisdoc
    self.nts = collections.namedtuple( 'sectdef', ['tag','label','title','id','itemLabelMode','level'] )
    self.nti = collections.namedtuple( 'itemdef', ['tag','label','title','type','rClass','techNote'] )
    self.ntt = collections.namedtuple( 'sectinit', ['header','attributes'] )
    self.ntf = collections.namedtuple( 'sect', ['header','attDefn','items'] )

    self.coll = {}
    doc = xml.dom.minidom.parse( self.vdef )
    self.contentDoc = xml.dom.minidom.parse( self.vsamp )
    self.slist = []
    self.tables = {}
    tables = {}
    self.tableClasses = {}
    self.tableItems = collections.defaultdict( list )
    for v in doc.getElementsByTagName( 'table' ):
      t = self.parsevcfg(v)
      label = t.header.label
      tables[label] = t
      # Pass a real list so the record class does not hold a live view of the dictionary.
      self.tableClasses[label] = self.itemClassFact( t.header.itemLabelMode, list( t.attributes ) )
      self.slist.append( t )

    self.recordAttributeDefn = tables
    for k in tables:
      sections = self.contentDoc.getElementsByTagName( k )
      if len(sections) == 1:
        summary, items = self._readSection( k, sections[0] )
        self.tables[k] = summary
        self.tableItems[k].extend( items )
      elif len(sections) > 1:
        # Several elements share this section name: keep one entry per element.
        parsed = [ self._readSection( k, s ) for s in sections ]
        self.tables[k] = [ p[0] for p in parsed ]
        self.tableItems[k] = [ p[1] for p in parsed ]
      # When no element was found, the defaultdict supplies an empty item list.
      defn = self.recordAttributeDefn[k]
      self.coll[k] = self.ntf( defn.header, defn.attributes, self.tableItems[k] )

  def _readSection(self, k, v):
    """Read one section element "v" of the content document for section label "k".

    Returns a pair: the tuple (id, title, number of items) and the list of
    record objects created from the "item" elements below "v".
    """
    title = v.getAttribute( 'title' )
    sid = v.getAttribute( 'id' )
    il = v.getElementsByTagName( 'item' )
    self.info( '%s, %s, %s, %s' % ( k, title, sid, len(il) ) )
    itemClass = self.tableClasses[k]
    return (sid, title, len(il)), [ itemClass(xmlMiniDom=el) for el in il ]

  def info(self,ss):
    """Print the message "ss" unless self.silent is true."""
    if not self.silent:
      # Single-argument call form: same output under Python 2 and Python 3.
      print( ss )

  def get(self):
    """Return the collection: a dictionary of "sect" namedtuples keyed by section label."""
    return self.coll

  def itemClassFact(self,itemLabelMode,attributes):
    """Return a new subclass of dreqItemBase with "itemLabelMode" and "attributes" set on it."""
    class dreqItem(dreqItemBase):
      """Inherits all methods from dreqItemBase"""

    dreqItem.itemLabelMode = itemLabelMode
    dreqItem.attributes = attributes
    return dreqItem

  def parsevcfg(self,v):
    """Parse a section definition element, including all the record attributes.

    The result is a "sectinit" namedtuple: "header" is a "sectdef" namedtuple
    of the section's own attributes and "attributes" is a dictionary of
    record attribute specifications keyed by their label.
    """
    header = self.nts( v.nodeName,
                       v.getAttribute( 'label' ),
                       v.getAttribute( 'title' ),
                       v.getAttribute( 'id' ),
                       v.getAttribute( 'itemLabelMode' ),
                       v.getAttribute( 'level' ) )
    idict = {}
    for el in v.getElementsByTagName( 'rowAttribute' ):
      tt = self.parseicfg(el)
      idict[tt.label] = tt
    return self.ntt( header, idict )

  def parseicfg(self,i):
    """Parse a record attribute specification.

    Returns an "itemdef" namedtuple. The XML attribute "class" is stored in
    the field "rClass". A missing "type" defaults to "xs:string"; any other
    missing attribute is None. As a side effect the title is recorded in
    self.lastTitle.
    """
    defs = {'type':"xs:string"}
    ll = [ i.getAttribute( k ) if i.hasAttribute( k ) else defs.get( k, None )
           for k in ['label','title','type','class','techNote'] ]
    l, t, ty, cls, tn = ll
    self.lastTitle = t
    return self.nti( i.nodeName, l,t,ty,cls,tn )
145
class container(object):
  """Holder for a set of lookup tables.

  Every instance carries a plain dictionary "uuid" and, for each name in
  "atl", an attribute of that name holding a dictionary of lists.
  """
  def __init__(self, atl ):
    self.uuid = {}
    # One independent defaultdict per requested name, stored directly in the instance dictionary.
    vars(self).update( (name, collections.defaultdict( list )) for name in atl )
152
class c1(object):
  """Holder for a single attribute "a": a dictionary of lists, created empty."""
  def __init__(self):
    emptyLists = collections.defaultdict( list )
    self.a = emptyLists
class index(object):
  """Create an index of the document. Cross-references are generated from attributes with class 'internalLink'.

  This version assumes that each record is identified by an "uuid" attribute
  and has a "label" attribute.

  Attributes created:
    uuid -- record identifier -> (section name, record).
    <section name> -- one "container" per section, with dictionaries "uuid"
      (identifier -> record), "label" (label -> list of identifiers) and,
      when the section defines an "sn" attribute, "sn" (sn -> list of identifiers).
    iref_by_uuid -- for any record with identifier u, iref_by_uuid[u] is a list
      of (attribute name, identifier) pairs for the records linking to it.
    iref_by_sect -- iref_by_sect[u].a[attribute name] is the list of
      identifiers of records linking to u through that attribute.
      NOTE(review): the name suggests keying by section, but the key used is
      the attribute name -- confirm which is intended.
    missingIds -- invalid internal links: target identifier -> list of
      (section name, attribute name, identifier of the linking record).

  NOTE(review): records are read with "for i in dreq[k].items" and "i.uuid",
  so each "items" must be a flat list of records -- a nested list of lists
  would fail here; confirm against the code that builds the collection.
  """

  def __init__(self, dreq):
    """Build the index for "dreq", a dictionary of sections keyed by section name.

    Each section must provide "attDefn" (attribute name -> definition with an
    "rClass" field) and "items" (the records).

    Raises AssertionError if a section is named like one of the index's own
    attributes listed in "nativeAtts".
    """
    self.silent = True
    self.uuid = {}
    nativeAtts = ['uuid','iref_by_uuid','iref_by_sect','missingIds']
    naok = [ x not in dreq for x in nativeAtts ]
    assert all(naok), 'This version cannot index collections containing sections with names: %s' % str( nativeAtts )
    self.var_uuid = {}
    self.var_by_name = collections.defaultdict( list )
    self.var_by_sn = collections.defaultdict( list )
    self.iref_by_uuid = collections.defaultdict( list )
    irefdict = collections.defaultdict( list )
    for k in dreq:
      attDefn = dreq[k].attDefn
      if 'sn' in attDefn:
        self.__dict__[k] = container( ['label','sn'] )
      else:
        self.__dict__[k] = container( ['label'] )
      ##
      ## collect names of attributes which carry internal links
      ##
      for ka in attDefn:
        if attDefn[ka].rClass == 'internalLink':
          irefdict[k].append( ka )

    # Register every record first, so that links can be validated below.
    for k in dreq:
      for i in dreq[k].items:
        self.uuid[i.uuid] = (k,i)

    self.missingIds = collections.defaultdict( list )
    self.iref_by_sect = collections.defaultdict( c1 )
    for k in dreq:
      for k2 in irefdict.get( k, [] ):
        n1 = 0   # links whose target exists
        n2 = 0   # links whose target is missing
        for i in dreq[k].items:
          id2 = i.__dict__.get( k2 )
          if id2 != '__unset__':
            self.iref_by_uuid[ id2 ].append( (k2,i.uuid) )
            self.iref_by_sect[ id2 ].a[k2].append( i.uuid )
            if id2 in self.uuid:
              n1 += 1
            else:
              n2 += 1
              self.missingIds[id2].append( (k,k2,i.uuid) )
        self.info( 'INFO:: %s, %s: %s (%s)' % (k,k2,n1,n2) )

    for k in dreq:
      sect = self.__dict__[k]
      hasSn = 'sn' in dreq[k].attDefn
      for i in dreq[k].items:
        sect.uuid[i.uuid] = i
        sect.label[i.label].append( i.uuid )
        if hasSn:
          sect.sn[i.sn].append( i.uuid )

  def info(self,ss):
    """Print the message "ss" unless self.silent is true."""
    if not self.silent:
      # Single-argument call form: same output under Python 2 and Python 3.
      print( ss )
218
219
# Path of a trial workbook document.
# NOTE(review): not referenced by any other code visible in this file.
src1 = '../workbook/trial_20150724.xml'
class loadDreq(object):
  """Load the data request: parse the documents with "config" and index the result.

  After construction "c" is the config instance, "coll" is the collection
  returned by its get() method and "inx" is the index built from that collection.
  """
  def __init__(self,dreqXML='annotated_20150731.xml',configdoc='out/dreqDefn.xml' ):
    parsed = config( configdoc=configdoc, thisdoc=dreqXML )
    self.c = parsed
    self.coll = parsed.get()
    self.inx = index( self.coll )
226
if __name__ == '__main__':
  # Run as a script: load the data request from the default document paths.
  dreq = loadDreq()
229
Note: See TracBrowser for help on using the repository browser.