source: CMIP6dreqbuild/trunk/src/framework/dreq.py @ 382

Subversion URL: http://proj.badc.rl.ac.uk/svn/exarch/CMIP6dreqbuild/trunk/src/framework/dreq.py@382
Revision 382, 8.7 KB checked in by mjuckes, 5 years ago (diff)

misc

Line 
1"""This module provides a basic python API to the Data Request.
2After ingesting the XML documents (configuration and request) the module generates two python objects:
31. A collection of records
42. Index
5"""
6import xml, string, collections
7import xml.dom
8import xml.dom.minidom
9
class dreqItemBase(object):
  """A base class used in the definition of records.

  Designed to be used via a class factory which sets "itemLabelMode" and
  "attributes" on a subclass before the subclass is instantiated. The
  initialisation methods iterate over "self.attributes", so instantiating a
  class on which it has not been set raises an AttributeError.

  Every name in "attributes" becomes an instance attribute. Names missing from
  the input are set to self.defaults[name] if present, otherwise to
  self.globalDefault ('__unset__').
  """

  def __init__(self,dict=None,xmlMiniDom=None,id='defaultId'):
    """Initialise a record from exactly one of a dictionary or a minidom element.

    dict: mapping of attribute name to value (supports "in" and indexing).
    xmlMiniDom: XML element providing hasAttribute/getAttribute.
    id: accepted for interface compatibility; not used by this class.

    Raises AssertionError if both or neither of "dict" and "xmlMiniDom" are given.
    """
    dictMode = dict is not None
    mdMode = xmlMiniDom is not None
    # Raised explicitly (rather than via "assert") so that the check is not
    # stripped when python runs with -O; the exception type is unchanged.
    if dictMode and mdMode:
      raise AssertionError( 'Mode must be either dictionary or minidom: both assigned' )
    if not ( dictMode or mdMode ):
      raise AssertionError( 'Mode must be either dictionary or minidom: neither assigned' )
    self.defaults = { }
    self.globalDefault = '__unset__'
    if dictMode:
      self.dictInit( dict )
    else:
      self.mdInit( xmlMiniDom )

  def _default( self, a ):
    """Return the fall-back value for attribute "a"."""
    return self.defaults.get( a, self.globalDefault )

  def dictInit( self, dict ):
    """Initialise from a dictionary."""
    for a in self.attributes:
      # "in" replaces dict.has_key, which does not exist in python 3.
      if a in dict:
        self.__dict__[a] = dict[a]
      else:
        self.__dict__[a] = self._default( a )

  def mdInit( self, el ):
    """Initialisation from a minidom XML element. The list of attributes must be set by the class factory before the class is initialised."""
    for a in self.attributes:
      if el.hasAttribute( a ):
        self.__dict__[a] = el.getAttribute( a )
      else:
        self.__dict__[a] = self._default( a )
40
41   
class config(object):
  """Read in a vocabulary collection configuration document and a vocabulary document.

  Attributes set by the constructor:
    coll: dictionary, keyed on section label, of 'sect' namedtuples
          (header, attDefn, items); also returned by get().
    recordAttributeDefn: dictionary, keyed on section label, of 'sectinit'
          namedtuples (header, attributes).
    slist: the same 'sectinit' tuples, in the order the 'table' elements
          were returned by the parser.
    tableClasses: dictionary, keyed on section label, of the record class
          generated for that section.
    tables: for each section present in the content document, an
          (id, title, number of items) tuple, or a list of such tuples when
          the section element occurs more than once.
    tableItems: for each section, the list of record objects, or a list of
          such lists when the section element occurs more than once.
  """

  def __init__(self, configdoc='out/dreqDefn.xml', thisdoc='../workbook/trial_20150724.xml'):
    """Parse the configuration document, then load the records of each section from the content document.

    configdoc: path of the XML document defining the sections ('table' elements) and their record attributes.
    thisdoc: path of the XML document holding the records ('item' elements).
    """
    self.silent = True
    self.vdef = configdoc
    self.vsamp = thisdoc
    self.nts = collections.namedtuple( 'sectdef', ['tag','label','title','id','itemLabelMode','level'] )
    self.nti = collections.namedtuple( 'itemdef', ['tag','label','title','type','rClass','techNote'] )
    self.ntt = collections.namedtuple( 'sectinit', ['header','attributes'] )
    self.ntf = collections.namedtuple( 'sect', ['header','attDefn','items'] )

    self.coll = {}
    doc = xml.dom.minidom.parse( self.vdef )
    self.contentDoc = xml.dom.minidom.parse( self.vsamp )
    self.slist = []
    self.tables = {}
    tables = {}
    self.tableClasses = {}
    self.tableItems = collections.defaultdict( list )
    for v in doc.getElementsByTagName( 'table' ):
      t = self.parsevcfg(v)
      label = t.header.label
      tables[label] = t
      # list(...) gives the generated class a stable list of attribute names
      # (in python 3 dict.keys() is a live view rather than a list).
      self.tableClasses[label] = self.itemClassFact( t.header.itemLabelMode, list( t.attributes ) )
      self.slist.append( t )

    self.recordAttributeDefn = tables
    for k in tables:
      sections = self.contentDoc.getElementsByTagName( k )
      if len(sections) == 1:
        self.tables[k], items = self._readSection( k, sections[0] )
        self.tableItems[k].extend( items )
      elif len(sections) > 1:
        # NOTE(review): here the items are stored as a list of lists (one per
        # section element); the index class iterates "items" as a flat list of
        # records, which looks incompatible with this shape -- confirm.
        parsed = [self._readSection( k, s ) for s in sections]
        self.tables[k] = [p[0] for p in parsed]
        self.tableItems[k] = [p[1] for p in parsed]
      # For a section absent from the content document the defaultdict
      # supplies an empty item list.
      self.coll[k] = self.ntf( tables[k].header, tables[k].attributes, self.tableItems[k] )

  def _readSection(self, k, sect):
    """Load one section element: return ((id, title, number of items), list of record objects)."""
    title = sect.getAttribute( 'title' )
    sid = sect.getAttribute( 'id' )
    elements = sect.getElementsByTagName( 'item' )
    self.info( '%s, %s, %s, %s' % ( k, title, sid, len(elements) ) )
    itemClass = self.tableClasses[k]
    return (sid, title, len(elements)), [itemClass(xmlMiniDom=e) for e in elements]

  def info(self,ss):
    """Print a message unless self.silent is set."""
    if not self.silent:
      # Single-argument call form: prints the same text under python 2 and 3.
      print( ss )

  def get(self):
    """Return the dictionary of sections (self.coll)."""
    return self.coll

  def itemClassFact(self,itemLabelMode,attributes):
    """Return a new subclass of dreqItemBase with "itemLabelMode" and "attributes" set as class attributes."""
    class dreqItem(dreqItemBase):
      """Inherits all methods from dreqItemBase"""

    dreqItem.itemLabelMode = itemLabelMode
    dreqItem.attributes = attributes
    return dreqItem

  def parsevcfg(self,v):
    """Parse a section definition element, including all the record attributes. The results are returned as a namedtuple of attributes for the section and a dictionary of record attribute specifications."""
    header = self.nts( v.nodeName,
                       v.getAttribute( 'label' ),
                       v.getAttribute( 'title' ),
                       v.getAttribute( 'id' ),
                       v.getAttribute( 'itemLabelMode' ),
                       v.getAttribute( 'level' ) )
    idict = {}
    for el in v.getElementsByTagName( 'rowAttribute' ):
      tt = self.parseicfg(el)
      idict[tt.label] = tt
    return self.ntt( header, idict )

  def parseicfg(self,i):
    """Parse a record attribute specification.

    Returns an 'itemdef' namedtuple. A missing 'type' defaults to "xs:string";
    other missing attributes are returned as None. As a side effect the title
    is stored in self.lastTitle.
    """
    defs = {'type':"xs:string"}
    ll = []
    for k in ['label','title','type','class','techNote']:
      if i.hasAttribute( k ):
        ll.append( i.getAttribute( k ) )
      else:
        ll.append( defs.get( k, None ) )
    l, t, ty, cls, tn = ll
    self.lastTitle = t
    return self.nti( i.nodeName, l,t,ty,cls,tn )
145
class container(object):
  """Simple container class, to hold a set of dictionaries of lists.

  Each instance has a plain dictionary "uuid" plus, for every name in the
  constructor argument, an attribute of that name holding a defaultdict
  whose missing keys start as empty lists.
  """

  def __init__(self, atl ):
    """atl: iterable of attribute names to create on the instance."""
    self.uuid = {}
    # Written straight into the instance dictionary, one fresh defaultdict
    # per name (a name equal to 'uuid' would replace the dictionary above).
    self.__dict__.update( (name, collections.defaultdict( list )) for name in atl )
152
class c1(object):
  """Holder for a single attribute, "a": a defaultdict whose missing keys start as empty lists."""

  def __init__(self):
    # A new, independent dictionary of lists for every instance.
    grouped = collections.defaultdict( list )
    self.a = grouped
class index(object):
  """Create an index of the document. Cross-references are generated from attributes with class 'internalLink'.

  This version assumes that each record is identified by an "uuid" attribute
  and carries a "label" attribute.

  Attributes set by the constructor:
    uuid: dictionary mapping each record uuid to (section, record).
    uuid2: records whose uuid was already present in "uuid", as lists of (section, record).
    <section>: one container per section, holding dictionaries "uuid" (uuid to
          record), "label" (label to list of uuids) and, when the section
          defines an "sn" attribute, "sn" (sn to list of uuids).
    iref_by_uuid: for a record with identifier u, iref_by_uuid[u] lists
          (link attribute name, uuid of the linking record).
    iref_by_sect: for a record with identifier u, iref_by_sect[u].a maps each
          link attribute name to the uuids of the records linking to u.
    missingIds: invalid internal links: maps a link target which is not a known
          uuid to a list of (section, link attribute name, uuid of the linking record).
    var_uuid, var_by_name, var_by_sn: created empty; nothing in this class fills them.

  NOTE(review): the entries of iref_by_uuid carry the attribute name rather
  than the section name, and iref_by_sect is keyed on the target uuid --
  confirm this is what callers expect.
  """

  def __init__(self, dreq):
    """Build the index.

    dreq: dictionary of sections; each value provides "attDefn" (dictionary of
          attribute definitions with an "rClass" attribute) and "items" (the
          records of the section).

    Raises AssertionError if a section is named after one of the index's own
    attributes listed in nativeAtts.
    """
    self.silent = True
    self.uuid = {}
    self.uuid2 = collections.defaultdict( list )
    nativeAtts = ['uuid','iref_by_uuid','iref_by_sect','missingIds']
    # Sections are stored as attributes of this object, so a section with one
    # of these names would collide with the index's own data. Raised
    # explicitly so that the check survives python -O.
    if any( x in dreq for x in nativeAtts ):
      raise AssertionError( 'This version cannot index collections containing sections with names: %s' % str( nativeAtts ) )
    self.var_uuid = {}
    self.var_by_name = collections.defaultdict( list )
    self.var_by_sn = collections.defaultdict( list )
    self.iref_by_uuid = collections.defaultdict( list )
    irefdict = collections.defaultdict( list )
    for k in dreq:
      attDefn = dreq[k].attDefn
      if 'sn' in attDefn:
        self.__dict__[k] = container( ['label','sn'] )
      else:
        self.__dict__[k] = container( ['label'] )
      ##
      ## collect names of attributes which carry internal links
      ##
      for ka in attDefn:
        if attDefn[ka].rClass == 'internalLink':
          irefdict[k].append( ka )

    # Register every record under its uuid; the first occurrence wins and
    # later duplicates are reported and kept aside in uuid2.
    for k in dreq:
      for i in dreq[k].items:
        if i.uuid in self.uuid:
          print( 'ERROR.100.0001: Duplicate uuid: %s' % i.uuid )
          self.uuid2[i.uuid].append( (k,i) )
        else:
          self.uuid[i.uuid] = (k,i)

    self.missingIds = collections.defaultdict( list )
    self.iref_by_sect = collections.defaultdict( c1 )
    for k in dreq:
      for k2 in irefdict.get( k, [] ):
        n1 = 0    # links pointing at a known uuid
        n2 = 0    # links pointing at an unknown uuid
        for i in dreq[k].items:
          id2 = i.__dict__.get( k2 )
          # '__unset__' marks an attribute which was absent from the record.
          if id2 != '__unset__':
            self.iref_by_uuid[ id2 ].append( (k2,i.uuid) )
            self.iref_by_sect[ id2 ].a[k2].append( i.uuid )
            if id2 in self.uuid:
              n1 += 1
            else:
              n2 += 1
              self.missingIds[id2].append( (k,k2,i.uuid) )
        # The original format ran the attribute name and the first count
        # together ('%s%s'); a separator has been added.
        self.info( 'INFO:: %s, %s: %s (%s)' % (k,k2,n1,n2) )

    for k in dreq:
      sect = self.__dict__[k]
      hasSn = 'sn' in dreq[k].attDefn
      for i in dreq[k].items:
        sect.uuid[i.uuid] = i
        sect.label[i.label].append( i.uuid )
        if hasSn:
          sect.sn[i.sn].append( i.uuid )

  def info(self,ss):
    """Print a message unless self.silent is set."""
    if not self.silent:
      # Single-argument call form: prints the same text under python 2 and 3.
      print( ss )
223
224
# Path of a workbook XML document.
# NOTE(review): not referenced anywhere in the visible part of this module -- confirm whether it is still needed.
src1 = '../workbook/trial_20150831.xml'
class loadDreq(object):
  """Load the data request and index it.

  Attributes:
    c: the config object built from the two documents.
    coll: the dictionary of sections returned by c.get().
    inx: the index built over coll.
  """

  def __init__(self,dreqXML='annotated_20150731.xml',configdoc='out/dreqDefn.xml' ):
    """dreqXML: path of the content document; configdoc: path of the configuration document."""
    loaded = config( configdoc=configdoc, thisdoc=dreqXML )
    self.c = loaded
    sections = loaded.get()
    self.coll = sections
    self.inx = index( sections )
231
if __name__ == '__main__':
  # Run as a script: load and index the data request using the default
  # document paths of loadDreq.
  dreq = loadDreq( )
234
Note: See TracBrowser for help on using the repository browser.