1 | try: #python 2.5 |
---|
2 | from xml.etree import cElementTree as ET |
---|
3 | except ImportError: |
---|
4 | try: |
---|
5 | # if you've installed it yourself it comes this way |
---|
6 | import cElementTree as ET |
---|
7 | except ImportError: |
---|
8 | # if you've egged it this is the way it comes |
---|
9 | from ndgUtils.elementtree import cElementTree as ET |
---|
10 | import sys, re |
---|
11 | |
---|
12 | |
---|
13 | #this is the NEW xml schema class |
---|
14 | |
---|
class xmlSchema(object):
    ''' Maps every named node of an XML schema to the ordered list of its child names.

    The mapping is consulted when a new moles document is written out, so that
    elements can be emitted in the order the schema's sequences specify. When no
    sequence is recorded for a name, lookupOrder falls back to the keys of the
    dictionary it is given, in whatever order python iterates them.

    Attributes:
        schema    -- whatever was passed to the constructor; handed to ET.parse
        sequences -- dict: name -> list of child names in document order
        types     -- dict: element name -> name of its 'moles:' type (see
                     __checkChildren for how the type name is trimmed)
        names     -- initialised to an empty list; not filled in by this class
    '''

    # Tags of the XML Schema children that make a node worth descending into.
    # annotation, documentation, extension, attribute and enumeration children
    # (and any tag not listed here) do not count.
    _STRUCTURAL_TAGS = frozenset([
        '{http://www.w3.org/2001/XMLSchema}element',
        '{http://www.w3.org/2001/XMLSchema}complexType',
        '{http://www.w3.org/2001/XMLSchema}simpleType',
        '{http://www.w3.org/2001/XMLSchema}sequence',
        '{http://www.w3.org/2001/XMLSchema}choice',
        '{http://www.w3.org/2001/XMLSchema}complexContent',
        '{http://www.w3.org/2001/XMLSchema}restriction',
    ])

    # 'name' values that end in 'Type' but must be kept whole.
    # Add other rogue names ending in Type to this tuple.
    _KEEP_TYPE_SUFFIX = ('dgDataSetType', 'dgFeatureType')

    def __init__(self, schema):
        '''Parse schema (a file name or file object accepted by ET.parse) and build the mapping.'''
        self.schema = schema
        self.types = {}
        self.sequences = {}
        self.names = []
        self.setup()

    def __isParent(self, elem):
        '''Return True if elem has at least one direct child whose tag is structural.'''
        for child in elem:
            if child.tag in self._STRUCTURAL_TAGS:
                return True
        return False

    def __checkChildren(self, parentclassname, elem):
        '''Record the named children of elem under parentclassname, recursing as needed.

        A child is named by its 'name' attribute, or failing that by a 'ref'
        attribute starting with 'moles:'. Children without a name are treated
        as transparent: their own children are recorded against the same parent.
        '''
        for child in elem:
            attrib = child.attrib
            childname = None
            if 'name' in attrib:
                childname = attrib['name']
                if childname.endswith('Type') and childname not in self._KEEP_TYPE_SUFFIX:
                    childname = childname[:-4]
                declared = attrib.get('type', '')
                if declared.startswith('moles:'):
                    # drop the 'moles:' prefix and the last four characters
                    # NOTE(review): assumes every moles type name ends in
                    # 'Type' - confirm against the schema
                    self.types[childname] = declared[6:-4]
            elif attrib.get('ref', '').startswith('moles:'):
                childname = attrib['ref'][6:]

            if childname is None:
                if self.__isParent(child):
                    self.__checkChildren(parentclassname, child)
                continue

            if self.sequences[parentclassname] is None:
                self.sequences[parentclassname] = [childname]
            else:
                self.sequences[parentclassname].append(childname)
            if self.__isParent(child):
                self.sequences.setdefault(childname, [])
                self.__checkChildren(childname, child)

    def __resolveTypes(self):
        '''Give every typed element the sequence recorded for its type, when there is one.'''
        for name, typename in self.types.items():
            # types without a recorded sequence are skipped
            if typename in self.sequences:
                # the list object is shared with the type's entry, not copied
                self.sequences[name] = self.sequences[typename]

    def setup(self):
        '''Parse self.schema and populate self.sequences and self.types.'''
        root = ET.parse(self.schema).getroot()
        # top-level declarations are collected under a placeholder parent,
        # which is blanked out again once the walk is finished
        self.sequences['dummy'] = []
        self.__checkChildren('dummy', root)
        self.__resolveTypes()
        self.sequences['dummy'] = None

    def lookupOrder(self, dict, classname):
        '''takes the attributes in a dictionary and orders them according to the schema sequence

        Returns the list stored in self.sequences for classname (the stored
        object itself, not a copy). If classname is unknown, 'KEY ERROR
        <classname>' is printed once and the keys of dict are returned in
        iteration order, leaving out the bookkeeping keys 'schema' and 'ns'.
        '''
        try:
            return self.sequences[classname]
        except KeyError:
            pass
        print('KEY ERROR %s' % classname)
        # compare by value: identity tests against string literals only work
        # when the interpreter happens to have interned both strings
        return [key for key in dict if key != 'schema' and key != 'ns']
---|
132 | |
---|
class molesElement(object):
    ''' molesElement class - base class of all elements

    Child elements and text values are held as ordinary instance attributes
    named after the XML element. Two attributes are bookkeeping rather than
    content: 'ns' (the namespace, in '{uri}' form) and 'schema' (set by toXML).
    '''

    def __init__(self, namespace=None, **kwargs):
        '''Store the namespace (default '{http://ndg.nerc.ac.uk/moles}') and
        copy every keyword argument onto the instance as an attribute.'''
        if namespace is not None:
            self.ns = namespace
        else:
            self.ns = '{http://ndg.nerc.ac.uk/moles}'
        self.__dict__.update(kwargs)

    def __combineattributes(self, attname, newChild):
        '''Add newChild to the existing attribute attname, turning it into a list if needed.'''
        att = getattr(self, attname)
        if isinstance(att, list):
            att.append(newChild)
        else:
            # a single value (element or plain text) becomes a two-item list
            setattr(self, attname, [att, newChild])

    def _stripNS(self, tagtostrip):
        '''Split an ElementTree tag into (name, namespace).

        '{uri}name' gives ('name', '{uri}'); a tag without '}' is returned
        unchanged together with the default moles namespace.
        '''
        parts = tagtostrip.split('}')
        if len(parts) > 1:
            return parts[1], parts[0] + '}'
        return tagtostrip, '{http://ndg.nerc.ac.uk/moles}'

    def addChildElem(self, childname, childobj):
        '''Attach childobj as attribute childname.

        Sometimes you want to add a child element but don't know if there is
        one already: the first value is stored as is, later ones turn the
        attribute into a list and are appended to it.
        '''
        if not hasattr(self, childname):
            setattr(self, childname, childobj)
            return
        current = getattr(self, childname)
        if isinstance(current, list):
            current.append(childobj)
        else:
            setattr(self, childname, [current, childobj])

    def toXML(self, molesFrag, schema=None):
        '''Append this object's attributes to molesFrag as child elements and return molesFrag.

        molesFrag -- ElementTree element to fill in; its tag is what gets looked
                     up in the schema
        schema    -- object offering lookupOrder(dict, tag); it decides which
                     attributes are written and in which order. With no schema
                     the instance attributes are written in iteration order.

        The schema (or None) is remembered on self.schema and handed on to
        nested molesElement children. Names in the order that are not instance
        attributes are skipped.
        '''
        self.schema = schema
        if schema is not None:
            orderedAttribs = schema.lookupOrder(self.__dict__, molesFrag.tag)
        else:
            orderedAttribs = [key for key in self.__dict__
                              if key != 'schema' and key != 'ns']

        for item in orderedAttribs:
            # never serialise the bookkeeping attributes
            if item in ('schema', 'ns', '{http://ndg.nerc.ac.uk/moles}'):
                continue

            # avoid abstractText being output twice
            # NB, this is a hack to fix the problem of this class not handling
            # namespaces correctly - ideally this is what should be fixed here
            if str(molesFrag.tag) == "abstract" and len(molesFrag) > 0:
                continue

            if item not in self.__dict__:
                continue
            value = self.__dict__[item]

            if isinstance(value, molesElement):
                frag = ET.Element(item)
                value.toXML(frag, schema=self.schema)
                molesFrag.append(frag)
            elif isinstance(value, list):
                # one element per list entry, all carrying the same tag
                for it in value:
                    frag = ET.Element(item)
                    if isinstance(it, molesElement):
                        it.toXML(frag, schema=self.schema)
                    else:
                        frag.text = it
                    molesFrag.append(frag)
            else:
                frag = ET.Element(item)
                frag.text = value
                molesFrag.append(frag)

        return molesFrag

    def fromXML(self, molesFrag):
        '''Populate this object from the ElementTree element molesFrag.

        A fragment without children stores its own text under its own
        (namespace-stripped) tag name. Otherwise each child becomes an
        attribute: children that have children of their own are turned into
        freshly created molesElement subclasses named after the tag, leaf
        children are stored as text passed through escapeSpecialCharacters.
        '''
        children = list(molesFrag)
        if not children:
            elementWithoutNS, ns = self._stripNS(molesFrag.tag)
            setattr(self, elementWithoutNS, molesFrag.text)
            return

        for child in children:
            childWithoutNS, ns = self._stripNS(child.tag)
            if len(child) > 0:
                newClass = type(childWithoutNS, (molesElement,), {})
                newChild = newClass(ns)
                newChild.fromXML(child)
                if hasattr(self, childWithoutNS):
                    self.__combineattributes(childWithoutNS, newChild)
                else:
                    setattr(self, childWithoutNS, newChild)
            else:
                # escape any special characters - mainly required for
                # &gt;, etc which get expanded out whilst processing the
                # elements here
                # NOTE(review): a repeated leaf tag overwrites the earlier
                # value rather than building a list - confirm this is intended
                value = self.escapeSpecialCharacters(child.text)
                setattr(self, childWithoutNS, value)

    def escapeSpecialCharacters(self, text):
        '''
        Escape any special characters included in the input text string
        @param text: string to escape special characters from
        @return: the result of subAI().sub(text)
        '''
        return subAI().sub(text)
---|
254 | |
---|
255 | |
---|
class subAI(object):
    ''' This is Alan Iwi's substitute and replace orphan <> code

    An orphan '<' is one that is not followed by a '>' before the next '<' or
    the end of the string; an orphan '>' is one that is not preceded by a '<'
    since the previous '>' or the start of the string. Orphans are replaced by
    the entities &lt; and &gt;, while matched '<...>' pairs are left alone.
    '''

    def __init__(self):
        # r1: a '<' followed by no '>' up to the next '<' or end of string
        self.r1 = re.compile(r'<([^>]*(<|$))')
        # r2: a '>' preceded by no '<' back to the previous '>' or start of string
        self.r2 = re.compile(r'((^|>)[^<]*)>')

    def sub(self, s):
        '''Return s with orphan '<' and '>' escaped; None is passed straight through.'''
        if s is None:
            return s

        # One pass can leave work behind because a match consumes the '<' or
        # '>' that ends it, so repeat until nothing changes any more.
        old = ''
        while s != old:
            old = s
            s = self.r1.sub(r'&lt;\1', s)
            s = self.r2.sub(r'\1&gt;', s)
        return s
---|
271 | |
---|
272 | |
---|
class dgMetadata(molesElement):
    ''' molesElement that serialises itself as a 'dgMetadata' element '''

    def __init__(self, **kwargs):
        '''Pass every keyword argument on to molesElement.__init__.'''
        molesElement.__init__(self, **kwargs)

    def toXML(self, molesFrag=None, schema=None):
        '''Serialise this object and return the resulting element.

        Called without arguments (the original form) this parses
        'ndgmetadata1.3.xsd' - a relative path, so it is resolved against the
        current working directory - and builds a new 'dgMetadata' element whose
        xmlns attribute is the moles namespace.

        molesFrag -- optional existing element to fill in instead of a new one
        schema    -- optional ready-made schema object, which avoids re-parsing
                     the xsd on every call

        Both parameters mirror molesElement.toXML, so an instance nested inside
        another molesElement can be serialised by the parent's toXML as well.
        '''
        if schema is None:
            schema = xmlSchema('ndgmetadata1.3.xsd')
        self.schema = schema
        if molesFrag is None:
            molesFrag = ET.Element('dgMetadata')
            molesFrag.attrib['xmlns'] = 'http://ndg.nerc.ac.uk/moles'
        molesElement.toXML(self, molesFrag, schema=self.schema)
        return molesFrag
---|
283 | |
---|
284 | |
---|
class MolesDoc(object):
    ''' Holder for one molesElement subclass per known moles element name.

    Each instance creates its own set of classes and exposes them as
    attributes, e.g. MolesDoc().dgDataEntity.
    '''

    # if you want more classes just add their names to this tuple
    # could probably examine the schema here....
    _CLASS_NAMES = (
        'dataModelID',
        'dgDataGranule',
        'dgGranuleSummary',
        'dgGranuleName',
        'simpleCondition',
        'dgSecurityCondition',
        'accessControlPolicy',
        'dgDataEntity',
        'dgMetadataRecord',
        'dgMetadataID',
        'dgCoverage',
        'dgDataCoverage',
        'dgSpatioTemporalRange',
        'dgSpatioTemporalCoverage',
        'dgSpatialCoverage',
        'dgTemporalCoverage',
        'dgBoundingBox',
        'dgArea',
        'DateRange',
        'dgDataSummary',
        'dgParameterSummary',
        'dgParameterValue',
        'dgValueDataParameter',
        'dgStandardUnit',
        'dgOriginalUnit',
        'dgRangeDataParameter',
        'dgEnumerationParameter',
        'dgParameterGroup',
        'dgComponentParameter',
        'dgStdParameterMeasured',
        'dgValidTermID',
        'dgValidTermParentID',
        'dgValidSubterm',
        'ListLevel',
        'metadataDescriptionID',
        'dgMetadataDescription',
        'dgStructuredKeyword',
        'abstract',
        'descriptionSection',
        'dgReferenceClass',
        'descriptionOnlineReference',
        'dgSimpleLink',
        'logos',
        'logoURI',
        'dgDataSetType',
        'dgSimulation',
        'dgAnalysis',
        'dgMeasurement',
        'dgFeatureType',
        'dgDataRoles',
        'dgDataCreator',
        'dgDataCurator',
        'dgRoleHolder',
        'dgOrganisationID',
        'dgPersonID',
        'dgRoleID',
        'contactDetails',
        'address',
        'dgActivity',
        'relatedActivity',
        'dgActivityDataCollection',
        'dgActivityDataProject',
        'dgActivityDataCampaign',
        'dgActivityDataInvestigation',
        'dgActivityRole',
        'dgActivityDeployment',
        'dgActivityCoverage',
        'dgActivityDuration',
        'ActivityDeployment',
        'DateStart',
        'DateEnd',
        'ActivityID',
        'DataProductionToolID',
        'ObservationStationID',
        'ObsStationDeployment',
        'dgPrincipalInvestigator',
        'dgInvestigator',
        'dgFlight',
        'dgCruise',
        'RelatedDeployment',
        'dgModel',
        'DPTDeployment',
        'dgInstrument',
        'dgDataProductionTool',
        'dgDPTRole',
        'dgObservationStation',
        'dgStationaryPlatform',
        'dgMovingPlatform',
        'dgLandStation',
        'dgMooring',
        'position',
        'dgStationGroup',
        'dgShip',
        'vesselType',
        'RecordCreation',
        'RecordUpdate',
        'dgMetadataProvenance',
    )

    def __init__(self):
        '''Create the element classes straight away.'''
        self._createClasses()

    def _create_a_class(self, name, base_class):
        '''Return a new, empty class called name that derives from base_class.'''
        return type(name, (base_class,), {})

    def _createClasses(self):
        '''Attach one molesElement subclass per entry of _CLASS_NAMES to this instance.'''
        for className in self._CLASS_NAMES:
            setattr(self, className, self._create_a_class(className, molesElement))
---|
400 | |
---|
401 | |
---|
def main():
    '''Parse 'ndgmetadata1.3.xsd' from the current directory and print every
    recorded sequence: a separator line, the name, then its list of children.'''
    schema = xmlSchema('ndgmetadata1.3.xsd')

    # single-argument print(...) gives the same output under the Python 2
    # print statement and the Python 3 print function
    for key in schema.sequences:
        print('------------------------------------------------------------------')
        print(key)
        print(schema.sequences[key])


if __name__ == '__main__':
    main()
---|