source: TI12-security/trunk/python/ndg.security.test/ndg/security/test/elementTreeC14n/testC14n.py @ 4839

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI12-security/trunk/python/ndg.security.test/ndg/security/test/elementTreeC14n/testC14n.py@4839
Revision 4839, 17.6 KB checked in by pjkersha, 11 years ago (diff)

Changed licence from Q Public to BSD

  • Property svn:executable set to *
Line 
#!/usr/bin/env python
"""NDG ElementTreeC14n class unit tests

NERC Data Grid Project
"""
__author__ = "P J Kershaw"
__date__ = "03/01/07"
__copyright__ = "(C) 2009 Science and Technology Facilities Council"
# __license__ and __contact__ were fused onto a single physical line, which
# is a syntax error; they are two separate module attributes.
__license__ = "BSD - see LICENSE file in top-level directory"
__contact__ = "Philip.Kershaw@stfc.ac.uk"
__revision__ = '$Id: ElementTreeC14nTest.py 3202 2008-01-11 13:42:34Z pjkersha $'
11
12import unittest
13import os
14import sys
15import getpass
16import traceback
17
18from difflib import unified_diff
19
20from StringIO import StringIO
21
22from elementtree import ElementC14N as ETC14N
23from elementtree import ElementTree as ET
24
25# Minidom based Canonicalization from ZSI for comparison
26from ZSI.wstools.c14n import Canonicalize
27
28from xml.xpath.Context import Context
29from xml import xpath
30from xml.dom.ext.reader import PyExpat
31
# Short module-level aliases for the os.path helpers (environment variable
# expansion and path joining).
xpdVars = os.path.expandvars
jnPath = os.path.join
34
35class ElementTreeC14nTestCase(unittest.TestCase):
36   
37    def setUp(self):
38       
39        if 'NDGSEC_INT_DEBUG' in os.environ:
40            import pdb
41            pdb.set_trace()         
42
43    def assertEqual(self, a, b):
44        '''Override default to get something easy to read than super class
45        behaviour'''
46        if a != b:
47            diffGen = unified_diff(a.split('\n'), b.split('\n'))
48            raise AssertionError('\n'+'\n'.join(diffGen))
49       
50    def test01UTF8DocEncoding(self):
51       
52        # http://www.w3.org/TR/xml-c14n#Example-UTF8
53        xml = '<?xml version="1.0" encoding="ISO-8859-1"?><doc>&#169;</doc>'
54        elem = ET.fromstring(xml)
55        f = StringIO()
56        ETC14N.write(elem, f)
57        c14n = f.getvalue()
58        #self.assertEqual(c14n, '<doc>#xC2#xA9</doc>')
59        self.assertEqual(c14n, '<doc>\xC2\xA9</doc>')
60
61    def test01aPIsCommentsAndOutsideOfDocElem(self):
62        # http://www.w3.org/TR/xml-c14n#Example-OutsideDoc - PIs, Comments, and
63        # Outside of Document Element
64        xml = \
65'''<?xml version="1.0"?>
66
67<?xml-stylesheet   href="doc.xsl"
68   type="text/xsl"   ?>
69
70<!DOCTYPE doc SYSTEM "doc.dtd">
71
72<doc>Hello, world!<!-- Comment 1 --></doc>
73
74<?pi-without-data     ?>
75
76<!-- Comment 2 -->
77
78<!-- Comment 3 -->'''
79
80        exptdC14n = \
81'''<?xml-stylesheet href="doc.xsl"
82   type="text/xsl"   ?>
83<doc>Hello, world!</doc>
84<?pi-without-data?>'''
85
86        elem = ET.fromstring(xml)
87        f = StringIO()
88        ETC14N.write(elem, f)
89        c14n = f.getvalue()
90        self.assertEqual(c14n, exptdC14n)
91
92   
93    def test02NormalizeLineBreaks(self):
94        xml = '<?xml version="1.0" encoding="UTF-8"?>\r\n<a/>\r\n'
95        elem = ET.fromstring(xml)
96        f = StringIO()
97        ET.ElementTree(elem).write_c14n(f)
98        c14n = f.getvalue()
99        self.failIf('\r' in c14n, "Carriage return \r char found in c14n")
100
101   
102    def test03NormalizedAttrVals(self):
103        pass
104
105   
106    def test04CharAndParsedEntityRefsReplaced(self):
107        xml = '''<!DOCTYPE doc [
108<!ATTLIST doc attrExtEnt ENTITY #IMPLIED>
109<!ENTITY ent1 "Hello">
110<!ENTITY ent2 SYSTEM "world.txt">
111<!ENTITY entExt SYSTEM "earth.gif" NDATA gif>
112<!NOTATION gif SYSTEM "viewgif.exe">
113]>
114<doc attrExtEnt="entExt">
115   &ent1;, &ent2;!
116</doc>
117
118<!-- Let world.txt contain "world" (excluding the quotes) -->'''
119
120        exptdC14n = '''<doc attrExtEnt="entExt">
121   Hello, world!
122</doc>'''
123        elem = ET.fromstring(xml)
124        f = StringIO()
125        ET.ElementTree(elem).write_c14n(f)
126        c14n = f.getvalue()
127        self.assertEqual(c14n, exptdC14n)
128       
129   
130    def test05CDATASectionsReplaced(self):
131        xml = \
132"""<?xml version="1.0" encoding="UTF-8"?>
133<script>
134<![CDATA[
135function matchwo(a,b)
136{
137if (a < b && a > 0) then
138   {
139   print("Match");
140   return 1;
141   }
142else
143   {
144   print('Different');
145   return 0;
146   }
147}
148]]>
149</script>
150"""
151        elem = ET.fromstring(xml)
152        f = StringIO()
153        ET.ElementTree(elem).write_c14n(f)
154        c14n = f.getvalue()
155       
156        self.failIf('CDATA' in c14n, "CDATA not removed, c14n = %s" % c14n)
157        self.failUnless('&lt;' in c14n,
158                        "Less than not converted, c14n = %s" % c14n)
159        self.failUnless('&gt;' in c14n, 
160                        "Greater than not converted, c14n = %s" % c14n)
161        self.failUnless('&amp;' in c14n, 
162                        "Ampersand not converted, c14n = %s" % c14n)
163
164        # Test for double quotes / apostrophes?
165       
166   
167    def test06XMLDeclAndDTDRemoved(self):
168        xmlDecl = '<?xml version="1.0" encoding="UTF-8"?>'
169        dtd = \
170"""<!DOCTYPE note [
171  <!ELEMENT note (to,from,heading,body)>
172  <!ELEMENT to      (#PCDATA)>
173  <!ELEMENT from    (#PCDATA)>
174  <!ELEMENT heading (#PCDATA)>
175  <!ELEMENT body    (#PCDATA)>
176]>
177"""
178        xml = \
179"""%s
180%s<a/>""" % (xmlDecl, dtd)
181
182        elem = ET.fromstring(xml)
183        f = StringIO()
184        ET.ElementTree(elem).write_c14n(f)
185        c14n = f.getvalue()
186        self.failIf('<?xml version="1.0" encoding="UTF-8"?>' in c14n, 
187                    "XML Declaration not removed")
188        self.failIf(dtd in c14n, "DTD not removed")
189
190   
191    def test07EmptyElemsConvertedStartEndPairs(self):
192        elem = ET.fromstring('<?xml version="1.0" encoding="UTF-8"?><a/>')
193        f = StringIO()
194        ET.ElementTree(elem).write_c14n(f)
195        c14n = f.getvalue()
196        self.failUnless(c14n == '<a></a>', "C14N = %s" % c14n)
197
198         
199    def test08WhitespaceNormalized(self):
200        # ...outside the document element and within start and end tags
201        dat = \
202'''        1 2
203  3'''
204 
205        xml = \
206'''<?xml version="1.0" encoding="UTF-8"?>
207<doc xmlns="http://example.com/default">
208  <a
209     a2="2"   a1="1"
210  >%s</a>
211</doc>
212
213''' % dat
214
215        elem = ET.fromstring(xml)
216        f = StringIO()
217        ET.ElementTree(elem).write_c14n(f)
218        c14n = f.getvalue()
219       
220        self.failUnless('a1="1" a2="2"' in c14n, 
221                        "Expecting single space between attributes")
222        self.failUnless(dat in c14n, 
223                        "Expecting element content to be preserved")
224       
225        sub = c14n[c14n.find('<a'):c14n.find('>')]
226        self.failIf('\n' in sub, 
227                    "Expecting removal of line breaks for 'a' element")
228     
229     
230    def test09WhitespaceInCharContentRetained(self):
231        # http://www.w3.org/TR/xml-c14n#Example-WhitespaceInContent
232        # Nb. excludes chars removed during line break normalization
233        xml = \
234'''<doc>
235   <clean>   </clean>
236   <dirty>   A   B   </dirty>
237   <mixed>
238      A
239      <clean>   </clean>
240      B
241      <dirty>   A   B   </dirty>
242      C
243   </mixed>
244</doc>'''
245        elem = ET.fromstring(xml)
246        f = StringIO()
247        ETC14N.write(elem, f)
248        c14n = f.getvalue()
249       
250        # In this case the canonicalized form should be identical to the
251        # original
252        self.assertEqual(c14n, xml)
253
254       
255    def test10AttrValDelimitersSet2DblQuotes(self):
256        xml = \
257"""<?xml version="1.0" encoding="UTF-8"?>
258  <b y:a1='1' a3='"3"'
259     xmlns:y='http://example.com/y' y:a2='2'/>
260"""
261
262        elem = ET.fromstring(xml)
263        f = StringIO()
264        ET.ElementTree(elem).write_c14n(f)
265        c14n = f.getvalue()
266        self.failIf("'" in c14n, 
267                    "Expecting removal of apostrophes C14N = %s" % c14n)
268
269   
270    def test11SpecialCharsReplaced(self):
271        # i.e. within attribute values and character content
272        pass
273       
274       
275    def test12SuperflousNSdeclsRemoved(self):
276        extraNS = "http://example.com/default"
277        xml = \
278"""<?xml version="1.0" encoding="UTF-8"?>
279<doc xmlns:x="http://example.com/x" xmlns="%s">
280  <b y:a1='1' xmlns="%s" a3='"3"'
281     xmlns:y='http://example.com/y' y:a2='2'/>
282</doc>""" % (extraNS, extraNS)
283
284        elem = ET.fromstring(xml)
285        f = StringIO()
286        ET.ElementTree(elem).write_c14n(f)
287        c14n = f.getvalue()
288       
289        # Namespace should now only occur once...
290        self.failUnless(c14n.find(extraNS) == c14n.rfind(extraNS), 
291                    "Expecting removal of extra NS %s in output = %s" % \
292                    (extraNS, c14n))
293       
294       
295    def test13DefAttrsAdded2EachElem(self):
296        # Ref. http://www.w3.org/TR/xml-c14n#Example-SETags
297        xml = '''<!DOCTYPE doc [<!ATTLIST e9 attr CDATA "default">]>
298<doc>
299   <e1   />
300   <e2   ></e2>
301   <e3   name = "elem3"   id="elem3"   />
302   <e4   name="elem4"   id="elem4"   ></e4>
303   <e5 a:attr="out" b:attr="sorted" attr2="all" attr="I'm"
304      xmlns:b="http://www.ietf.org"
305      xmlns:a="http://www.w3.org"
306      xmlns="http://example.org"/>
307   <e6 xmlns="" xmlns:a="http://www.w3.org">
308      <e7 xmlns="http://www.ietf.org">
309         <e8 xmlns="" xmlns:a="http://www.w3.org">
310            <e9 xmlns="" xmlns:a="http://www.ietf.org"/>
311         </e8>
312      </e7>
313   </e6>
314</doc>'''
315
316        elem = ET.fromstring(xml)
317        f = StringIO()
318#        ET.ElementTree(elem).write_c14n(f)
319        ETC14N.write(elem, f)
320        c14n = f.getvalue()
321
322        exptdC14n = '''<doc>
323   <e1></e1>
324   <e2></e2>
325   <e3 id="elem3" name="elem3"></e3>
326   <e4 id="elem4" name="elem4"></e4>
327   <e5 xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I'm" attr2="all" b:attr="sorted" a:attr="out"></e5>
328   <e6 xmlns:a="http://www.w3.org">
329      <e7 xmlns="http://www.ietf.org">
330         <e8 xmlns="">
331            <e9 xmlns:a="http://www.ietf.org" attr="default"></e9>
332         </e8>
333      </e7>
334   </e6>
335</doc>'''
336        self.assertEqual(c14n, exptdC14n)
337       
338    def test14DocumentSubsets(self):
339        # Ref. http://www.w3.org/TR/xml-c14n#Example-DocSubsets
340        xml = \
341"""<!DOCTYPE doc [
342<!ATTLIST e2 xml:space (default|preserve) 'preserve'>
343<!ATTLIST e3 id ID #IMPLIED>
344]>
345<doc xmlns="http://www.ietf.org" xmlns:w3c="http://www.w3.org">
346   <e1>
347      <e2 xmlns="">
348         <e3 id="E3"/>
349      </e2>
350   </e1>
351</doc>"""
352
353#'''<!-- Evaluate with declaration xmlns:ietf="http://www.ietf.org" -->
354        xpathExpr = \
355'''
356(//. | //@* | //namespace::*)
357[
358   self::ietf:e1 or (parent::ietf:e1 and not(self::text() or self::e2))
359   or
360   count(id("E3")|ancestor-or-self::node()) = count(ancestor-or-self::node())
361]'''
362
363        exptdC14n = \
364'<e1 xmlns="http://www.ietf.org" xmlns:w3c="http://www.w3.org"><e3 xmlns="" id="E3" xml:space="preserve"></e3></e1>'
365
366        elem = ET.fromstring(xml)
367        f = StringIO()
368        subElem = elem.find(xpathExpr)
369        ETC14N.write(elem, f, subset_element=subElem)
370        c14n = f.getvalue()
371
372    def test15CmpZSIc14n(self):
373        elem = ETC14N.parse('./windows-ac.xml')
374        ETC14N.write(elem, './et-c14n-ac.xml')
375       
376        from xml.dom.ext.reader import PyExpat
377        reader = PyExpat.Reader()
378        dom = reader.fromStream(open('./windows-ac.xml'))
379       
380        zsiC14n = Canonicalize(dom)
381        etC14n = open('./et-c14n-ac.xml').read()
382        open('./zsi-c14n-ac.xml', 'w').write(zsiC14n)
383       
384        etC14n = open('./et-c14n-ac.xml').read()
385
386        self.failUnless(etC14n == zsiC14n, "ZSI C14N output differs")
387       
388    def test16Cmplxmlc14n(self):
389        from StringIO import StringIO
390
391        elem = ETC14N.parse('./windows-ac.xml')
392        ETC14N.write(elem, './et-c14n-ac-2.xml')
393       
394       
395        from lxml import etree as lxmlET
396       
397        lxmlElem = lxmlET.parse('./windows-ac.xml')
398        lxmlETf = StringIO()
399        lxmlElem.write_c14n(lxmlETf)
400        open('./lxml-c14n-ac.xml', 'w').write(lxmlETf.getvalue())
401       
402        f1 = open('./et-c14n-ac-2.xml')
403        etC14n = f1.read()
404        f1.close()
405       
406        self.failUnless(etC14n == lxmlETf.getvalue(),
407                        "lxml C14N output differs")
408       
409       
410    def test17InclusiveC14nWithXPath(self):
411        # Inclusive Canonicalization of portions of a SOAP message extracted
412        # using XPath
413       
414        inputFile = './soapGetAttCertResponse.xml'
415       
416        reader = PyExpat.Reader()
417        dom = reader.fromStream(open(inputFile))
418        processorNss = \
419        {
420            'wsu': \
421"http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd",
422        }
423   
424        ctxt = Context(dom, processorNss=processorNss)
425        zsiRefNodes = xpath.Evaluate('//*[@wsu:Id]', 
426                                  contextNode=dom, 
427                                  context=ctxt)
428       
429        # ElementTree
430        elem = ETC14N.parse(inputFile)
431       
432        # Extract nodes for signing
433        etRefNodes = elem.findall('.//*[@wsu:Id]', namespaces=processorNss)
434       
435        for zsiRefNode, etRefNode in zip(zsiRefNodes, etRefNodes):
436            # Get ref node and all it's children
437            zsiRefC14n = Canonicalize(zsiRefNode)
438
439#            print "_"*80
440#            print "ZSI Inclusive C14N %s:\n" % zsiRefNode.nodeName
441#            print zsiRefC14n
442            open('soapGetAttCertResponse-%s-zsi-c14n.xml'%zsiRefNode.localName, 
443                 'w').write(zsiRefC14n)
444                 
445            f = StringIO()
446            ETC14N.write(elem, f, subset=etRefNode)
447            etRefC14n = f.getvalue()
448           
449#            print "_"*80
450#            print "ElementTree Inclusive C14N %s:\n" % etRefNode.tag
451#            print etRefC14n
452            open('soapGetAttCertResponse-%s-et-c14n.xml' % \
453                 etRefNode.tag.split('}')[-1], 
454                 'w').write(etRefC14n)
455            self.assertEqual(zsiRefC14n, etRefC14n)
456       
457    def test18ExclC14nWithXPath(self):
458        # Exclusive C14N applied to portions of a SOAP message by extracting
459        # using XPath
460       
461        inputFile = './soapGetAttCertResponse.xml'
462
463        reader = PyExpat.Reader()
464        dom = reader.fromStream(open(inputFile))
465        processorNss = \
466        {
467            'wsu': \
468"http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd",
469        }
470   
471        ctxt = Context(dom, processorNss=processorNss)
472        zsiRefNodes = xpath.Evaluate('//*[@wsu:Id]', 
473                                  contextNode=dom, 
474                                  context=ctxt)
475        # ElementTree
476        elem = ETC14N.parse(inputFile)
477       
478        # Extract nodes for signing
479        etRefNodes = elem.findall('.//*[@wsu:Id]', namespaces=processorNss)
480       
481        for zsiRefNode, etRefNode in zip(zsiRefNodes, etRefNodes):
482            # Get ref node and all it's children
483            refSubsetList = getChildNodes(zsiRefNode)
484            zsiRefC14n = Canonicalize(dom, None, subset=refSubsetList,
485                                   unsuppressedPrefixes=[])
486
487#            print "_"*80
488#            print "ZSI Exclusive C14N %s:\n" % zsiRefNode.nodeName
489#            print zsiRefC14n
490            open('soapGetAttCertResponse-%s-exclC14n.xml'%zsiRefNode.localName, 
491                 'w').write(zsiRefC14n)
492       
493            # ElementTree equivalent     
494            f = StringIO()
495            ETC14N.write(elem, f, etRefNode, exclusive=True)
496            etRefC14n = f.getvalue()
497           
498#            print "_"*80
499#            print "ElementTree Exclusive C14N %s:\n" % etRefNode.tag
500#            print etRefC14n
501            open('soapGetAttCertResponse-%s-et-exclC14n.xml' % \
502                 etRefNode.tag.split('}')[-1], 
503                 'w').write(etRefC14n)
504       
505            self.assertEqual(zsiRefC14n, etRefC14n)
506       
507    def test19ExclC14nWithXPathAndInclusiveNSPfx(self):
508        # Exclusive C14N applied to portions of a SOAP message by extracting
509        # using XPath
510        inputFile = './soapGetAttCertResponse.xml'
511       
512        from xml.xpath.Context import Context
513        from xml import xpath
514        from xml.dom.ext.reader import PyExpat
515        reader = PyExpat.Reader()
516        dom = reader.fromStream(open(inputFile))
517        processorNss = \
518        {
519            'wsu': \
520"http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd",
521        }
522   
523        ctxt = Context(dom, processorNss=processorNss)
524        zsiRefNodes = xpath.Evaluate('//*[@wsu:Id]', 
525                                  contextNode=dom, 
526                                  context=ctxt)
527
528        # ElementTree
529        elem = ETC14N.parse(inputFile)
530       
531        # Extract nodes for signing
532        etRefNodes = elem.findall('.//*[@wsu:Id]', namespaces=processorNss)
533       
534        nsPfx = ['SOAP-ENV', 'ds']
535        for zsiRefNode, etRefNode in zip(zsiRefNodes, etRefNodes):
536            # Get ref node and all it's children
537            refSubsetList = getChildNodes(zsiRefNode)
538            zsiRefC14n = Canonicalize(dom, None, subset=refSubsetList,
539                                   unsuppressedPrefixes=nsPfx)
540
541#            print "_"*80
542#            print "Exclusive C14N with Prefixes %s:\n" % zsiRefNode.nodeName
543#            print zsiRefC14n
544            open('soapGetAttCertResponse-%s-exclC14nWithInclPrefixes.xml' % \
545                 zsiRefNode.localName, 
546                 'w').write(zsiRefC14n)
547       
548            # ElementTree equivalent     
549            f = StringIO()
550            ETC14N.write(elem, f, subset=etRefNode, exclusive=True,
551                         inclusive_namespaces=nsPfx)
552            etRefC14n = f.getvalue()
553           
554#            print "_"*80
555#            print "ElementTree Exclusive C14N %s:\n" % etRefNode.tag
556#            print etRefC14n
557            open('soapGetAttCertResponse-%s-et-exclC14n.xml' % \
558                 etRefNode.tag.split('}')[-1], 
559                 'w').write(etRefC14n)
560
561            self.assertEqual(zsiRefC14n, etRefC14n)
562     
563
def getChildNodes(node, nodeList=None):
    """Flatten a DOM node's attributes and descendants into a single list.

    @param node: DOM node to start from; it must provide 'attributes' and
    'childNodes'
    @param nodeList: optional list to add the nodes to.  When omitted, a new
    list starting with node itself is created.  When given, it is extended
    in place and node itself is not added.
    @return: the list of nodes
    """
    if nodeList is None:
        nodeList = [node]
    return _getChildNodes(node, nodeList=nodeList)


def _getChildNodes(node, nodeList=None):
    """Recursive worker for getChildNodes.

    For each node visited, its attribute nodes and then its child nodes are
    appended before descending into the children.

    @param node: DOM node whose attributes and descendants are collected
    @param nodeList: list extended in place; a new empty list is used when
    omitted
    @return: the list of nodes
    """
    # The default of None previously fell straight through to '+=' and
    # raised TypeError
    if nodeList is None:
        nodeList = []

    # 'attributes' is None for nodes that can't carry any
    if node.attributes is not None:
        nodeList += node.attributes.values()
    nodeList += node.childNodes
    for childNode in node.childNodes:
        _getChildNodes(childNode, nodeList)
    return nodeList
577
# Run the tests in this module when it is executed as a script
if __name__ == "__main__":
    unittest.main()
580
Note: See TracBrowser for help on using the repository browser.