source: TI12-security/trunk/python/ndg.security.test/ndg/security/test/elementTreeC14n/testC14n.py @ 3580

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI12-security/trunk/python/ndg.security.test/ndg/security/test/elementTreeC14n/testC14n.py@3580
Revision 3580, 17.6 KB checked in by pjkersha, 14 years ago (diff)

Updated and tested against 1.3a6 10 March ET release:

  • Tests 17 and 18 now work - inclusive and exclusive C14N on SOAP message subsets.
  • Added test 19 for inclusive namespace test but fails.
  • Property svn:executable set to *
Line 
1#!/usr/bin/env python
2"""NDG ElementTreeC14n class unit tests
3
4NERC Data Grid Project
5"""
6__author__ = "P J Kershaw"
7__date__ = "03/01/07"
8__copyright__ = "(C) 2007 STFC & NERC"
9__license__ = \
10"""This software may be distributed under the terms of the Q Public
11License, version 1.0 or later."""
12__contact__ = "P.J.Kershaw@rl.ac.uk"
13__revision__ = '$Id: ElementTreeC14nTest.py 3202 2008-01-11 13:42:34Z pjkersha $'
14
15import unittest
16import os
17import sys
18import getpass
19import traceback
20
21from difflib import unified_diff
22
23from StringIO import StringIO
24
25from elementtree import ElementC14N as ETC14N
26from elementtree import ElementTree as ET
27
28# Minidom based Canonicalization from ZSI for comparison
29from ZSI.wstools.c14n import Canonicalize
30
31from xml.xpath.Context import Context
32from xml import xpath
33from xml.dom.ext.reader import PyExpat
34
# Short aliases for two os.path helpers.  Neither alias is referenced by the
# test code visible in this module.
xpdVars = os.path.expandvars
jnPath = os.path.join
37
38class ElementTreeC14nTestCase(unittest.TestCase):
39   
40    def setUp(self):
41       
42        if 'NDGSEC_INT_DEBUG' in os.environ:
43            import pdb
44            pdb.set_trace()         
45
46    def assertEqual(self, a, b):
47        '''Override default to get something easy to read than super class
48        behaviour'''
49        if a != b:
50            diffGen = unified_diff(a.split('\n'), b.split('\n'))
51            raise AssertionError('\n'+'\n'.join(diffGen))
52       
53    def test01UTF8DocEncoding(self):
54       
55        # http://www.w3.org/TR/xml-c14n#Example-UTF8
56        xml = '<?xml version="1.0" encoding="ISO-8859-1"?><doc>&#169;</doc>'
57        elem = ET.fromstring(xml)
58        f = StringIO()
59        ETC14N.write(elem, f)
60        c14n = f.getvalue()
61        #self.assertEqual(c14n, '<doc>#xC2#xA9</doc>')
62        self.assertEqual(c14n, '<doc>\xC2\xA9</doc>')
63
64    def test01aPIsCommentsAndOutsideOfDocElem(self):
65        # http://www.w3.org/TR/xml-c14n#Example-OutsideDoc - PIs, Comments, and
66        # Outside of Document Element
67        xml = \
68'''<?xml version="1.0"?>
69
70<?xml-stylesheet   href="doc.xsl"
71   type="text/xsl"   ?>
72
73<!DOCTYPE doc SYSTEM "doc.dtd">
74
75<doc>Hello, world!<!-- Comment 1 --></doc>
76
77<?pi-without-data     ?>
78
79<!-- Comment 2 -->
80
81<!-- Comment 3 -->'''
82
83        exptdC14n = \
84'''<?xml-stylesheet href="doc.xsl"
85   type="text/xsl"   ?>
86<doc>Hello, world!</doc>
87<?pi-without-data?>'''
88
89        elem = ET.fromstring(xml)
90        f = StringIO()
91        ETC14N.write(elem, f)
92        c14n = f.getvalue()
93        self.assertEqual(c14n, exptdC14n)
94
95   
96    def test02NormalizeLineBreaks(self):
97        xml = '<?xml version="1.0" encoding="UTF-8"?>\r\n<a/>\r\n'
98        elem = ET.fromstring(xml)
99        f = StringIO()
100        ET.ElementTree(elem).write_c14n(f)
101        c14n = f.getvalue()
102        self.failIf('\r' in c14n, "Carriage return \r char found in c14n")
103
104   
    def test03NormalizedAttrVals(self):
        # TODO: placeholder - nothing is checked here yet, so this test
        # always passes
        pass
107
108   
109    def test04CharAndParsedEntityRefsReplaced(self):
110        xml = '''<!DOCTYPE doc [
111<!ATTLIST doc attrExtEnt ENTITY #IMPLIED>
112<!ENTITY ent1 "Hello">
113<!ENTITY ent2 SYSTEM "world.txt">
114<!ENTITY entExt SYSTEM "earth.gif" NDATA gif>
115<!NOTATION gif SYSTEM "viewgif.exe">
116]>
117<doc attrExtEnt="entExt">
118   &ent1;, &ent2;!
119</doc>
120
121<!-- Let world.txt contain "world" (excluding the quotes) -->'''
122
123        exptdC14n = '''<doc attrExtEnt="entExt">
124   Hello, world!
125</doc>'''
126        elem = ET.fromstring(xml)
127        f = StringIO()
128        ET.ElementTree(elem).write_c14n(f)
129        c14n = f.getvalue()
130        self.assertEqual(c14n, exptdC14n)
131       
132   
133    def test05CDATASectionsReplaced(self):
134        xml = \
135"""<?xml version="1.0" encoding="UTF-8"?>
136<script>
137<![CDATA[
138function matchwo(a,b)
139{
140if (a < b && a > 0) then
141   {
142   print("Match");
143   return 1;
144   }
145else
146   {
147   print('Different');
148   return 0;
149   }
150}
151]]>
152</script>
153"""
154        elem = ET.fromstring(xml)
155        f = StringIO()
156        ET.ElementTree(elem).write_c14n(f)
157        c14n = f.getvalue()
158       
159        self.failIf('CDATA' in c14n, "CDATA not removed, c14n = %s" % c14n)
160        self.failUnless('&lt;' in c14n,
161                        "Less than not converted, c14n = %s" % c14n)
162        self.failUnless('&gt;' in c14n, 
163                        "Greater than not converted, c14n = %s" % c14n)
164        self.failUnless('&amp;' in c14n, 
165                        "Ampersand not converted, c14n = %s" % c14n)
166
167        # Test for double quotes / apostrophes?
168       
169   
170    def test06XMLDeclAndDTDRemoved(self):
171        xmlDecl = '<?xml version="1.0" encoding="UTF-8"?>'
172        dtd = \
173"""<!DOCTYPE note [
174  <!ELEMENT note (to,from,heading,body)>
175  <!ELEMENT to      (#PCDATA)>
176  <!ELEMENT from    (#PCDATA)>
177  <!ELEMENT heading (#PCDATA)>
178  <!ELEMENT body    (#PCDATA)>
179]>
180"""
181        xml = \
182"""%s
183%s<a/>""" % (xmlDecl, dtd)
184
185        elem = ET.fromstring(xml)
186        f = StringIO()
187        ET.ElementTree(elem).write_c14n(f)
188        c14n = f.getvalue()
189        self.failIf('<?xml version="1.0" encoding="UTF-8"?>' in c14n, 
190                    "XML Declaration not removed")
191        self.failIf(dtd in c14n, "DTD not removed")
192
193   
194    def test07EmptyElemsConvertedStartEndPairs(self):
195        elem = ET.fromstring('<?xml version="1.0" encoding="UTF-8"?><a/>')
196        f = StringIO()
197        ET.ElementTree(elem).write_c14n(f)
198        c14n = f.getvalue()
199        self.failUnless(c14n == '<a></a>', "C14N = %s" % c14n)
200
201         
202    def test08WhitespaceNormalized(self):
203        # ...outside the document element and within start and end tags
204        dat = \
205'''        1 2
206  3'''
207 
208        xml = \
209'''<?xml version="1.0" encoding="UTF-8"?>
210<doc xmlns="http://example.com/default">
211  <a
212     a2="2"   a1="1"
213  >%s</a>
214</doc>
215
216''' % dat
217
218        elem = ET.fromstring(xml)
219        f = StringIO()
220        ET.ElementTree(elem).write_c14n(f)
221        c14n = f.getvalue()
222       
223        self.failUnless('a1="1" a2="2"' in c14n, 
224                        "Expecting single space between attributes")
225        self.failUnless(dat in c14n, 
226                        "Expecting element content to be preserved")
227       
228        sub = c14n[c14n.find('<a'):c14n.find('>')]
229        self.failIf('\n' in sub, 
230                    "Expecting removal of line breaks for 'a' element")
231     
232     
233    def test09WhitespaceInCharContentRetained(self):
234        # http://www.w3.org/TR/xml-c14n#Example-WhitespaceInContent
235        # Nb. excludes chars removed during line break normalization
236        xml = \
237'''<doc>
238   <clean>   </clean>
239   <dirty>   A   B   </dirty>
240   <mixed>
241      A
242      <clean>   </clean>
243      B
244      <dirty>   A   B   </dirty>
245      C
246   </mixed>
247</doc>'''
248        elem = ET.fromstring(xml)
249        f = StringIO()
250        ETC14N.write(elem, f)
251        c14n = f.getvalue()
252       
253        # In this case the canonicalized form should be identical to the
254        # original
255        self.assertEqual(c14n, xml)
256
257       
258    def test10AttrValDelimitersSet2DblQuotes(self):
259        xml = \
260"""<?xml version="1.0" encoding="UTF-8"?>
261  <b y:a1='1' a3='"3"'
262     xmlns:y='http://example.com/y' y:a2='2'/>
263"""
264
265        elem = ET.fromstring(xml)
266        f = StringIO()
267        ET.ElementTree(elem).write_c14n(f)
268        c14n = f.getvalue()
269        self.failIf("'" in c14n, 
270                    "Expecting removal of apostrophes C14N = %s" % c14n)
271
272   
    def test11SpecialCharsReplaced(self):
        # i.e. within attribute values and character content
        # TODO: placeholder - nothing is checked here yet, so this test
        # always passes
        pass
276       
277       
278    def test12SuperflousNSdeclsRemoved(self):
279        extraNS = "http://example.com/default"
280        xml = \
281"""<?xml version="1.0" encoding="UTF-8"?>
282<doc xmlns:x="http://example.com/x" xmlns="%s">
283  <b y:a1='1' xmlns="%s" a3='"3"'
284     xmlns:y='http://example.com/y' y:a2='2'/>
285</doc>""" % (extraNS, extraNS)
286
287        elem = ET.fromstring(xml)
288        f = StringIO()
289        ET.ElementTree(elem).write_c14n(f)
290        c14n = f.getvalue()
291       
292        # Namespace should now only occur once...
293        self.failUnless(c14n.find(extraNS) == c14n.rfind(extraNS), 
294                    "Expecting removal of extra NS %s in output = %s" % \
295                    (extraNS, c14n))
296       
297       
298    def test13DefAttrsAdded2EachElem(self):
299        # Ref. http://www.w3.org/TR/xml-c14n#Example-SETags
300        xml = '''<!DOCTYPE doc [<!ATTLIST e9 attr CDATA "default">]>
301<doc>
302   <e1   />
303   <e2   ></e2>
304   <e3   name = "elem3"   id="elem3"   />
305   <e4   name="elem4"   id="elem4"   ></e4>
306   <e5 a:attr="out" b:attr="sorted" attr2="all" attr="I'm"
307      xmlns:b="http://www.ietf.org"
308      xmlns:a="http://www.w3.org"
309      xmlns="http://example.org"/>
310   <e6 xmlns="" xmlns:a="http://www.w3.org">
311      <e7 xmlns="http://www.ietf.org">
312         <e8 xmlns="" xmlns:a="http://www.w3.org">
313            <e9 xmlns="" xmlns:a="http://www.ietf.org"/>
314         </e8>
315      </e7>
316   </e6>
317</doc>'''
318
319        elem = ET.fromstring(xml)
320        f = StringIO()
321#        ET.ElementTree(elem).write_c14n(f)
322        ETC14N.write(elem, f)
323        c14n = f.getvalue()
324
325        exptdC14n = '''<doc>
326   <e1></e1>
327   <e2></e2>
328   <e3 id="elem3" name="elem3"></e3>
329   <e4 id="elem4" name="elem4"></e4>
330   <e5 xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I'm" attr2="all" b:attr="sorted" a:attr="out"></e5>
331   <e6 xmlns:a="http://www.w3.org">
332      <e7 xmlns="http://www.ietf.org">
333         <e8 xmlns="">
334            <e9 xmlns:a="http://www.ietf.org" attr="default"></e9>
335         </e8>
336      </e7>
337   </e6>
338</doc>'''
339        self.assertEqual(c14n, exptdC14n)
340       
341    def test14DocumentSubsets(self):
342        # Ref. http://www.w3.org/TR/xml-c14n#Example-DocSubsets
343        xml = \
344"""<!DOCTYPE doc [
345<!ATTLIST e2 xml:space (default|preserve) 'preserve'>
346<!ATTLIST e3 id ID #IMPLIED>
347]>
348<doc xmlns="http://www.ietf.org" xmlns:w3c="http://www.w3.org">
349   <e1>
350      <e2 xmlns="">
351         <e3 id="E3"/>
352      </e2>
353   </e1>
354</doc>"""
355
356#'''<!-- Evaluate with declaration xmlns:ietf="http://www.ietf.org" -->
357        xpathExpr = \
358'''
359(//. | //@* | //namespace::*)
360[
361   self::ietf:e1 or (parent::ietf:e1 and not(self::text() or self::e2))
362   or
363   count(id("E3")|ancestor-or-self::node()) = count(ancestor-or-self::node())
364]'''
365
366        exptdC14n = \
367'<e1 xmlns="http://www.ietf.org" xmlns:w3c="http://www.w3.org"><e3 xmlns="" id="E3" xml:space="preserve"></e3></e1>'
368
369        elem = ET.fromstring(xml)
370        f = StringIO()
371        subElem = elem.find(xpathExpr)
372        ETC14N.write(elem, f, subset_element=subElem)
373        c14n = f.getvalue()
374
375    def test15CmpZSIc14n(self):
376        elem = ETC14N.parse('./windows-ac.xml')
377        ETC14N.write(elem, './et-c14n-ac.xml')
378       
379        from xml.dom.ext.reader import PyExpat
380        reader = PyExpat.Reader()
381        dom = reader.fromStream(open('./windows-ac.xml'))
382       
383        zsiC14n = Canonicalize(dom)
384        etC14n = open('./et-c14n-ac.xml').read()
385        open('./zsi-c14n-ac.xml', 'w').write(zsiC14n)
386       
387        etC14n = open('./et-c14n-ac.xml').read()
388
389        self.failUnless(etC14n == zsiC14n, "ZSI C14N output differs")
390       
391    def test16Cmplxmlc14n(self):
392        from StringIO import StringIO
393
394        elem = ETC14N.parse('./windows-ac.xml')
395        ETC14N.write(elem, './et-c14n-ac-2.xml')
396       
397       
398        from lxml import etree as lxmlET
399       
400        lxmlElem = lxmlET.parse('./windows-ac.xml')
401        lxmlETf = StringIO()
402        lxmlElem.write_c14n(lxmlETf)
403        open('./lxml-c14n-ac.xml', 'w').write(lxmlETf.getvalue())
404       
405        f1 = open('./et-c14n-ac-2.xml')
406        etC14n = f1.read()
407        f1.close()
408       
409        self.failUnless(etC14n == lxmlETf.getvalue(),
410                        "lxml C14N output differs")
411       
412       
413    def test17InclusiveC14nWithXPath(self):
414        # Inclusive Canonicalization of portions of a SOAP message extracted
415        # using XPath
416       
417        inputFile = './soapGetAttCertResponse.xml'
418       
419        reader = PyExpat.Reader()
420        dom = reader.fromStream(open(inputFile))
421        processorNss = \
422        {
423            'wsu': \
424"http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd",
425        }
426   
427        ctxt = Context(dom, processorNss=processorNss)
428        zsiRefNodes = xpath.Evaluate('//*[@wsu:Id]', 
429                                  contextNode=dom, 
430                                  context=ctxt)
431       
432        # ElementTree
433        elem = ETC14N.parse(inputFile)
434       
435        # Extract nodes for signing
436        etRefNodes = elem.findall('.//*[@wsu:Id]', namespaces=processorNss)
437       
438        for zsiRefNode, etRefNode in zip(zsiRefNodes, etRefNodes):
439            # Get ref node and all it's children
440            zsiRefC14n = Canonicalize(zsiRefNode)
441
442#            print "_"*80
443#            print "ZSI Inclusive C14N %s:\n" % zsiRefNode.nodeName
444#            print zsiRefC14n
445            open('soapGetAttCertResponse-%s-zsi-c14n.xml'%zsiRefNode.localName, 
446                 'w').write(zsiRefC14n)
447                 
448            f = StringIO()
449            ETC14N.write(elem, f, subset=etRefNode)
450            etRefC14n = f.getvalue()
451           
452#            print "_"*80
453#            print "ElementTree Inclusive C14N %s:\n" % etRefNode.tag
454#            print etRefC14n
455            open('soapGetAttCertResponse-%s-et-c14n.xml' % \
456                 etRefNode.tag.split('}')[-1], 
457                 'w').write(etRefC14n)
458            self.assertEqual(zsiRefC14n, etRefC14n)
459       
460    def test18ExclC14nWithXPath(self):
461        # Exclusive C14N applied to portions of a SOAP message by extracting
462        # using XPath
463       
464        inputFile = './soapGetAttCertResponse.xml'
465
466        reader = PyExpat.Reader()
467        dom = reader.fromStream(open(inputFile))
468        processorNss = \
469        {
470            'wsu': \
471"http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd",
472        }
473   
474        ctxt = Context(dom, processorNss=processorNss)
475        zsiRefNodes = xpath.Evaluate('//*[@wsu:Id]', 
476                                  contextNode=dom, 
477                                  context=ctxt)
478        # ElementTree
479        elem = ETC14N.parse(inputFile)
480       
481        # Extract nodes for signing
482        etRefNodes = elem.findall('.//*[@wsu:Id]', namespaces=processorNss)
483       
484        for zsiRefNode, etRefNode in zip(zsiRefNodes, etRefNodes):
485            # Get ref node and all it's children
486            refSubsetList = getChildNodes(zsiRefNode)
487            zsiRefC14n = Canonicalize(dom, None, subset=refSubsetList,
488                                   unsuppressedPrefixes=[])
489
490#            print "_"*80
491#            print "ZSI Exclusive C14N %s:\n" % zsiRefNode.nodeName
492#            print zsiRefC14n
493            open('soapGetAttCertResponse-%s-exclC14n.xml'%zsiRefNode.localName, 
494                 'w').write(zsiRefC14n)
495       
496            # ElementTree equivalent     
497            f = StringIO()
498            ETC14N.write(elem, f, etRefNode, exclusive=True)
499            etRefC14n = f.getvalue()
500           
501#            print "_"*80
502#            print "ElementTree Exclusive C14N %s:\n" % etRefNode.tag
503#            print etRefC14n
504            open('soapGetAttCertResponse-%s-et-exclC14n.xml' % \
505                 etRefNode.tag.split('}')[-1], 
506                 'w').write(etRefC14n)
507       
508            self.assertEqual(zsiRefC14n, etRefC14n)
509       
510    def test19ExclC14nWithXPathAndInclusiveNSPfx(self):
511        # Exclusive C14N applied to portions of a SOAP message by extracting
512        # using XPath
513        inputFile = './soapGetAttCertResponse.xml'
514       
515        from xml.xpath.Context import Context
516        from xml import xpath
517        from xml.dom.ext.reader import PyExpat
518        reader = PyExpat.Reader()
519        dom = reader.fromStream(open(inputFile))
520        processorNss = \
521        {
522            'wsu': \
523"http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd",
524        }
525   
526        ctxt = Context(dom, processorNss=processorNss)
527        zsiRefNodes = xpath.Evaluate('//*[@wsu:Id]', 
528                                  contextNode=dom, 
529                                  context=ctxt)
530
531        # ElementTree
532        elem = ETC14N.parse(inputFile)
533       
534        # Extract nodes for signing
535        etRefNodes = elem.findall('.//*[@wsu:Id]', namespaces=processorNss)
536       
537        nsPfx = ['SOAP-ENV', 'ds']
538        for zsiRefNode, etRefNode in zip(zsiRefNodes, etRefNodes):
539            # Get ref node and all it's children
540            refSubsetList = getChildNodes(zsiRefNode)
541            zsiRefC14n = Canonicalize(dom, None, subset=refSubsetList,
542                                   unsuppressedPrefixes=nsPfx)
543
544#            print "_"*80
545#            print "Exclusive C14N with Prefixes %s:\n" % zsiRefNode.nodeName
546#            print zsiRefC14n
547            open('soapGetAttCertResponse-%s-exclC14nWithInclPrefixes.xml' % \
548                 zsiRefNode.localName, 
549                 'w').write(zsiRefC14n)
550       
551            # ElementTree equivalent     
552            f = StringIO()
553            ETC14N.write(elem, f, subset=etRefNode, exclusive=True,
554                         inclusive_namespaces=nsPfx)
555            etRefC14n = f.getvalue()
556           
557#            print "_"*80
558#            print "ElementTree Exclusive C14N %s:\n" % etRefNode.tag
559#            print etRefC14n
560            open('soapGetAttCertResponse-%s-et-exclC14n.xml' % \
561                 etRefNode.tag.split('}')[-1], 
562                 'w').write(etRefC14n)
563
564            self.assertEqual(zsiRefC14n, etRefC14n)
565     
566
def getChildNodes(node, nodeList=None):
    """Return a flat list of the nodes reachable from node.

    With no nodeList given, the result starts with node itself and is then
    filled in by _getChildNodes.  A nodeList supplied by the caller is
    extended in place and returned; node is not added to it here.
    """
    collected = [node] if nodeList is None else nodeList
    return _getChildNodes(node, nodeList=collected)
571           
572def _getChildNodes(node, nodeList=None):
573
574    if node.attributes is not None:
575        nodeList += node.attributes.values() 
576    nodeList += node.childNodes
577    for childNode in node.childNodes:
578        _getChildNodes(childNode, nodeList)
579    return nodeList
580
# Hand control to the unittest command line runner when this module is
# executed as a script
if __name__ == "__main__":
    unittest.main()
583
Note: See TracBrowser for help on using the repository browser.