source: TI12-security/trunk/python/Tests/foursuite_c14n/test_c14n.py @ 5056

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI12-security/trunk/python/Tests/foursuite_c14n/test_c14n.py@5056
Revision 5056, 18.0 KB checked in by pjkersha, 11 years ago (diff)

Testing 4Suite-XML's C14N implementation. This package contains an adapted version of the unit tests used to validate the ElementTree C14N. The 4Suite version fails on three tests but none of these are important for the requirement to support WS-Security: one test is using a complicated XPath, the other two tests fail because of undefined URNs in DOCTYPE elements. The 4Suite implementation should be suitable then for use with a ZSI based WS-Security signature handler. It should give better performance than the DOM based version and avoid PyXML incompatibilities for future Python 2.6 migration.

  • Property svn:executable set to *
Line 
1#!/usr/bin/env python
2"""NDG 4Suite XML C14N tests
3
4NERC Data Grid Project
5"""
6__author__ = "P J Kershaw"
7__date__ = "03/02/09"
8__copyright__ = "(C) 2009 Science and Technology Facilities Council"
9__license__ = "BSD - see LICENSE file in top-level directory"
10__contact__ = "Philip.Kershaw@stfc.ac.uk"
11__revision__ = '$Id: ElementTreeC14nTest.py 3202 2008-01-11 13:42:34Z pjkersha $'
12import unittest
13import os
14import sys
15import getpass
16import traceback
17
18from difflib import unified_diff
19
20from StringIO import StringIO
21
22
23from Ft.Xml.Domlette import NonvalidatingReader, CanonicalPrint
24from Ft.Xml import XPath
25
26# Minidom based Canonicalization from ZSI for comparison
27from ZSI.wstools.c14n import Canonicalize
28
29from xml.xpath.Context import Context
30from xml import xpath
31from xml.dom.ext.reader import PyExpat
32
from os.path import dirname

# Short aliases for frequently used os.path helpers.
xpdVars = os.path.expandvars
jnPath = os.path.join

# Directory containing this test module; test data files live alongside it.
_thisDir = dirname(__file__)


def mkPath(file):
    """Return the path of ``file`` joined onto this module's directory."""
    return jnPath(_thisDir, file)
38
39class ElementTreeC14nTestCase(unittest.TestCase):
40   
41    def setUp(self):
42       
43        if 'NDGSEC_INT_DEBUG' in os.environ:
44            import pdb
45            pdb.set_trace()         
46
47    def assertEqual(self, a, b):
48        '''Override default to get something easy to read than super class
49        behaviour'''
50        if a != b:
51            diffGen = unified_diff(a.split('\n'), b.split('\n'))
52            raise AssertionError('\n'+'\n'.join(diffGen))
53       
54    def test01UTF8DocEncoding(self):
55       
56        # http://www.w3.org/TR/xml-c14n#Example-UTF8
57        xml = '<?xml version="1.0" encoding="ISO-8859-1"?><doc>&#169;</doc>'
58        ftDoc = NonvalidatingReader.parseString(xml)
59        f = StringIO()
60        CanonicalPrint(ftDoc, f)
61        c14n = f.getvalue()
62        #self.assertEqual(c14n, '<doc>#xC2#xA9</doc>')
63        self.assertEqual(c14n, '<doc>\xC2\xA9</doc>')
64
65# Fails but not critical to use case - problems with URNs:
66#
67# UriException: The URI scheme urn is not supported by resolver FtUriResolver
68    def test01aPIsCommentsAndOutsideOfDocElem(self):
69        # http://www.w3.org/TR/xml-c14n#Example-OutsideDoc - PIs, Comments, and
70        # Outside of Document Element
71        xml = \
72'''<?xml version="1.0"?>
73
74<?xml-stylesheet   href="doc.xsl"
75   type="text/xsl"   ?>
76
77<!DOCTYPE doc SYSTEM>
78
79<doc>Hello, world!<!-- Comment 1 --></doc>
80
81<?pi-without-data     ?>
82
83<!-- Comment 2 -->
84
85<!-- Comment 3 -->'''
86#'''<?xml version="1.0"?>
87#
88#<?xml-stylesheet   href="doc.xsl"
89#   type="text/xsl"   ?>
90#
91#<!DOCTYPE doc SYSTEM "doc.dtd">
92#
93#<doc>Hello, world!<!-- Comment 1 --></doc>
94#
95#<?pi-without-data     ?>
96#
97#<!-- Comment 2 -->
98#
99#<!-- Comment 3 -->'''
100
101        exptdC14n = \
102'''<?xml-stylesheet href="doc.xsl"
103   type="text/xsl"   ?>
104<doc>Hello, world!</doc>
105<?pi-without-data?>'''
106
107        ftDoc = NonvalidatingReader.parseString(xml)
108        f = StringIO()
109        CanonicalPrint(ftDoc, f)
110        c14n = f.getvalue()
111        self.assertEqual(c14n, exptdC14n)
112
113   
114    def test02NormalizeLineBreaks(self):
115        xml = '<?xml version="1.0" encoding="UTF-8"?>\r\n<a/>\r\n'
116        ftDoc = NonvalidatingReader.parseString(xml)
117        f = StringIO()
118        CanonicalPrint(ftDoc, f)
119        c14n = f.getvalue()
120        self.failIf('\r' in c14n, "Carriage return \r char found in c14n")
121
122   
123    def test03NormalizedAttrVals(self):
124        pass
125
126   
127# Fails but not critical to use case - problems with URNs:
128#
129# UriException: The URI scheme urn is not supported by resolver FtUriResolver
130    def test04CharAndParsedEntityRefsReplaced(self):
131        xml = '''<!DOCTYPE doc [
132<!ATTLIST doc attrExtEnt ENTITY #IMPLIED>
133<!ENTITY ent1 "Hello">
134<!ENTITY ent2 SYSTEM "world.txt">
135<!ENTITY entExt SYSTEM "earth.gif" NDATA gif>
136<!NOTATION gif SYSTEM "viewgif.exe">
137]>
138<doc attrExtEnt="entExt">
139   &ent1;, &ent2;!
140</doc>
141
142<!-- Let world.txt contain "world" (excluding the quotes) -->'''
143
144        exptdC14n = '''<doc attrExtEnt="entExt">
145   Hello, world!
146</doc>'''
147        ftDoc = NonvalidatingReader.parseString(xml)
148        f = StringIO()
149        CanonicalPrint(ftDoc, f)
150        c14n = f.getvalue()
151        self.assertEqual(c14n, exptdC14n)
152       
153   
154    def test05CDATASectionsReplaced(self):
155        xml = \
156"""<?xml version="1.0" encoding="UTF-8"?>
157<script>
158<![CDATA[
159function matchwo(a,b)
160{
161if (a < b && a > 0) then
162   {
163   print("Match");
164   return 1;
165   }
166else
167   {
168   print('Different');
169   return 0;
170   }
171}
172]]>
173</script>
174"""
175        ftDoc = NonvalidatingReader.parseString(xml)
176        f = StringIO()
177        CanonicalPrint(ftDoc, f)
178        c14n = f.getvalue()
179       
180        self.failIf('CDATA' in c14n, "CDATA not removed, c14n = %s" % c14n)
181        self.failUnless('&lt;' in c14n,
182                        "Less than not converted, c14n = %s" % c14n)
183        self.failUnless('&gt;' in c14n, 
184                        "Greater than not converted, c14n = %s" % c14n)
185        self.failUnless('&amp;' in c14n, 
186                        "Ampersand not converted, c14n = %s" % c14n)
187
188        # Test for double quotes / apostrophes?
189       
190   
191    def test06XMLDeclAndDTDRemoved(self):
192        xmlDecl = '<?xml version="1.0" encoding="UTF-8"?>'
193        dtd = \
194"""<!DOCTYPE note [
195  <!ELEMENT note (to,from,heading,body)>
196  <!ELEMENT to      (#PCDATA)>
197  <!ELEMENT from    (#PCDATA)>
198  <!ELEMENT heading (#PCDATA)>
199  <!ELEMENT body    (#PCDATA)>
200]>
201"""
202        xml = \
203"""%s
204%s<a/>""" % (xmlDecl, dtd)
205
206        ftDoc = NonvalidatingReader.parseString(xml)
207        f = StringIO()
208        CanonicalPrint(ftDoc, f)
209        c14n = f.getvalue()
210        self.failIf('<?xml version="1.0" encoding="UTF-8"?>' in c14n, 
211                    "XML Declaration not removed")
212        self.failIf(dtd in c14n, "DTD not removed")
213
214   
215    def test07EmptyElemsConvertedStartEndPairs(self):
216        xml = '<?xml version="1.0" encoding="UTF-8"?><a/>'
217        ftDoc = NonvalidatingReader.parseString(xml)
218        f = StringIO()
219        CanonicalPrint(ftDoc, f)
220        c14n = f.getvalue()
221        self.failUnless(c14n == '<a></a>', "C14N = %s" % c14n)
222
223         
224    def test08WhitespaceNormalized(self):
225        # ...outside the document element and within start and end tags
226        dat = \
227'''        1 2
228  3'''
229 
230        xml = \
231'''<?xml version="1.0" encoding="UTF-8"?>
232<doc xmlns="http://example.com/default">
233  <a
234     a2="2"   a1="1"
235  >%s</a>
236</doc>
237
238''' % dat
239
240        ftDoc = NonvalidatingReader.parseString(xml)
241        f = StringIO()
242        CanonicalPrint(ftDoc, f)
243        c14n = f.getvalue()
244       
245        self.failUnless('a1="1" a2="2"' in c14n, 
246                        "Expecting single space between attributes")
247        self.failUnless(dat in c14n, 
248                        "Expecting element content to be preserved")
249       
250        sub = c14n[c14n.find('<a'):c14n.find('>')]
251        self.failIf('\n' in sub, 
252                    "Expecting removal of line breaks for 'a' element")
253     
254     
255    def test09WhitespaceInCharContentRetained(self):
256        # http://www.w3.org/TR/xml-c14n#Example-WhitespaceInContent
257        # Nb. excludes chars removed during line break normalization
258        xml = \
259'''<doc>
260   <clean>   </clean>
261   <dirty>   A   B   </dirty>
262   <mixed>
263      A
264      <clean>   </clean>
265      B
266      <dirty>   A   B   </dirty>
267      C
268   </mixed>
269</doc>'''
270        ftDoc = NonvalidatingReader.parseString(xml)
271        f = StringIO()
272        CanonicalPrint(ftDoc, f)
273        c14n = f.getvalue()
274       
275        # In this case the canonicalized form should be identical to the
276        # original
277        self.assertEqual(c14n, xml)
278
279       
280    def test10AttrValDelimitersSet2DblQuotes(self):
281        xml = \
282"""<?xml version="1.0" encoding="UTF-8"?>
283  <b y:a1='1' a3='"3"'
284     xmlns:y='http://example.com/y' y:a2='2'/>
285"""
286
287        ftDoc = NonvalidatingReader.parseString(xml)
288        f = StringIO()
289        CanonicalPrint(ftDoc, f)
290        c14n = f.getvalue()
291        self.failIf("'" in c14n, 
292                    "Expecting removal of apostrophes C14N = %s" % c14n)
293
294   
295    def test11SpecialCharsReplaced(self):
296        # i.e. within attribute values and character content
297        pass
298       
299       
300    def test12SuperflousNSdeclsRemoved(self):
301        extraNS = "http://example.com/default"
302        xml = \
303"""<?xml version="1.0" encoding="UTF-8"?>
304<doc xmlns:x="http://example.com/x" xmlns="%s">
305  <b y:a1='1' xmlns="%s" a3='"3"'
306     xmlns:y='http://example.com/y' y:a2='2'/>
307</doc>""" % (extraNS, extraNS)
308
309        ftDoc = NonvalidatingReader.parseString(xml)
310        f = StringIO()
311        CanonicalPrint(ftDoc, f)
312        c14n = f.getvalue()
313       
314        # Namespace should now only occur once...
315        self.failUnless(c14n.find(extraNS) == c14n.rfind(extraNS), 
316                    "Expecting removal of extra NS %s in output = %s" % \
317                    (extraNS, c14n))
318       
319       
320    def test13DefAttrsAdded2EachElem(self):
321        # Ref. http://www.w3.org/TR/xml-c14n#Example-SETags
322        xml = '''<!DOCTYPE doc [<!ATTLIST e9 attr CDATA "default">]>
323<doc>
324   <e1   />
325   <e2   ></e2>
326   <e3   name = "elem3"   id="elem3"   />
327   <e4   name="elem4"   id="elem4"   ></e4>
328   <e5 a:attr="out" b:attr="sorted" attr2="all" attr="I'm"
329      xmlns:b="http://www.ietf.org"
330      xmlns:a="http://www.w3.org"
331      xmlns="http://example.org"/>
332   <e6 xmlns="" xmlns:a="http://www.w3.org">
333      <e7 xmlns="http://www.ietf.org">
334         <e8 xmlns="" xmlns:a="http://www.w3.org">
335            <e9 xmlns="" xmlns:a="http://www.ietf.org"/>
336         </e8>
337      </e7>
338   </e6>
339</doc>'''
340
341        ftDoc = NonvalidatingReader.parseString(xml)
342        f = StringIO()
343        CanonicalPrint(ftDoc, f)
344        c14n = f.getvalue()
345
346        exptdC14n = '''<doc>
347   <e1></e1>
348   <e2></e2>
349   <e3 id="elem3" name="elem3"></e3>
350   <e4 id="elem4" name="elem4"></e4>
351   <e5 xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I'm" attr2="all" b:attr="sorted" a:attr="out"></e5>
352   <e6 xmlns:a="http://www.w3.org">
353      <e7 xmlns="http://www.ietf.org">
354         <e8 xmlns="">
355            <e9 xmlns:a="http://www.ietf.org" attr="default"></e9>
356         </e8>
357      </e7>
358   </e6>
359</doc>'''
360        self.assertEqual(c14n, exptdC14n)
361
362# Fails with:
363#
364# RuntimeException: Undefined namespace prefix: "ietf"       
365    def test14DocumentSubsets(self):
366        # Ref. http://www.w3.org/TR/xml-c14n#Example-DocSubsets
367        xml = \
368"""<!DOCTYPE doc [
369<!ATTLIST e2 xml:space (default|preserve) 'preserve'>
370<!ATTLIST e3 id ID #IMPLIED>
371]>
372<doc xmlns="http://www.ietf.org" xmlns:w3c="http://www.w3.org">
373   <e1>
374      <e2 xmlns="">
375         <e3 id="E3"/>
376      </e2>
377   </e1>
378</doc>"""
379
380#'''<!-- Evaluate with declaration xmlns:ietf="http://www.ietf.org" -->
381        xpathExpr = \
382'''
383(//. | //@* | //namespace::*)
384[
385   self::ietf:e1 or (parent::ietf:e1 and not(self::text() or self::e2))
386   or
387   count(id("E3")|ancestor-or-self::node()) = count(ancestor-or-self::node())
388]'''
389
390        exptdC14n = \
391'<e1 xmlns="http://www.ietf.org" xmlns:w3c="http://www.w3.org"><e3 xmlns="" id="E3" xml:space="preserve"></e3></e1>'
392
393        ftDoc = NonvalidatingReader.parseString(xml)
394
395        xpathExpression = XPath.Compile(xpathExpr)
396        ctx = XPath.Context.Context(ftDoc)
397        ftNode = xpathExpression.evaluate(ctx)
398        f = StringIO()
399        CanonicalPrint(ftNode, f)
400        c14n = f.getvalue()
401
402    def test15CmpZSIc14n(self):
403        ftDoc=NonvalidatingReader.parseUri('file://'+mkPath('windows-ac.xml'))       
404        ftOut = StringIO()
405        CanonicalPrint(ftDoc, ftOut)
406        ftC14n = ftOut.getvalue()
407       
408        reader = PyExpat.Reader()
409        dom = reader.fromStream(open('./windows-ac.xml'))
410       
411        zsiC14n = Canonicalize(dom)
412        self.failUnless(ftC14n == zsiC14n, "ZSI C14N output differs")
413       
414    def test16Cmplxmlc14n(self):
415        ftDoc=NonvalidatingReader.parseUri('file://'+mkPath('windows-ac.xml'))       
416        ftOut = StringIO()
417        CanonicalPrint(ftDoc, ftOut)
418        ftC14n = ftOut.getvalue()       
419       
420        from lxml import etree as lxmlET
421       
422        lxmlElem = lxmlET.parse('./windows-ac.xml')
423        lxmlETf = StringIO()
424        lxmlElem.write_c14n(lxmlETf)
425        lxmlETC14n = lxmlETf.getvalue()
426       
427        self.failUnless(ftC14n == lxmlETC14n, "lxml C14N output differs")
428       
429       
430    def test17InclusiveC14nWithXPath(self):
431        # Inclusive Canonicalization of portions of a SOAP message extracted
432        # using XPath
433       
434        inputFile = mkPath('soapGetAttCertResponse.xml')
435       
436        reader = PyExpat.Reader()
437        dom = reader.fromStream(open(inputFile))
438        processorNss = \
439        {
440            'wsu': ("http://docs.oasis-open.org/wss/2004/01/"
441                    "oasis-200401-wss-wssecurity-utility-1.0.xsd"),
442        }
443   
444        ctxt = Context(dom, processorNss=processorNss)
445        zsiRefNodes = xpath.Evaluate('//*[@wsu:Id]', 
446                                     contextNode=dom, 
447                                     context=ctxt)
448       
449        # 4Suite
450        ftDoc=NonvalidatingReader.parseUri('file://'+inputFile)       
451        ftOut = StringIO()
452       
453        # Extract nodes for signing
454        xpathExpression = XPath.Compile('//*[@wsu:Id]')
455        ctx = XPath.Context.Context(ftDoc, processorNss=processorNss)
456        ftRefNodes = xpathExpression.evaluate(ctx)
457       
458        for zsiRefNode, ftRefNode in zip(zsiRefNodes, ftRefNodes):
459            # Get ref node and all it's children
460            zsiRefC14n = Canonicalize(zsiRefNode)
461
462            print("_"*80)
463            print("ZSI Inclusive C14N %s:\n" % zsiRefNode.nodeName)
464            print(zsiRefC14n)
465                 
466            ftOut = StringIO()
467            CanonicalPrint(ftRefNode, ftOut)
468            ftRefC14n = ftOut.getvalue()       
469           
470            print("_"*80)
471            print("4Suite XML Inclusive C14N %s:\n", ftRefNode.nodeName)
472            print(ftRefC14n)
473            self.assertEqual(zsiRefC14n, ftRefC14n)
474       
475    def test18ExclC14nWithXPath(self):
476        # Exclusive C14N applied to portions of a SOAP message by extracting
477        # using XPath
478       
479        inputFile = mkPath('soapGetAttCertResponse.xml')
480
481        reader = PyExpat.Reader()
482        dom = reader.fromStream(open(inputFile))
483        processorNss = \
484        {
485            'wsu': ("http://docs.oasis-open.org/wss/2004/01/"
486                    "oasis-200401-wss-wssecurity-utility-1.0.xsd"),
487        }
488   
489        ctxt = Context(dom, processorNss=processorNss)
490        zsiRefNodes = xpath.Evaluate('//*[@wsu:Id]', 
491                                  contextNode=dom, 
492                                  context=ctxt)
493        # 4Suite
494        ftDoc=NonvalidatingReader.parseUri('file://'+inputFile)       
495        ftOut = StringIO()
496       
497        # Extract nodes for signing
498        xpathExpression = XPath.Compile('//*[@wsu:Id]')
499        ctx = XPath.Context.Context(ftDoc, processorNss=processorNss)
500        ftRefNodes = xpathExpression.evaluate(ctx)
501       
502        for zsiRefNode, ftRefNode in zip(zsiRefNodes, ftRefNodes):
503            # Get ref node and all it's children
504            refSubsetList = getChildNodes(zsiRefNode)
505            zsiRefC14n = Canonicalize(dom, None, subset=refSubsetList,
506                                      unsuppressedPrefixes=[])
507
508            print("_"*80)
509            print("ZSI Exclusive C14N %s:\n", zsiRefNode.nodeName)
510            print(zsiRefC14n)
511
512       
513            # 4Suite equivalent     
514            ftOut = StringIO()
515            CanonicalPrint(ftRefNode, stream=ftOut, exclusive=True)
516            ftRefC14n = ftOut.getvalue()       
517           
518            print("_"*80)
519            print("4Suite Exclusive C14N %s:\n", ftRefNode.nodeName)
520            print(ftRefC14n)
521       
522            self.assertEqual(zsiRefC14n, ftRefC14n)
523       
524    def test19ExclC14nWithXPathAndInclusiveNSPfx(self):
525        # Exclusive C14N applied to portions of a SOAP message by extracting
526        # using XPath
527        inputFile = mkPath('soapGetAttCertResponse.xml')
528       
529        from xml.xpath.Context import Context
530        from xml import xpath
531        from xml.dom.ext.reader import PyExpat
532        reader = PyExpat.Reader()
533        dom = reader.fromStream(open(inputFile))
534        processorNss = \
535        {
536            'wsu': ("http://docs.oasis-open.org/wss/2004/01/"
537                    "oasis-200401-wss-wssecurity-utility-1.0.xsd"),
538        }
539   
540        ctxt = Context(dom, processorNss=processorNss)
541        zsiRefNodes = xpath.Evaluate('//*[@wsu:Id]', 
542                                  contextNode=dom, 
543                                  context=ctxt)
544
545        # 4Suite
546        ftDoc=NonvalidatingReader.parseUri('file://'+inputFile)       
547        ftOut = StringIO()
548       
549        # Extract nodes for signing
550        xpathExpression = XPath.Compile('//*[@wsu:Id]')
551        ctx = XPath.Context.Context(ftDoc, processorNss=processorNss)
552        ftRefNodes = xpathExpression.evaluate(ctx)
553       
554        nsPfx = ['SOAP-ENV', 'ds']
555        for zsiRefNode, ftRefNode in zip(zsiRefNodes, ftRefNodes):
556            # Get ref node and all it's children
557            refSubsetList = getChildNodes(zsiRefNode)
558            zsiRefC14n = Canonicalize(dom, None, subset=refSubsetList,
559                                      unsuppressedPrefixes=nsPfx)
560
561            print("_"*80)
562            print("4Suite C14N with Prefixes %s:\n", zsiRefNode.nodeName)
563            print(zsiRefC14n)
564       
565            # 4Suite equivalent     
566            ftOut = StringIO()
567            CanonicalPrint(ftRefNode, stream=ftOut, exclusive=True,
568                           inclusivePrefixes=nsPfx)
569            ftRefC14n = ftOut.getvalue()       
570           
571            print("_"*80)
572            print("4Suite Exclusive C14N %s:\n", ftRefNode.nodeName)
573            print(ftRefC14n)
574
575            self.assertEqual(zsiRefC14n, ftRefC14n)
576     
577
def getChildNodes(node, nodeList=None):
    """Flatten the DOM subtree rooted at ``node`` into a list.

    The list holds, for ``node`` and then for each descendant in document
    order, the node's attribute nodes followed by its child nodes.

    node: DOM node providing ``attributes`` (``None`` or a mapping with a
    ``values()`` method) and ``childNodes``.
    nodeList: optional list to append to.  When omitted a new list is
    created with ``node`` itself as its first entry; a list passed in is
    extended in place and ``node`` itself is NOT added to it.

    Returns the list that was filled.
    """
    if nodeList is None:
        nodeList = [node]
    return _getChildNodes(node, nodeList=nodeList)


def _getChildNodes(node, nodeList=None):
    """Recursive worker for getChildNodes.

    Appends the attributes and children of ``node`` to ``nodeList`` and then
    descends into each child.  A fresh list is used when ``nodeList`` is
    omitted (previously the ``None`` default raised TypeError).

    Returns ``nodeList``.
    """
    if nodeList is None:
        nodeList = []

    # Nodes that cannot carry attributes expose attributes as None
    if node.attributes is not None:
        nodeList += node.attributes.values()

    nodeList += node.childNodes
    for childNode in node.childNodes:
        _getChildNodes(childNode, nodeList)

    return nodeList
591
592if __name__ == "__main__":
593    unittest.main()
594
Note: See TracBrowser for help on using the repository browser.