source: TI12-security/trunk/NDGSecurity/python/Tests/foursuite_c14n/test_c14n.py @ 7080

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI12-security/trunk/NDGSecurity/python/Tests/foursuite_c14n/test_c14n.py@7080
Revision 7080, 18.0 KB checked in by pjkersha, 9 years ago (diff)
  • Property svn:executable set to *
  • Property svn:keywords set to Id
Line 
1#!/usr/bin/env python
2"""NDG 4Suite XML C14N tests
3
4NERC Data Grid Project
5"""
6__author__ = "P J Kershaw"
7__date__ = "03/02/09"
8__copyright__ = "(C) 2009 Science and Technology Facilities Council"
9__license__ = "BSD - see LICENSE file in top-level directory"
10__contact__ = "Philip.Kershaw@stfc.ac.uk"
11__revision__ = '$Id$'
12import unittest
13import os
14import sys
15import getpass
16import traceback
17
18from difflib import unified_diff
19
20from StringIO import StringIO
21
22
23from Ft.Xml.Domlette import NonvalidatingReader, CanonicalPrint
24from Ft.Xml import XPath
25
26# Minidom based Canonicalization from ZSI for comparison
27from ZSI.wstools.c14n import Canonicalize
28
29from xml.xpath.Context import Context
30from xml import xpath
31from xml.dom.ext.reader import PyExpat
32
xpdVars = os.path.expandvars
jnPath = os.path.join
from os.path import dirname

# Directory holding this test module.  Made absolute because mkPath() results
# are concatenated onto 'file://' to build URIs: when the script is launched
# from its own directory __file__ can be a bare relative name, dirname() then
# yields '' and the resulting URI is not a valid absolute file URI.
_thisDir = dirname(os.path.abspath(__file__))


def mkPath(file):
    """Return the absolute path of ``file`` located alongside this module."""
    return jnPath(_thisDir, file)
38
39class ElementTreeC14nTestCase(unittest.TestCase):
40   
41    def setUp(self):
42       
43        if 'NDGSEC_INT_DEBUG' in os.environ:
44            import pdb
45            pdb.set_trace()         
46
47    def assertEqual(self, a, b):
48        '''Override default to get something easy to read than super class
49        behaviour'''
50        if a != b:
51            diffGen = unified_diff(a.split('\n'), b.split('\n'))
52            raise AssertionError('\n'+'\n'.join(diffGen))
53       
54    def test01UTF8DocEncoding(self):
55       
56        # http://www.w3.org/TR/xml-c14n#Example-UTF8
57        xml = '<?xml version="1.0" encoding="ISO-8859-1"?><doc>&#169;</doc>'
58        ftDoc = NonvalidatingReader.parseString(xml)
59        f = StringIO()
60        CanonicalPrint(ftDoc, f)
61        c14n = f.getvalue()
62        #self.assertEqual(c14n, '<doc>#xC2#xA9</doc>')
63        self.assertEqual(c14n, '<doc>\xC2\xA9</doc>')
64
65# Fails but not critical to use case - problems with URNs:
66#
67# UriException: The URI scheme urn is not supported by resolver FtUriResolver
68    def test01aPIsCommentsAndOutsideOfDocElem(self):
69        # http://www.w3.org/TR/xml-c14n#Example-OutsideDoc - PIs, Comments, and
70        # Outside of Document Element
71        xml = \
72'''<?xml version="1.0"?>
73
74<?xml-stylesheet   href="doc.xsl"
75   type="text/xsl"   ?>
76
77<!DOCTYPE doc SYSTEM>
78
79<doc>Hello, world!<!-- Comment 1 --></doc>
80
81<?pi-without-data     ?>
82
83<!-- Comment 2 -->
84
85<!-- Comment 3 -->'''
86#'''<?xml version="1.0"?>
87#
88#<?xml-stylesheet   href="doc.xsl"
89#   type="text/xsl"   ?>
90#
91#<!DOCTYPE doc SYSTEM "doc.dtd">
92#
93#<doc>Hello, world!<!-- Comment 1 --></doc>
94#
95#<?pi-without-data     ?>
96#
97#<!-- Comment 2 -->
98#
99#<!-- Comment 3 -->'''
100
101        exptdC14n = \
102'''<?xml-stylesheet href="doc.xsl"
103   type="text/xsl"   ?>
104<doc>Hello, world!</doc>
105<?pi-without-data?>'''
106
107        ftDoc = NonvalidatingReader.parseString(xml)
108        f = StringIO()
109        CanonicalPrint(ftDoc, f)
110        c14n = f.getvalue()
111        self.assertEqual(c14n, exptdC14n)
112
113   
114    def test02NormalizeLineBreaks(self):
115        xml = '<?xml version="1.0" encoding="UTF-8"?>\r\n<a/>\r\n'
116        ftDoc = NonvalidatingReader.parseString(xml)
117        f = StringIO()
118        CanonicalPrint(ftDoc, f)
119        c14n = f.getvalue()
120        self.failIf('\r' in c14n, "Carriage return \r char found in c14n")
121
122   
123    def test03NormalizedAttrVals(self):
124        pass
125
126   
127# Fails but not critical to use case - problems with URNs:
128#
129# UriException: The URI scheme urn is not supported by resolver FtUriResolver
130    def test04CharAndParsedEntityRefsReplaced(self):
131        xml = '''<!DOCTYPE doc [
132<!ATTLIST doc attrExtEnt ENTITY #IMPLIED>
133<!ENTITY ent1 "Hello">
134<!ENTITY ent2 SYSTEM "world.txt">
135<!ENTITY entExt SYSTEM "earth.gif" NDATA gif>
136<!NOTATION gif SYSTEM "viewgif.exe">
137]>
138<doc attrExtEnt="entExt">
139   &ent1;, &ent2;!
140</doc>
141
142<!-- Let world.txt contain "world" (excluding the quotes) -->'''
143
144        exptdC14n = '''<doc attrExtEnt="entExt">
145   Hello, world!
146</doc>'''
147        ftDoc = NonvalidatingReader.parseString(xml)
148        f = StringIO()
149        CanonicalPrint(ftDoc, f)
150        c14n = f.getvalue()
151        self.assertEqual(c14n, exptdC14n)
152       
153   
154    def test05CDATASectionsReplaced(self):
155        xml = \
156"""<?xml version="1.0" encoding="UTF-8"?>
157<script>
158<![CDATA[
159function matchwo(a,b)
160{
161if (a < b && a > 0) then
162   {
163   print("Match");
164   return 1;
165   }
166else
167   {
168   print('Different');
169   return 0;
170   }
171}
172]]>
173</script>
174"""
175        ftDoc = NonvalidatingReader.parseString(xml)
176        f = StringIO()
177        CanonicalPrint(ftDoc, f)
178        c14n = f.getvalue()
179       
180        self.failIf('CDATA' in c14n, "CDATA not removed, c14n = %s" % c14n)
181        self.failUnless('&lt;' in c14n,
182                        "Less than not converted, c14n = %s" % c14n)
183        self.failUnless('&gt;' in c14n, 
184                        "Greater than not converted, c14n = %s" % c14n)
185        self.failUnless('&amp;' in c14n, 
186                        "Ampersand not converted, c14n = %s" % c14n)
187
188        # Test for double quotes / apostrophes?
189       
190   
191    def test06XMLDeclAndDTDRemoved(self):
192        xmlDecl = '<?xml version="1.0" encoding="UTF-8"?>'
193        dtd = \
194"""<!DOCTYPE note [
195  <!ELEMENT note (to,from,heading,body)>
196  <!ELEMENT to      (#PCDATA)>
197  <!ELEMENT from    (#PCDATA)>
198  <!ELEMENT heading (#PCDATA)>
199  <!ELEMENT body    (#PCDATA)>
200]>
201"""
202        xml = \
203"""%s
204%s<a/>""" % (xmlDecl, dtd)
205
206        ftDoc = NonvalidatingReader.parseString(xml)
207        f = StringIO()
208        CanonicalPrint(ftDoc, f)
209        c14n = f.getvalue()
210        self.failIf('<?xml version="1.0" encoding="UTF-8"?>' in c14n, 
211                    "XML Declaration not removed")
212        self.failIf(dtd in c14n, "DTD not removed")
213
214   
215    def test07EmptyElemsConvertedStartEndPairs(self):
216        xml = '<?xml version="1.0" encoding="UTF-8"?><a/>'
217        ftDoc = NonvalidatingReader.parseString(xml)
218        f = StringIO()
219        CanonicalPrint(ftDoc, f)
220        c14n = f.getvalue()
221        self.failUnless(c14n == '<a></a>', "C14N = %s" % c14n)
222
223         
224    def test08WhitespaceNormalized(self):
225        # ...outside the document element and within start and end tags
226        dat = \
227'''        1 2
228  3'''
229 
230        xml = \
231'''<?xml version="1.0" encoding="UTF-8"?>
232<doc xmlns="http://example.com/default">
233  <a
234     a2="2"   a1="1"
235  >%s</a>
236</doc>
237
238''' % dat
239
240        ftDoc = NonvalidatingReader.parseString(xml)
241        f = StringIO()
242        CanonicalPrint(ftDoc, f)
243        c14n = f.getvalue()
244       
245        self.failUnless('a1="1" a2="2"' in c14n, 
246                        "Expecting single space between attributes")
247        self.failUnless(dat in c14n, 
248                        "Expecting element content to be preserved")
249       
250        sub = c14n[c14n.find('<a'):c14n.find('>')]
251        self.failIf('\n' in sub, 
252                    "Expecting removal of line breaks for 'a' element")
253     
254     
255    def test09WhitespaceInCharContentRetained(self):
256        # http://www.w3.org/TR/xml-c14n#Example-WhitespaceInContent
257        # Nb. excludes chars removed during line break normalization
258        xml = \
259'''<doc>
260   <clean>   </clean>
261   <dirty>   A   B   </dirty>
262   <mixed>
263      A
264      <clean>   </clean>
265      B
266      <dirty>   A   B   </dirty>
267      C
268   </mixed>
269</doc>'''
270        ftDoc = NonvalidatingReader.parseString(xml)
271        f = StringIO()
272        CanonicalPrint(ftDoc, f)
273        c14n = f.getvalue()
274       
275        # In this case the canonicalized form should be identical to the
276        # original
277        self.assertEqual(c14n, xml)
278
279       
280    def test10AttrValDelimitersSet2DblQuotes(self):
281        xml = \
282"""<?xml version="1.0" encoding="UTF-8"?>
283  <b y:a1='1' a3='"3"'
284     xmlns:y='http://example.com/y' y:a2='2'/>
285"""
286
287        ftDoc = NonvalidatingReader.parseString(xml)
288        f = StringIO()
289        CanonicalPrint(ftDoc, f)
290        c14n = f.getvalue()
291        self.failIf("'" in c14n, 
292                    "Expecting removal of apostrophes C14N = %s" % c14n)
293
294   
295    def test11SpecialCharsReplaced(self):
296        # i.e. within attribute values and character content
297        pass
298       
299       
300    def test12SuperflousNSdeclsRemoved(self):
301        extraNS = "http://example.com/default"
302        xml = \
303"""<?xml version="1.0" encoding="UTF-8"?>
304<doc xmlns:x="http://example.com/x" xmlns="%s">
305  <b y:a1='1' xmlns="%s" a3='"3"'
306     xmlns:y='http://example.com/y' y:a2='2'/>
307</doc>""" % (extraNS, extraNS)
308
309        ftDoc = NonvalidatingReader.parseString(xml)
310        f = StringIO()
311        CanonicalPrint(ftDoc, f)
312        c14n = f.getvalue()
313       
314        # Namespace should now only occur once...
315        self.failUnless(c14n.find(extraNS) == c14n.rfind(extraNS), 
316                    "Expecting removal of extra NS %s in output = %s" % \
317                    (extraNS, c14n))
318       
319       
320    def test13DefAttrsAdded2EachElem(self):
321        # Ref. http://www.w3.org/TR/xml-c14n#Example-SETags
322        xml = '''<!DOCTYPE doc [<!ATTLIST e9 attr CDATA "default">]>
323<doc>
324   <e1   />
325   <e2   ></e2>
326   <e3   name = "elem3"   id="elem3"   />
327   <e4   name="elem4"   id="elem4"   ></e4>
328   <e5 a:attr="out" b:attr="sorted" attr2="all" attr="I'm"
329      xmlns:b="http://www.ietf.org"
330      xmlns:a="http://www.w3.org"
331      xmlns="http://example.org"/>
332   <e6 xmlns="" xmlns:a="http://www.w3.org">
333      <e7 xmlns="http://www.ietf.org">
334         <e8 xmlns="" xmlns:a="http://www.w3.org">
335            <e9 xmlns="" xmlns:a="http://www.ietf.org"/>
336         </e8>
337      </e7>
338   </e6>
339</doc>'''
340
341        ftDoc = NonvalidatingReader.parseString(xml)
342        f = StringIO()
343        CanonicalPrint(ftDoc, f)
344        c14n = f.getvalue()
345
346        exptdC14n = '''<doc>
347   <e1></e1>
348   <e2></e2>
349   <e3 id="elem3" name="elem3"></e3>
350   <e4 id="elem4" name="elem4"></e4>
351   <e5 xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I'm" attr2="all" b:attr="sorted" a:attr="out"></e5>
352   <e6 xmlns:a="http://www.w3.org">
353      <e7 xmlns="http://www.ietf.org">
354         <e8 xmlns="">
355            <e9 xmlns:a="http://www.ietf.org" attr="default"></e9>
356         </e8>
357      </e7>
358   </e6>
359</doc>'''
360        self.assertEqual(c14n, exptdC14n)
361
362# Fails with:
363#
364# RuntimeException: Undefined namespace prefix: "ietf"       
365    def test14DocumentSubsets(self):
366        # Ref. http://www.w3.org/TR/xml-c14n#Example-DocSubsets
367        xml = \
368"""<!DOCTYPE doc [
369<!ATTLIST e2 xml:space (default|preserve) 'preserve'>
370<!ATTLIST e3 id ID #IMPLIED>
371]>
372<doc xmlns="http://www.ietf.org" xmlns:w3c="http://www.w3.org">
373   <e1>
374      <e2 xmlns="">
375         <e3 id="E3"/>
376      </e2>
377   </e1>
378</doc>"""
379
380#'''<!-- Evaluate with declaration xmlns:ietf="http://www.ietf.org" -->
381        xpathExpr = \
382'''
383(//. | //@* | //namespace::*)
384[
385   self::ietf:e1 or (parent::ietf:e1 and not(self::text() or self::e2))
386   or
387   count(id("E3")|ancestor-or-self::node()) = count(ancestor-or-self::node())
388]'''
389
390        exptdC14n = \
391'<e1 xmlns="http://www.ietf.org" xmlns:w3c="http://www.w3.org"><e3 xmlns="" id="E3" xml:space="preserve"></e3></e1>'
392
393        ftDoc = NonvalidatingReader.parseString(xml)
394
395        xpathExpression = XPath.Compile(xpathExpr)
396        ctx = XPath.Context.Context(ftDoc)
397        ftNode = xpathExpression.evaluate(ctx)
398        f = StringIO()
399        CanonicalPrint(ftNode, f)
400        c14n = f.getvalue()
401
402    def test15CmpZSIc14n(self):
403        ftDoc=NonvalidatingReader.parseUri('file://'+mkPath('windows-ac.xml'))       
404        ftOut = StringIO()
405        CanonicalPrint(ftDoc, ftOut)
406        ftC14n = ftOut.getvalue()
407       
408        reader = PyExpat.Reader()
409        dom = reader.fromStream(open('./windows-ac.xml'))
410       
411        zsiC14n = Canonicalize(dom)
412        self.failUnless(ftC14n == zsiC14n, "ZSI C14N output differs")
413       
414    def test16Cmplxmlc14n(self):
415        ftDoc=NonvalidatingReader.parseUri('file://'+mkPath('windows-ac.xml'))       
416        ftOut = StringIO()
417        CanonicalPrint(ftDoc, ftOut)
418        ftC14n = ftOut.getvalue()       
419       
420        from lxml import etree as lxmlET
421       
422        lxmlElem = lxmlET.parse('./windows-ac.xml')
423        lxmlETf = StringIO()
424        lxmlElem.write_c14n(lxmlETf)
425        lxmlETC14n = lxmlETf.getvalue()
426       
427        self.failUnless(ftC14n == lxmlETC14n, "lxml C14N output differs")
428       
429       
430    def test17InclusiveC14nWithXPath(self):
431        # Inclusive Canonicalization of portions of a SOAP message extracted
432        # using XPath
433       
434        inputFile = mkPath('soapGetAttCertResponse.xml')
435       
436        reader = PyExpat.Reader()
437        dom = reader.fromStream(open(inputFile))
438        processorNss = \
439        {
440            'wsu': ("http://docs.oasis-open.org/wss/2004/01/"
441                    "oasis-200401-wss-wssecurity-utility-1.0.xsd"),
442        }
443   
444        ctxt = Context(dom, processorNss=processorNss)
445        zsiRefNodes = xpath.Evaluate('//*[@wsu:Id]', 
446                                     contextNode=dom, 
447                                     context=ctxt)
448       
449        # 4Suite
450        ftDoc=NonvalidatingReader.parseUri('file://'+inputFile)       
451        ftOut = StringIO()
452       
453        # Extract nodes for signing
454        xpathExpression = XPath.Compile('//*[@wsu:Id]')
455        ctx = XPath.Context.Context(ftDoc, processorNss=processorNss)
456        ftRefNodes = xpathExpression.evaluate(ctx)
457       
458        for zsiRefNode, ftRefNode in zip(zsiRefNodes, ftRefNodes):
459            # Get ref node and all it's children
460            zsiRefC14n = Canonicalize(zsiRefNode)
461
462            print("_"*80)
463            print("ZSI Inclusive C14N %s:\n" % zsiRefNode.nodeName)
464            print(zsiRefC14n)
465                 
466            ftOut = StringIO()
467            CanonicalPrint(ftRefNode, ftOut)
468            ftRefC14n = ftOut.getvalue()       
469           
470            print("_"*80)
471            print("4Suite XML Inclusive C14N %s:\n", ftRefNode.nodeName)
472            print(ftRefC14n)
473            self.assertEqual(zsiRefC14n, ftRefC14n)
474       
475    def test18ExclC14nWithXPath(self):
476        # Exclusive C14N applied to portions of a SOAP message by extracting
477        # using XPath
478       
479        inputFile = mkPath('soapGetAttCertResponse.xml')
480
481        reader = PyExpat.Reader()
482        dom = reader.fromStream(open(inputFile))
483        processorNss = \
484        {
485            'wsu': ("http://docs.oasis-open.org/wss/2004/01/"
486                    "oasis-200401-wss-wssecurity-utility-1.0.xsd"),
487        }
488   
489        ctxt = Context(dom, processorNss=processorNss)
490        zsiRefNodes = xpath.Evaluate('//*[@wsu:Id]', 
491                                  contextNode=dom, 
492                                  context=ctxt)
493        # 4Suite
494        ftDoc=NonvalidatingReader.parseUri('file://'+inputFile)       
495        ftOut = StringIO()
496       
497        # Extract nodes for signing
498        xpathExpression = XPath.Compile('//*[@wsu:Id]')
499        ctx = XPath.Context.Context(ftDoc, processorNss=processorNss)
500        ftRefNodes = xpathExpression.evaluate(ctx)
501       
502        for zsiRefNode, ftRefNode in zip(zsiRefNodes, ftRefNodes):
503            # Get ref node and all it's children
504            refSubsetList = getChildNodes(zsiRefNode)
505            zsiRefC14n = Canonicalize(dom, None, subset=refSubsetList,
506                                      unsuppressedPrefixes=[])
507
508            print("_"*80)
509            print("ZSI Exclusive C14N %s:\n", zsiRefNode.nodeName)
510            print(zsiRefC14n)
511
512       
513            # 4Suite equivalent     
514            ftOut = StringIO()
515            CanonicalPrint(ftRefNode, stream=ftOut, exclusive=True)
516            ftRefC14n = ftOut.getvalue()       
517           
518            print("_"*80)
519            print("4Suite Exclusive C14N %s:\n", ftRefNode.nodeName)
520            print(ftRefC14n)
521       
522            self.assertEqual(zsiRefC14n, ftRefC14n)
523       
524    def test19ExclC14nWithXPathAndInclusiveNSPfx(self):
525        # Exclusive C14N applied to portions of a SOAP message by extracting
526        # using XPath
527        inputFile = mkPath('soapGetAttCertResponse.xml')
528       
529        from xml.xpath.Context import Context
530        from xml import xpath
531        from xml.dom.ext.reader import PyExpat
532        reader = PyExpat.Reader()
533        dom = reader.fromStream(open(inputFile))
534        processorNss = \
535        {
536            'wsu': ("http://docs.oasis-open.org/wss/2004/01/"
537                    "oasis-200401-wss-wssecurity-utility-1.0.xsd"),
538        }
539   
540        ctxt = Context(dom, processorNss=processorNss)
541        zsiRefNodes = xpath.Evaluate('//*[@wsu:Id]', 
542                                  contextNode=dom, 
543                                  context=ctxt)
544
545        # 4Suite
546        ftDoc=NonvalidatingReader.parseUri('file://'+inputFile)       
547        ftOut = StringIO()
548       
549        # Extract nodes for signing
550        xpathExpression = XPath.Compile('//*[@wsu:Id]')
551        ctx = XPath.Context.Context(ftDoc, processorNss=processorNss)
552        ftRefNodes = xpathExpression.evaluate(ctx)
553       
554        nsPfx = ['SOAP-ENV', 'ds']
555        for zsiRefNode, ftRefNode in zip(zsiRefNodes, ftRefNodes):
556            # Get ref node and all it's children
557            refSubsetList = getChildNodes(zsiRefNode)
558            zsiRefC14n = Canonicalize(dom, None, subset=refSubsetList,
559                                      unsuppressedPrefixes=nsPfx)
560
561            print("_"*80)
562            print("4Suite C14N with Prefixes %s:\n", zsiRefNode.nodeName)
563            print(zsiRefC14n)
564       
565            # 4Suite equivalent     
566            ftOut = StringIO()
567            CanonicalPrint(ftRefNode, stream=ftOut, exclusive=True,
568                           inclusivePrefixes=nsPfx)
569            ftRefC14n = ftOut.getvalue()       
570           
571            print("_"*80)
572            print("4Suite Exclusive C14N %s:\n", ftRefNode.nodeName)
573            print(ftRefC14n)
574
575            self.assertEqual(zsiRefC14n, ftRefC14n)
576     
577
def getChildNodes(node, nodeList=None):
    """Return node together with the attribute and child nodes beneath it.

    When no nodeList is supplied a new list starting with node itself is
    created; a supplied list is passed on as given.  The list is filled in
    and returned by _getChildNodes.
    """
    collected = [node] if nodeList is None else nodeList
    return _getChildNodes(node, nodeList=collected)
582           
583def _getChildNodes(node, nodeList=None):
584
585    if node.attributes is not None:
586        nodeList += node.attributes.values() 
587    nodeList += node.childNodes
588    for childNode in node.childNodes:
589        _getChildNodes(childNode, nodeList)
590    return nodeList
591
592if __name__ == "__main__":
593    unittest.main()
594
Note: See TracBrowser for help on using the repository browser.