source: TI12-security/trunk/python/Tests/xmlsec/c14n.py @ 1415

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI12-security/trunk/python/Tests/xmlsec/c14n.py@1415
Revision 1415, 6.6 KB checked in by pjkersha, 13 years ago (diff)

Include XML Security test programs.

  • Property svn:executable set to *
Line 
1#! /usr/bin/env python
2'''XML C14N
3
4Perform XML Canonicalization.  Not fully conformant to the spec
5in a couple of ways (mostly minor):
6    Comments are always stripped
7    Whitespace preservation/stripping not totally correct
8    Processing Instruction nodes aren't handled
9    The nodeset must start with an element and includes all descendants
10Fixing the last one would be non-trivial.
11'''
12
13_copyright = '''Copyright 2001, Zolera Systems Inc.  All Rights Reserved.
14Distributed under the terms of the Python 2.0 Copyright.'''
15
16from xml.dom import Node
17import re
18import StringIO
19
def _attrs(E):
    '''Return whatever E._get_attributes() yields, or an empty list
    when that result is false (None or empty).'''
    return E._get_attributes() or []


def _children(E):
    '''Return whatever E._get_childNodes() yields, or an empty list
    when that result is false (None or empty).'''
    return E._get_childNodes() or []


def _sorter(n1, n2):
    '''cmp-style comparator ordering two nodes by their node name.

    Returns -1, 0 or 1.  Written without the cmp() builtin so that it
    does not depend on a name that only exists in Python 2.
    '''
    a, b = n1._get_nodeName(), n2._get_nodeName()
    return (a > b) - (a < b)


# Namespace URI compared against an attribute's namespaceURI to recognise
# namespace declarations (xmlns / xmlns:prefix attributes).
xmlns_base = "http://www.w3.org/2000/xmlns/"
24
class _implementation:
    '''Run-once canonicalizer.

    Constructing an instance immediately walks the given element and
    passes the canonical text, piece by piece, to the `write` callable.
    Node-type specific output routines are looked up in the class-level
    `handlers` table.
    '''

    # Handlers for each node, by node type.
    handlers = {}

    # pattern/replacement list for whitespace stripping.
    repats = (
        (re.compile(r'^[ \t]+', re.MULTILINE), ''),
        (re.compile(r'[ \t]+$', re.MULTILINE), ''),
        (re.compile(r'[\r\n]+'), '\n'),
    )

    def __init__(self, node, write, nsdict=None, stripspace=0):
        '''Create and run the implementation.

        node -- the element to canonicalize (must be an element node)
        write -- callable invoked with each piece of output text
        nsdict -- prefix/uri entries assumed to be in scope around node;
            the default-namespace entry, if any, is keyed 'xmlns'
        stripspace -- if true, apply `repats` to every text node

        Raises TypeError if node is not an element node.
        '''
        if node._get_nodeType() != Node.ELEMENT_NODE:
            raise TypeError('Non-element node')
        # A fresh dict per call instead of a shared mutable default.
        if nsdict is None:
            nsdict = {}
        self.write = write
        self.ns_stack = [nsdict]
        self.stripspace = stripspace
        self._do_element(node)
        self.ns_stack.pop()

    def _do_text(self, node):
        'Output a text node in canonical form.'
        # "&" must be escaped first: the later replacements introduce
        # "&" themselves and must not be escaped a second time.
        s = node._get_data() \
            .replace("&", "&amp;") \
            .replace("<", "&lt;") \
            .replace(">", "&gt;") \
            .replace("\015", "&#xD;")
        if self.stripspace:
            for pat, repl in _implementation.repats:
                s = pat.sub(repl, s)
        if s: self.write(s)
    handlers[Node.TEXT_NODE] = _do_text
    handlers[Node.CDATA_SECTION_NODE] = _do_text

    def _do_pi(self, node):
        'Output a processing instruction in canonical form.'
        pass    # XXX not implemented: processing instructions are dropped
    handlers[Node.PROCESSING_INSTRUCTION_NODE] = _do_pi

    def _do_comment(self, node):
        'Output a comment node in canonical form.'
        pass    # XXX not implemented: comments are dropped
    handlers[Node.COMMENT_NODE] = _do_comment

    def _do_attr(self, n, value):
        '''Output an attribute in canonical form.

        n -- the attribute name exactly as it should be written
        value -- the unescaped attribute value
        '''
        W = self.write
        W(' ')
        W(n)
        W('="')
        # "&" first (see _do_text); whitespace characters become
        # hexadecimal character references.
        s = value \
            .replace("&", "&amp;") \
            .replace("<", "&lt;") \
            .replace('"', '&quot;') \
            .replace('\011', '&#x9;') \
            .replace('\012', '&#xA;') \
            .replace('\015', '&#xD;')
        W(s)
        W('"')

    def _do_element(self, node):
        'Output an element (and its children) in canonical form.'
        name = node._get_nodeName()
        parent_ns = self.ns_stack[-1]
        # Start from everything the parent has in scope, so that a
        # declaration repeated further down the tree is recognised as
        # redundant; the default namespace starts out as '' when unset.
        my_ns = dict(parent_ns)
        my_ns.setdefault('xmlns', '')
        W = self.write
        W('<')
        W(name)

        # Divide attributes to NS definitions and others.
        nsnodes, others = [], []
        for a in _attrs(node):
            if a._get_namespaceURI() == xmlns_base:
                nsnodes.append(a)
            else:
                others.append(a)

        # Namespace attributes: update dictionary; if not already
        # in scope with the same value, output it.
        nsnodes.sort(key=lambda attr: attr._get_nodeName())
        for a in nsnodes:
            n = a._get_nodeName()
            # NOTE(review): a default-namespace declaration normally has
            # the node name "xmlns" (no colon), so this branch looks
            # unreachable; such attributes take the else branch and are
            # keyed by their local name -- confirm against the DOM in use.
            if n == "xmlns:":
                key, n = "", "xmlns"
            else:
                key = a._get_localName()
            v = a._get_nodeValue()
            inherited = my_ns.get(key, None)
            my_ns[key] = v
            if v != inherited: self._do_attr(n, v)

        # Other attributes: sort and output.
        others.sort(key=lambda attr: attr._get_nodeName())
        for a in others:
            self._do_attr(a._get_nodeName(), a._get_value())
        W('>')

        self.ns_stack.append(my_ns)
        for c in _children(node):
            # Node types without a registered handler are skipped.
            handler = _implementation.handlers.get(c._get_nodeType(), None)
            if handler: handler(self, c)
        self.ns_stack.pop()
        W('</%s>' % (name,))
    handlers[Node.ELEMENT_NODE] = _do_element
129
def XMLC14N(node, output=None, **kw):
    '''Canonicalize a DOM element node and everything underneath it.
    Return the text; if output is specified then output.write will
    be called to output the text and the return value will be None.
    Keyword parameters:
    stripspace -- remove extra (almost all) whitespace from text nodes
    nsdict -- a dictionary of prefix/uri namespace entries assumed
        to exist in the surrounding context.
    Other keyword parameters are ignored.
    '''
    stripspace = kw.get('stripspace', 0)
    nsdict = kw.get('nsdict', {})

    # Caller supplied a sink: stream into it and return nothing.
    # Tested against None so that a sink object which happens to
    # evaluate as false is still honoured.
    if output is not None:
        _implementation(node, output.write,
                        stripspace=stripspace, nsdict=nsdict)
        return None

    # No sink: collect the text in memory and hand it back, making
    # sure the buffer is closed even if canonicalization fails.
    buf = StringIO.StringIO()
    try:
        _implementation(node, buf.write,
                        stripspace=stripspace, nsdict=nsdict)
        return buf.getvalue()
    finally:
        buf.close()
156
if __name__ == '__main__':
    # Demo / smoke test: canonicalize each element that is a direct child
    # of the sample envelope's root element, once with whitespace
    # stripping and a pre-declared 'spare' namespace, once with defaults.
    text = '''<SOAP-ENV:Envelope
      xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
      xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/"
      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:spare='foo'
      SOAP-ENV:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/">
    <SOAP-ENV:Body xmlns='test-uri'><?MYPI spenser?>
        <Price xsi:type='xsd:integer'>34</Price>    <!-- 0 -->
        <SOAP-ENC:byte>44</SOAP-ENC:byte>    <!-- 1 -->
        <Name>This is the name</Name>    <!-- 2 -->
        <n2><![CDATA[<greeting>Hello</greeting>]]></n2> <!-- 3 -->
        <n3 href='#zzz' xsi:type='SOAP-ENC:string'/>        <!-- 4 -->
        <n64>a GVsbG8=</n64>        <!-- 5 -->
        <SOAP-ENC:string>Red</SOAP-ENC:string>    <!-- 6 -->
        <a2 href='#tri2'/>        <!-- 7 -->
        <a2 xmlns:f='z' xmlns:aa='zz'><i xmlns:f='z'>12</i><t>rich salz</t></a2> <!-- 8 -->
        <xsd:hexBinary>3F2041</xsd:hexBinary> <!-- 9 -->
        <nullint xsi:nil='1'/> <!-- 10 -->
    </SOAP-ENV:Body>
      <z xmlns='myns' id='zzz'>The value of n3</z>
      <zz xmlns:spare='foo' xmlns='myns2' id='tri2'><inner>content</inner></zz>
    </SOAP-ENV:Envelope>'''

    # Each print below takes one parenthesised string, which the
    # Python 2 print statement writes exactly as the original
    # comma-separated form did.
    print(_copyright)
    from xml.dom.ext.reader import PyExpat
    reader = PyExpat.Reader()
    dom = reader.fromString(text)
    for e in _children(dom):
        if e._get_nodeType() == Node.ELEMENT_NODE:
            for ee in _children(e):
                if ee._get_nodeType() == Node.ELEMENT_NODE:
                    print('\n' + '=' * 60)
                    print(XMLC14N(ee, nsdict={'spare': 'foo'}, stripspace=1))
                    print('-' * 60)
                    print(XMLC14N(ee, stripspace=0))
                    print('=' * 60)
Note: See TracBrowser for help on using the repository browser.