source: exist/trunk/python/elementtree-1.3a6-20070212-badc/elementtree/ElementC14N.py @ 3429

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/elementtree-1.3a6-20070212-badc/elementtree/ElementC14N.py@4663
Revision 3429, 8.3 KB checked in by pjkersha, 12 years ago (diff)

Added latest ElementTree version (12/2/08) from Fredrik Lundh

Line 
1#
2# ElementTree
3# $Id: ElementC14N.py 3376 2008-02-13 08:17:35Z fredrik $
4#
5# canonicalisation (c14n) support for element trees
6#
7# history:
8# 2007-12-14 fl   created (normalized version)
9# 2008-02-12 fl   roundtrip support
10#
11# Copyright (c) 2007-2008 by Fredrik Lundh.  All rights reserved.
12#
13# fredrik@pythonware.com
14# http://www.pythonware.com
15#
16# --------------------------------------------------------------------
17# The ElementTree toolkit is
18#
19# Copyright (c) 1999-2008 by Fredrik Lundh
20#
21# By obtaining, using, and/or copying this software and/or its
22# associated documentation, you agree that you have read, understood,
23# and will comply with the following terms and conditions:
24#
25# Permission to use, copy, modify, and distribute this software and
26# its associated documentation for any purpose and without fee is
27# hereby granted, provided that the above copyright notice appears in
28# all copies, and that both that copyright notice and this permission
29# notice appear in supporting documentation, and that the name of
30# Secret Labs AB or the author not be used in advertising or publicity
31# pertaining to distribution of the software without specific, written
32# prior permission.
33#
34# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
35# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
36# ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
37# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
38# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
39# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
40# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
41# OF THIS SOFTWARE.
42# --------------------------------------------------------------------
43
44# TODO: restore support for comments, processing instructions
45# TODO: restore exclusive namespace support
46
47from ElementTree import Comment, ProcessingInstruction, QName
48from ElementTree import ElementTree, iterparse
49from ElementTree import _namespaces, _raise_serialization_error
50
51# C14N escape methods
52
def _escape_cdata_c14n(text):
    """Escape character data for canonical output and encode it as UTF-8.

    The ampersand, less-than and greater-than characters are replaced by
    the entities &amp;, &lt; and &gt;, and every carriage return is
    replaced by the character reference &#xD;.  Line feeds and all other
    characters are passed through unchanged.

    text -- the character data to escape (a string).

    Returns the escaped text, encoded as UTF-8.  If text does not support
    the string operations used here (TypeError or AttributeError), the
    value is handed to _raise_serialization_error instead.
    """
    try:
        # it's worth avoiding do-nothing calls for strings that are
        # shorter than 500 characters, or so.  assume that's, by far,
        # the most common case in most applications.
        if "&" in text:
            # must run first, so the "&" of the entities inserted below
            # is not escaped a second time
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\r" in text:
            # replace the carriage return itself; line feeds stay as they are
            text = text.replace("\r", "&#xD;")
        return text.encode("utf-8")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
70
def _escape_attrib_c14n(text):
    """Escape an attribute value for canonical output and encode it as UTF-8.

    The ampersand, less-than and double-quote characters become the
    entities &amp;, &lt; and &quot;; tab, line feed and carriage return
    become the character references &#x9;, &#xA; and &#xD;.

    text -- the attribute value to escape (a string).

    Returns the escaped value, encoded as UTF-8.  If text does not support
    the string operations used here (TypeError or AttributeError), the
    value is handed to _raise_serialization_error instead.
    """
    # applied in this order; "&" comes first so that the ampersands of
    # the replacements themselves are left alone
    substitutions = (
        ("&", "&amp;"),
        ("<", "&lt;"),
        ("\"", "&quot;"),
        ("\t", "&#x9;"),
        ("\n", "&#xA;"),
        ("\r", "&#xD;"),
    )
    try:
        for raw, escaped in substitutions:
            # skip the replace() call when there is nothing to replace
            if raw in text:
                text = text.replace(raw, escaped)
        return text.encode("utf-8")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
89
class WriteC14N:
    """Writer target that turns start/data/end events into C14N markup.

    Every fragment produced is passed to the write callable given to the
    constructor.
    """

    def __init__(self, write):
        """Remember the callable that receives the serialized fragments."""
        self.write = write

    def start(self, tag, attrs):
        """Write a start tag.

        tag -- the element name, as it should appear in the output.
        attrs -- a list of (name, value) pairs, *in the order* in which
            they are to be written; no sorting is done here.
        """
        # FIXME: pass in prefix/uri/tag instead?
        emit = self.write
        emit("<" + tag.encode("utf-8"))
        for name, value in attrs:
            encoded_name = name.encode("utf-8")
            escaped_value = _escape_attrib_c14n(value)
            emit(" %s=\"%s\"" % (encoded_name, escaped_value))
        emit(">")

    def data(self, data):
        """Write character data, escaped with _escape_cdata_c14n."""
        escaped = _escape_cdata_c14n(data)
        self.write(escaped)

    def end(self, tag):
        """Write the end tag for tag."""
        closing = "</" + tag.encode("utf-8") + ">"
        self.write(closing)
110
def _serialize(elem, target, qnames, namespaces):
    """Feed an element tree to a writer target, using a qname table.

    elem -- the element at the top of the subtree to serialize.
    target -- object with start(tag, attrs), data(text) and end(tag)
        methods; it receives the events in document order.
    qnames -- mapping from element tags and attribute keys to the names
        to write.
    namespaces -- mapping from namespace URI to prefix.  All of its
        entries are declared on the top element only; may be empty or None.
    """

    # event generator
    def walk(node, declarations=None):
        name = qnames[node.tag]
        pairs = []
        # namespaces first, sorted by prefix
        if declarations:
            by_prefix = sorted(declarations.items(), key=lambda item: item[1])
            pairs.extend(("xmlns:" + prefix, uri) for uri, prefix in by_prefix)
        # attributes next, sorted by (uri, local)
        pairs.extend(
            (qnames[key], value) for key, value in sorted(node.attrib.items())
        )
        target.start(name, pairs)
        if node.text:
            target.data(node.text)
        # children never get declarations of their own
        for child in node:
            walk(child)
        target.end(name)
        if node.tail:
            target.data(node.tail)

    walk(elem, namespaces)
134
def _serialize_scope(elem, target, scope, parent):
    """Feed an element tree to a writer target, reusing recorded prefixes.

    elem -- the element at the top of the subtree to serialize.
    target -- object with start(tag, attrs), data(text) and end(tag)
        methods; it receives the events in document order.
    scope -- mapping from element to the list of (prefix, uri) pairs
        declared on that element; the default namespace has the empty
        string as its prefix.
    parent -- mapping from element to its parent element.

    Raises IOError if a tag or attribute uses a namespace URI that is not
    declared on the element or on any of its ancestors.
    """

    def findprefix(elem, uri):
        # look for a declaration of uri on elem, then on each ancestor in
        # turn; returns the prefix, or None if the uri is not in scope
        context = elem
        while context is not None:
            for p, u in scope.get(context) or ():
                if u == uri:
                    return p
            context = parent.get(context)
        return None

    def qname(elem, qname):
        # turn a "{uri}local" name into "prefix:local"; names without a
        # namespace are returned as is
        if qname[:1] == "{":
            uri, tag = qname[1:].split("}", 1)
            prefix = findprefix(elem, uri)
            if prefix == "":
                return tag # default namespace
            if prefix is None:
                raise IOError("%s not in scope" % uri) # FIXME
            return prefix + ":" + tag
        else:
            return qname

    def emit(elem):
        tag = qname(elem, elem.tag)
        attrib = []
        # namespaces first, sorted by prefix
        namespaces = scope.get(elem)
        if namespaces:
            # FIXME: check subtree for irrelevant declarations
            # the entries are (prefix, uri) pairs, so the prefix is item 0;
            # the empty prefix of the default namespace sorts first
            for p, u in sorted(namespaces, key=lambda x: x[0]):
                if p:
                    attrib.append(("xmlns:" + p, u))
                else:
                    # the default namespace must be declared as well, since
                    # qname() writes its elements without a prefix
                    attrib.append(("xmlns", u))
        # attributes next, sorted by (uri, local)
        for k, v in sorted(elem.attrib.items()):
            attrib.append((qname(elem, k), v))
        target.start(tag, attrib)
        if elem.text:
            target.data(elem.text)
        for e in elem:
            emit(e)
        target.end(tag)
        if elem.tail:
            target.data(elem.tail)

    emit(elem)
183
184#
185# hook used by ElementTree's c14n output method
186
def _serialize_c14n(write, elem, encoding, qnames, namespaces, subset=None):
    """Serialize elem in canonical form through the write callable.

    write -- callable that receives each serialized fragment.
    elem -- the element at the top of the subtree to serialize.
    encoding -- must be "utf-8"; anything else raises ValueError.
    qnames, namespaces -- name tables, passed on to _serialize.
    subset -- accepted but not used by this function.
    """
    if encoding != "utf-8":
        message = "invalid encoding (%s)" % encoding
        raise ValueError(message)
    writer = WriteC14N(write)
    _serialize(elem, writer, qnames, namespaces)
191
192##
193# Writes a canonicalized document.
194#
195# @param elem Element or ElementTree.  If passed a tree created by {@link
196#     parse}, the function attempts to preserve existing prefixes.
197#     Otherwise, new prefixes are allocated.
198# @param file Output file.  Can be either a filename or a file-like object.
199# @param subset_element Subset element.  This element, and all its
200#     subelements, are serialized as a complete subdocument.
201
def write(elem, file, subset_element=None):
    """Write a canonicalized document.

    elem -- Element or ElementTree.  A tree that carries the _scope and
        _parent attributes set by this module's parse function is written
        with the prefixes recorded there; for anything else, the name
        tables are obtained from _namespaces.
    file -- output file; either a filename or an object with a write
        method.  A file opened here from a filename is closed again before
        returning; a file object passed in by the caller is left open.
    subset_element -- must be None; subset output is not enabled in this
        release, and any other value raises AssertionError.
    """
    if subset_element is not None:
        # raised explicitly: an assert statement is skipped under -O, and
        # the argument would then be silently ignored
        raise AssertionError("not enabled in this release")
    opened_here = not hasattr(file, "write")
    if opened_here:
        file = open(file, "wb")
    try:
        target = WriteC14N(file.write)
        if hasattr(elem, "_scope"):
            # annotated tree: reuse the recorded prefixes
            _serialize_scope(elem.getroot(), target, elem._scope, elem._parent)
        else:
            # _serialize reads .tag and .attrib, so a tree wrapper has to
            # be replaced by its root element first
            if hasattr(elem, "getroot"):
                elem = elem.getroot()
            # allocate new prefixes
            qnames, namespaces = _namespaces(elem, "utf-8")
            _serialize(elem, target, qnames, namespaces)
    finally:
        # only close what was opened here
        if opened_here:
            file.close()
213
214##
215# Parses an XML file, and builds a tree annotated with scope and parent
216# information.
217#
218# @param file A file name or file object.
219# @return An extended ElementTree, with extra scope and parent information.
220
def parse(file):
    """Parse an XML file into a tree annotated with scope and parent info.

    file -- a file name or file object, passed on to iterparse.

    Returns an ElementTree with two extra attributes:
    _scope -- maps an element to the list of namespace declarations made
        on it, in the form reported by the "start-ns" events; elements
        without declarations have no entry.
    _parent -- maps every element except the root to its parent element.
    """

    # "end" has to be requested too; without it the stack below is never
    # popped, and every element gets the previously started element as
    # its parent
    events = "start", "start-ns", "end"

    root = None
    ns_map = []  # declarations seen since the last "start" event

    scope = {}
    parent = {}

    stack = []  # the elements that are currently open, outermost first

    for event, elem in iterparse(file, events):

        if event == "start-ns":
            # collected here, and attached to the element that starts next
            ns_map.append(elem)

        elif event == "start":
            if stack:
                parent[elem] = stack[-1]
            stack.append(elem)
            if root is None:
                root = elem
            if ns_map:
                scope[elem] = ns_map
                ns_map = []

        elif event == "end":
            stack.pop()

    tree = ElementTree(root)
    tree._scope = scope
    tree._parent = parent

    return tree
Note: See TracBrowser for help on using the repository browser.