source: exist/branches/proglue_production_rev4605_xquery4884/python/elementtree-1.3a6-20070310-badc/elementtree/ElementTree.py @ 4886

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/branches/proglue_production_rev4605_xquery4884/python/elementtree-1.3a6-20070310-badc/elementtree/ElementTree.py@4886
Revision 4886, 54.3 KB checked in by sdonegan, 10 years ago (diff)

Creating operational branch for ndgUtils as used on proglue

Line 
1#
2# ElementTree
3# $Id: ElementTree.py 3384 2008-02-20 08:24:25Z fredrik $
4#
5# light-weight XML support for Python 2.3 and later.
6#
7# history:
8# 2001-10-20 fl   created (from various sources)
9# 2001-11-01 fl   return root from parse method
10# 2002-02-16 fl   sort attributes in lexical order
11# 2002-04-06 fl   TreeBuilder refactoring, added PythonDoc markup
12# 2002-05-01 fl   finished TreeBuilder refactoring
13# 2002-07-14 fl   added basic namespace support to ElementTree.write
14# 2002-07-25 fl   added QName attribute support
15# 2002-10-20 fl   fixed encoding in write
16# 2002-11-24 fl   changed default encoding to ascii; fixed attribute encoding
17# 2002-11-27 fl   accept file objects or file names for parse/write
18# 2002-12-04 fl   moved XMLTreeBuilder back to this module
19# 2003-01-11 fl   fixed entity encoding glitch for us-ascii
20# 2003-02-13 fl   added XML literal factory
21# 2003-02-21 fl   added ProcessingInstruction/PI factory
22# 2003-05-11 fl   added tostring/fromstring helpers
23# 2003-05-26 fl   added ElementPath support
24# 2003-07-05 fl   added makeelement factory method
25# 2003-07-28 fl   added more well-known namespace prefixes
26# 2003-08-15 fl   fixed typo in ElementTree.findtext (Thomas Dartsch)
27# 2003-09-04 fl   fall back on emulator if ElementPath is not installed
28# 2003-10-31 fl   markup updates
29# 2003-11-15 fl   fixed nested namespace bug
30# 2004-03-28 fl   added XMLID helper
31# 2004-06-02 fl   added default support to findtext
32# 2004-06-08 fl   fixed encoding of non-ascii element/attribute names
33# 2004-08-23 fl   take advantage of post-2.1 expat features
34# 2004-09-03 fl   made Element class visible; removed factory
35# 2005-02-01 fl   added iterparse implementation
36# 2005-03-02 fl   fixed iterparse support for pre-2.2 versions
37# 2005-11-12 fl   added tostringlist/fromstringlist helpers
38# 2006-07-05 fl   merged in selected changes from the 1.3 sandbox
39# 2006-07-05 fl   removed support for 2.1 and earlier
40# 2007-06-21 fl   added deprecation/future warnings
41# 2007-08-25 fl   added doctype hook, added parser version attribute etc
42# 2007-08-26 fl   added new serializer code (better namespace handling, etc)
43# 2007-08-27 fl   warn for broken /tag searches on tree level
44# 2007-09-02 fl   added html/text methods to serializer (experimental)
45# 2007-09-05 fl   added method argument to tostring/tostringlist
46# 2007-09-06 fl   improved error handling
47# 2007-09-13 fl   added itertext, iterfind; assorted cleanups
48# 2007-12-15 fl   added C14N hooks, copy method (experimental)
49#
50# Copyright (c) 1999-2008 by Fredrik Lundh.  All rights reserved.
51#
52# fredrik@pythonware.com
53# http://www.pythonware.com
54#
55# --------------------------------------------------------------------
56# The ElementTree toolkit is
57#
58# Copyright (c) 1999-2008 by Fredrik Lundh
59#
60# By obtaining, using, and/or copying this software and/or its
61# associated documentation, you agree that you have read, understood,
62# and will comply with the following terms and conditions:
63#
64# Permission to use, copy, modify, and distribute this software and
65# its associated documentation for any purpose and without fee is
66# hereby granted, provided that the above copyright notice appears in
67# all copies, and that both that copyright notice and this permission
68# notice appear in supporting documentation, and that the name of
69# Secret Labs AB or the author not be used in advertising or publicity
70# pertaining to distribution of the software without specific, written
71# prior permission.
72#
73# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
74# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
75# ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
76# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
77# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
78# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
79# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
80# OF THIS SOFTWARE.
81# --------------------------------------------------------------------
82
# Names exported by "from ElementTree import *".
__all__ = [
    # public symbols
    "Comment",
    "dump",
    "Element", "ElementTree",
    "fromstring", "fromstringlist",
    "iselement", "iterparse",
    "parse", "ParseError",
    "PI", "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring", "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML",
    "XMLParser", "XMLTreeBuilder",
    ]

# Version string of this ElementTree snapshot.
VERSION = "1.3aX"
102
103##
104# The <b>Element</b> type is a flexible container object, designed to
105# store hierarchical data structures in memory. The type can be
106# described as a cross between a list and a dictionary.
107# <p>
108# Each element has a number of properties associated with it:
109# <ul>
110# <li>a <i>tag</i>. This is a string identifying what kind of data
111# this element represents (the element type, in other words).</li>
112# <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
113# <li>a <i>text</i> string.</li>
114# <li>an optional <i>tail</i> string.</li>
115# <li>a number of <i>child elements</i>, stored in a Python sequence</li>
116# </ul>
117#
118# To create an element instance, use the {@link #Element} constructor
119# or the {@link #SubElement} factory function.
120# <p>
121# The {@link #ElementTree} class can be used to wrap an element
122# structure, and convert it from and to XML.
123##
124
125import sys, re
126
127class _SimpleElementPath(object):
128    # emulate pre-1.2 find/findtext/findall behaviour
129    def find(self, element, tag, namespaces=None):
130        for elem in element:
131            if elem.tag == tag:
132                return elem
133        return None
134    def findtext(self, element, tag, default=None, namespaces=None):
135        for elem in element:
136            if elem.tag == tag:
137                return elem.text or ""
138        return default
139    def findall(self, element, tag, namespaces=None):
140        if tag[:3] == ".//":
141            return list(element.iter(tag[3:]))
142        result = []
143        for elem in element:
144            if elem.tag == tag:
145                result.append(elem)
146        return result
147
# Use the full ElementPath module when it can be imported; otherwise
# bind the name to an instance of the simple emulator defined above.
try:
    import ElementPath
except ImportError:
    ElementPath = _SimpleElementPath()
152
153##
154# Parser error.  This is a subclass of <b>SyntaxError</b>.
155# <p>
156# In addition to the exception value, an exception instance contains a
157# specific exception code in the <b>code</b> attribute, and the line and
158# column of the error in the <b>position</b> attribute.
159
class ParseError(SyntaxError):
    """Parser error; a subclass of SyntaxError.

    The class adds nothing of its own.  According to the module's
    PythonDoc comment, instances carry a "code" attribute and a
    "position" (line, column) attribute; those are assigned by the
    code that raises the exception, not by this class.
    """
    pass
162
163# --------------------------------------------------------------------
164
165##
166# Checks if an object appears to be a valid element object.
167#
168# @param element An element instance.
169# @return A true value if this is an element object.
170# @defreturn flag
171
def iselement(element):
    # Duck-typed check: real Element instances pass immediately, and
    # so does any other object that exposes a "tag" attribute.
    # FIXME: not sure about this; might be a better idea to look
    # for tag/attrib/text attributes
    if isinstance(element, Element):
        return True
    return hasattr(element, "tag")
176
177##
178# Element class.  This class defines the Element interface, and
179# provides a reference implementation of this interface.
180# <p>
181# The element name, attribute names, and attribute values can be
182# either ASCII strings (ordinary Python strings containing only 7-bit
183# ASCII characters) or Unicode strings.
184#
185# @param tag The element name.
186# @param attrib An optional dictionary, containing element attributes.
187# @param **extra Additional attributes, given as keyword arguments.
188# @see Element
189# @see SubElement
190# @see Comment
191# @see ProcessingInstruction
192
class Element(object):
    # serialized layout: <tag attrib>text<child/>...</tag>tail

    ##
    # (Attribute) Element tag.

    tag = None

    ##
    # (Attribute) Element attribute dictionary.  Where possible, go
    # through {@link #Element.get}, {@link #Element.set},
    # {@link #Element.keys}, and {@link #Element.items} rather than
    # touching the dictionary directly.

    attrib = None

    ##
    # (Attribute) Text before the first subelement.  Either a string
    # or None.  An element without text may hold None or an empty
    # string here, depending on the parser.

    text = None

    ##
    # (Attribute) Text after this element's end tag, but before the
    # next sibling element's start tag.  Either a string or None.  An
    # element without tail text may hold None or an empty string here,
    # depending on the parser.

    tail = None # text after end tag, if any

    # constructor

    def __init__(self, tag, attrib={}, **extra):
        # work on a private copy, so neither the caller's dictionary
        # nor the shared default is modified by the keyword merge
        merged = attrib.copy()
        merged.update(extra)
        self.tag = tag
        self.attrib = merged
        self._children = []

    def __repr__(self):
        tag_repr = repr(self.tag)
        return "<Element %s at %x>" % (tag_repr, id(self))

    ##
    # Creates a new element object of the same type as this element.
    #
    # @param tag Element tag.
    # @param attrib Element attributes, given as a dictionary.
    # @return A new element instance.

    def makeelement(self, tag, attrib):
        factory = self.__class__
        return factory(tag, attrib)

    ##
    # (Experimental) Copies the current element.  The copy is shallow:
    # the new element gets its own child list, but the subelements in
    # it are shared with the original tree.
    #
    # @return A new element instance.

    def copy(self):
        clone = self.makeelement(self.tag, self.attrib)
        clone.text, clone.tail = self.text, self.tail
        clone[:] = self
        return clone

    ##
    # Returns the number of subelements.  Only full elements are
    # counted; to find out whether an element has any content at all,
    # check both the length and the <b>text</b> attribute.
    #
    # @return The number of subelements.

    def __len__(self):
        children = self._children
        return len(children)

    def __nonzero__(self):
        import warnings
        message = (
            "The behavior of this method will change in future versions. "
            "Use specific 'len(elem)' or 'elem is not None' test instead."
            )
        warnings.warn(message, FutureWarning, stacklevel=2)
        # emulate old behaviour, for now: true only if there are children
        return len(self._children) > 0

    ##
    # Returns the given subelement, by index.
    #
    # @param index What subelement to return.
    # @return The given subelement.
    # @exception IndexError If the given element does not exist.

    def __getitem__(self, index):
        children = self._children
        return children[index]

    ##
    # Replaces the given subelement, by index.
    #
    # @param index What subelement to replace.
    # @param element The new element value.
    # @exception IndexError If the given element does not exist.

    def __setitem__(self, index, element):
        # the element is stored as given; it is not type checked
        children = self._children
        children[index] = element

    ##
    # Deletes the given subelement, by index.
    #
    # @param index What subelement to delete.
    # @exception IndexError If the given element does not exist.

    def __delitem__(self, index):
        children = self._children
        del children[index]

    ##
    # Returns a list containing subelements in the given range.
    #
    # @param start The first subelement to return.
    # @param stop The first subelement that shouldn't be returned.
    # @return A sequence object containing subelements.

    def __getslice__(self, start, stop):
        children = self._children
        return children[start:stop]

    ##
    # Replaces a number of subelements with elements from a sequence.
    #
    # @param start The first subelement to replace.
    # @param stop The first subelement that shouldn't be replaced.
    # @param elements A sequence object with zero or more elements.

    def __setslice__(self, start, stop, elements):
        # the elements are stored as given; they are not type checked
        replacement = list(elements)
        self._children[start:stop] = replacement

    ##
    # Deletes a number of subelements.
    #
    # @param start The first subelement to delete.
    # @param stop The first subelement to leave in there.

    def __delslice__(self, start, stop):
        children = self._children
        del children[start:stop]

    ##
    # Adds a subelement to the end of this element.  In document
    # order, the new element ends up after the last existing
    # subelement (or directly after the text, if it is the first
    # subelement), but before this element's end tag.
    #
    # @param element The element to add.

    def append(self, element):
        # the element is stored as given; it is not type checked
        children = self._children
        children.append(element)

    ##
    # Appends subelements from a sequence.
    #
    # @param elements A sequence object with zero or more elements.
    # @since 1.3

    def extend(self, elements):
        # the elements are stored as given; they are not type checked
        children = self._children
        children.extend(elements)

    ##
    # Inserts a subelement at the given position in this element.
    #
    # @param index Where to insert the new subelement.
    # @param element The element to insert.

    def insert(self, index, element):
        # the element is stored as given; it is not type checked
        children = self._children
        children.insert(index, element)

    ##
    # Removes a matching subelement.  The subelement is looked up with
    # list.remove on the child list, so, unlike the <b>find</b>
    # methods, tag values and contents are not inspected.  To remove
    # subelements by other criteria, the easiest way is often to use a
    # list comprehension to select the elements to keep, and slice
    # assignment to update the parent element.
    #
    # @param element What element to remove.
    # @exception ValueError If a matching element could not be found.

    def remove(self, element):
        children = self._children
        children.remove(element)

    ##
    # (Deprecated) Returns all subelements, in document order.  The
    # list returned is the element's own child list, not a copy.
    #
    # @return A list of subelements.
    # @defreturn list of Element instances

    def getchildren(self):
        import warnings
        message = (
            "This method will be removed in future versions. "
            "Use 'list(elem)' or iteration over elem instead."
            )
        warnings.warn(message, DeprecationWarning, stacklevel=2)
        return self._children

    ##
    # Finds the first matching subelement, by tag name or path.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The first matching element, or None if no element was found.
    # @defreturn Element or None

    def find(self, path, namespaces=None):
        finder = ElementPath.find
        return finder(self, path, namespaces)

    ##
    # Finds text for the first matching subelement, by tag name or path.
    #
    # @param path What element to look for.
    # @param default What to return if the element was not found.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The text content of the first matching element, or the
    #     default value if no element was found.  Note that if the
    #     element is found, but has no text content, this method
    #     returns an empty string.
    # @defreturn string

    def findtext(self, path, default=None, namespaces=None):
        finder = ElementPath.findtext
        return finder(self, path, default, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return A list or other sequence containing all matching elements,
    #    in document order.
    # @defreturn list of Element instances

    def findall(self, path, namespaces=None):
        finder = ElementPath.findall
        return finder(self, path, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return An iterator or sequence containing all matching elements,
    #    in document order.
    # @defreturn a generated sequence of Element instances

    def iterfind(self, path, namespaces=None):
        finder = ElementPath.iterfind
        return finder(self, path, namespaces)

    ##
    # Resets an element.  This removes all subelements, clears all
    # attributes, and sets the <b>text</b> and <b>tail</b> attributes
    # to None.

    def clear(self):
        self.attrib.clear()
        self._children = []
        self.text = None
        self.tail = None

    ##
    # Gets an element attribute.  Equivalent to <b>attrib.get</b>, but
    # some implementations may handle this a bit more efficiently.
    #
    # @param key What attribute to look for.
    # @param default What to return if the attribute was not found.
    # @return The attribute value, or the default value, if the
    #     attribute was not found.
    # @defreturn string or None

    def get(self, key, default=None):
        attributes = self.attrib
        return attributes.get(key, default)

    ##
    # Sets an element attribute.  Equivalent to <b>attrib[key] = value</b>,
    # but some implementations may handle this a bit more efficiently.
    #
    # @param key What attribute to set.
    # @param value The attribute value.

    def set(self, key, value):
        attributes = self.attrib
        attributes[key] = value

    ##
    # Gets a list of attribute names.  The names are returned in an
    # arbitrary order (just like for an ordinary Python dictionary).
    # Equivalent to <b>attrib.keys()</b>.
    #
    # @return A list of element attribute names.
    # @defreturn list of strings

    def keys(self):
        attributes = self.attrib
        return attributes.keys()

    ##
    # Gets element attributes, as a sequence.  The attributes are
    # returned in an arbitrary order.  Equivalent to <b>attrib.items()</b>.
    #
    # @return A list of (name, value) tuples for all attributes.
    # @defreturn list of (string, string) tuples

    def items(self):
        attributes = self.attrib
        return attributes.items()

    ##
    # Creates a tree iterator.  The iterator loops over this element
    # and all subelements, in document order, and returns all elements
    # with a matching tag.
    # <p>
    # If the tree structure is modified during iteration, new or removed
    # elements may or may not be included.  To get a stable set, use the
    # list() function on the iterator, and loop over the resulting list.
    #
    # @param tag What tags to look for (default is to return all elements).
    # @return An iterator containing all the matching elements.
    # @defreturn iterator

    def iter(self, tag=None):
        # "*" is a wildcard and is treated exactly like no tag at all
        wanted = tag
        if wanted == "*":
            wanted = None
        if wanted is None or self.tag == wanted:
            yield self
        for child in self._children:
            for descendant in child.iter(wanted):
                yield descendant

    # compatibility
    def getiterator(self, tag=None):
        # returns a list, not a lazy iterator.  The original source
        # carries a commented-out DeprecationWarning marked "enable in
        # 1.4", pointing users at 'elem.iter()' / 'list(elem.iter())'.
        matches = self.iter(tag)
        return list(matches)

    ##
    # Creates a text iterator.  The iterator loops over this element
    # and all subelements, in document order, and returns all inner
    # text.
    #
    # @return An iterator containing all inner text.
    # @defreturn iterator

    def itertext(self):
        tag = self.tag
        # elements whose tag is neither a string nor None (for example
        # the factory functions used as tags for comments and
        # processing instructions) contribute no text
        has_string_tag = isinstance(tag, basestring)
        if not (has_string_tag or tag is None):
            return
        if self.text:
            yield self.text
        for child in self:
            for fragment in child.itertext():
                yield fragment
            if child.tail:
                yield child.tail
561
# compatibility: private aliases bound to the Element class
_Element = _ElementInterface = Element
564
565##
566# Subelement factory.  This function creates an element instance, and
567# appends it to an existing element.
568# <p>
569# The element name, attribute names, and attribute values can be
570# either 8-bit ASCII strings or Unicode strings.
571#
572# @param parent The parent element.
573# @param tag The subelement name.
574# @param attrib An optional dictionary, containing element attributes.
575# @param **extra Additional attributes, given as keyword arguments.
576# @return An element instance.
577# @defreturn Element
578
def SubElement(parent, tag, attrib={}, **extra):
    # Merge the keyword attributes into a private copy of the
    # dictionary, let the parent build a child of its own type, and
    # attach the child before handing it back.
    merged = attrib.copy()
    merged.update(extra)
    child = parent.makeelement(tag, merged)
    parent.append(child)
    return child
585
586##
587# Comment element factory.  This factory function creates a special
588# element that will be serialized as an XML comment by the standard
589# serializer.
590# <p>
591# The comment string can be either an 8-bit ASCII string or a Unicode
592# string.
593#
594# @param text A string containing the comment string.
595# @return An element instance, representing a comment.
596# @defreturn Element
597
def Comment(text=None):
    # The factory function itself is used as the element tag; the
    # comment string is kept in the text attribute.
    node = Element(Comment)
    node.text = text
    return node
602
603##
604# PI element factory.  This factory function creates a special element
605# that will be serialized as an XML processing instruction by the standard
606# serializer.
607#
608# @param target A string containing the PI target.
609# @param text A string containing the PI contents, if any.
610# @return An element instance, representing a PI.
611# @defreturn Element
612
def ProcessingInstruction(target, text=None):
    # The factory function itself is used as the element tag.  The
    # text attribute holds the target, followed by a single space and
    # the contents when non-empty contents are given.
    content = target
    if text:
        content = content + " " + text
    node = Element(ProcessingInstruction)
    node.text = content
    return node

# short alias for the factory
PI = ProcessingInstruction
621
622##
623# QName wrapper.  This can be used to wrap a QName attribute value, in
624# order to get proper namespace handling on output.
625#
626# @param text A string containing the QName value, in the form {uri}local,
627#     or, if the tag argument is given, the URI part of a QName.
628# @param tag Optional tag.  If given, the first argument is interpreted as
629#     an URI, and this argument is interpreted as a local name.
630# @return An opaque object, representing the QName.
631
class QName(object):
    # Wraps a qualified name.  The value is kept in the text attribute
    # in "{uri}local" form; hashing and comparison are based on that
    # string.

    def __init__(self, text_or_uri, tag=None):
        # with a non-empty tag, the first argument is the namespace URI
        # and the two are combined; otherwise it is stored unchanged
        if tag:
            qualified = "{%s}%s" % (text_or_uri, tag)
        else:
            qualified = text_or_uri
        self.text = qualified

    def __str__(self):
        return self.text

    def __hash__(self):
        return hash(self.text)

    def __cmp__(self, other):
        # compare against the wrapped string of another QName, or
        # against the other object itself if it is not a QName
        rhs = other
        if isinstance(other, QName):
            rhs = other.text
        return cmp(self.text, rhs)
645
646# --------------------------------------------------------------------
647
648##
649# ElementTree wrapper class.  This class represents an entire element
650# hierarchy, and adds some extra support for serialization to and from
651# standard XML.
652#
653# @param element Optional root element.
654# @keyparam file Optional file handle or file name.  If given, the
655#     tree is initialized with the contents of this XML file.
656
657class ElementTree(object):
658
659    def __init__(self, element=None, file=None):
660        # assert element is None or iselement(element)
661        self._root = element # first node
662        if file:
663            self.parse(file)
664
665    ##
666    # Gets the root element for this tree.
667    #
668    # @return An element instance.
669    # @defreturn Element
670
671    def getroot(self):
672        return self._root
673
674    ##
675    # Replaces the root element for this tree.  This discards the
676    # current contents of the tree, and replaces it with the given
677    # element.  Use with care.
678    #
679    # @param element An element instance.
680
681    def _setroot(self, element):
682        # assert iselement(element)
683        self._root = element
684
685    ##
686    # Loads an external XML document into this element tree.
687    #
688    # @param source A file name or file object.  If a file object is
689    #     given, it only has to implement a <b>read(n)</b> method.
690    # @keyparam parser An optional parser instance.  If not given, the
691    #     standard {@link XMLParser} parser is used.
692    # @return The document root element.
693    # @defreturn Element
694    # @exception ParseError If the parser fails to parse the document.
695
696    def parse(self, source, parser=None):
697        if not hasattr(source, "read"):
698            source = open(source, "rb")
699        if not parser:
700            parser = XMLParser(target=TreeBuilder())
701        while 1:
702            data = source.read(65536)
703            if not data:
704                break
705            parser.feed(data)
706        self._root = parser.close()
707        return self._root
708
709    ##
710    # Creates a tree iterator for the root element.  The iterator loops
711    # over all elements in this tree, in document order.
712    #
713    # @param tag What tags to look for (default is to return all elements)
714    # @return An iterator.
715    # @defreturn iterator
716
717    def iter(self, tag=None):
718        # assert self._root is not None
719        return self._root.iter(tag)
720
721    getiterator = iter
722
723    ##
724    # Finds the first toplevel element with given tag.
725    # Same as getroot().find(path).
726    #
727    # @param path What element to look for.
728    # @keyparam namespaces Optional namespace prefix map.
729    # @return The first matching element, or None if no element was found.
730    # @defreturn Element or None
731
732    def find(self, path, namespaces=None):
733        # assert self._root is not None
734        if path[:1] == "/":
735            path = "." + path
736            import warnings
737            warnings.warn(
738                "This search is broken in 1.3 and earlier, and will be "
739                "fixed in a future version. If you rely on the current "
740                "behaviour, change it to %r" % path,
741                FutureWarning, stacklevel=2
742                )
743        return self._root.find(path, namespaces)
744
745    ##
746    # Finds the element text for the first toplevel element with given
747    # tag.  Same as getroot().findtext(path).
748    #
749    # @param path What toplevel element to look for.
750    # @param default What to return if the element was not found.
751    # @keyparam namespaces Optional namespace prefix map.
752    # @return The text content of the first matching element, or the
753    #     default value no element was found.  Note that if the element
754    #     has is found, but has no text content, this method returns an
755    #     empty string.
756    # @defreturn string
757
758    def findtext(self, path, default=None, namespaces=None):
759        # assert self._root is not None
760        if path[:1] == "/":
761            path = "." + path
762            import warnings
763            warnings.warn(
764                "This search is broken in 1.3 and earlier, and will be "
765                "fixed in a future version. If you rely on the current "
766                "behaviour, change it to %r" % path,
767                FutureWarning, stacklevel=2
768                )
769        return self._root.findtext(path, default, namespaces)
770
771    ##
772    # Finds all toplevel elements with the given tag.
773    # Same as getroot().findall(path).
774    #
775    # @param path What element to look for.
776    # @keyparam namespaces Optional namespace prefix map.
777    # @return A list or iterator containing all matching elements,
778    #    in document order.
779    # @defreturn list of Element instances
780
781    def findall(self, path, namespaces=None):
782        # assert self._root is not None
783        if path[:1] == "/":
784            path = "." + path
785            import warnings
786            warnings.warn(
787                "This search is broken in 1.3 and earlier, and will be "
788                "fixed in a future version. If you rely on the current "
789                "behaviour, change it to %r" % path,
790                FutureWarning, stacklevel=2
791                )
792        return self._root.findall(path, namespaces)
793
794    ##
795    # Writes the element tree to a file, as XML.
796    #
797    # @def write(file, **options)
798    # @param file A file name, or a file object opened for writing.
799    # @param **options Options, given as keyword arguments.
800    # @keyparam encoding Optional output encoding (default is US-ASCII).
801    # @keyparam method Optional output method ("xml", "html", "text" or
802    #     "c14n"; default is "xml".
803    # @keyparam xml_declaration Controls if an XML declaration should
804    #     be added to the file.  Use False for never, True for always,
805    #     None for only if not US-ASCII or UTF-8.  None is default.
806
807    def write(self, file_or_filename,
808              # keyword arguments
809              encoding=None,
810              xml_declaration=None,
811              default_namespace=None,
812              method=None):
813        # assert self._root is not None
814        if hasattr(file_or_filename, "write"):
815            file = file_or_filename
816        else:
817            file = open(file_or_filename, "wb")
818        write = file.write
819        if not method:
820            method = "xml"
821        if not encoding:
822            if method == "c14n":
823                encoding = "utf-8"
824            else:
825                encoding = "us-ascii"
826        elif xml_declaration or (xml_declaration is None and
827                                 encoding not in ("utf-8", "us-ascii")):
828            if method == "xml":
829                write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
830        if method == "text":
831            _serialize_text(write, self._root, encoding)
832        else:
833            qnames, namespaces = _namespaces(
834                self._root, encoding, default_namespace
835                )
836            if method == "xml":
837                _serialize_xml(
838                    write, self._root, encoding, qnames, namespaces
839                    )
840            elif method == "html":
841                _serialize_html(
842                    write, self._root, encoding, qnames, namespaces
843                    )
844            elif method == "c14n":
845                import ElementC14N
846                ElementC14N._serialize_c14n(
847                    write, self._root, encoding, qnames, namespaces
848                    )
849            else:
850                raise ValueError("unknown method %r" % method)
851        if file_or_filename is not file:
852            file.close()
853
    ##
    # Writes the tree using the "c14n" output method.  Thin wrapper
    # around write(), kept for lxml.etree compatibility.
    #
    # @param file Passed to write() as its first argument.
    # @return Whatever write() returns.

    def write_c14n(self, file):
        # lxml.etree compatibility.  use output method instead
        return self.write(file, method="c14n")
857
858# --------------------------------------------------------------------
859# serialization support
860
def _namespaces(elem, encoding, default_namespace=None):
    # identify namespaces used in this tree
    #
    # Walks elem and all its subelements and returns a (qnames,
    # namespaces) tuple.  qnames maps every tag, attribute name and
    # QName value found in the tree to its encoded serialized name;
    # namespaces maps every namespace uri that needs a declaration to
    # its prefix.  Raises ValueError if default_namespace is given and
    # an unqualified name is found.  A tag that is neither a string, a
    # QName, None, Comment nor PI is reported through
    # _raise_serialization_error.

    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        # the empty prefix stands for the default namespace
        namespaces[default_namespace] = ""

    def encode(text):
        return text.encode(encoding)

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] == "{":
                # "{uri}local" form
                uri, tag = qname[1:].split("}", 1)
                # prefer a prefix already chosen for this tree, then a
                # registered one, then generate a new "ns<number>"
                prefix = namespaces.get(uri)
                if prefix is None:
                    prefix = _namespace_map.get(uri)
                    if prefix is None:
                        prefix = "ns%d" % len(namespaces)
                    # "xml" is not recorded, so no declaration is
                    # written for it
                    if prefix != "xml":
                        namespaces[uri] = prefix
                if prefix:
                    qnames[qname] = encode("%s:%s" % (prefix, tag))
                else:
                    qnames[qname] = encode(tag) # default element
            else:
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = encode(qname)
        except TypeError:
            # presumably a name that is not a string at all
            _raise_serialization_error(qname)

    # populate qname and namespaces table
    try:
        iterate = elem.iter
    except AttributeError:
        iterate = elem.getiterator # cET compatibility
    for elem in iterate():
        tag = elem.tag
        if isinstance(tag, QName) and tag.text not in qnames:
            add_qname(tag.text)
        elif isinstance(tag, basestring):
            if tag not in qnames:
                add_qname(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            _raise_serialization_error(tag)
        # attribute names, and attribute values given as QName objects
        for key, value in elem.items():
            if isinstance(key, QName):
                key = key.text
            if key not in qnames:
                add_qname(key)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        # element text given as a QName object
        text = elem.text
        if isinstance(text, QName) and text.text not in qnames:
            add_qname(text.text)
    return qnames, namespaces
927
def _serialize_xml(write, elem, encoding, qnames, namespaces):
    # Writes elem, its subelements and its tail as XML, by calling
    # write() with encoded fragments.  qnames maps tags and attribute
    # names to their serialized names.  namespaces maps uri:s to
    # prefixes; the recursive calls pass None instead, so the xmlns
    # declarations are only written on the element this function was
    # first called for.
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text, encoding))
    else:
        tag = qnames[tag]
        if tag is None:
            # no serialized name: write the content, but no markup
            if text:
                write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_xml(write, e, encoding, qnames, None)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                items.sort() # lexical order
                if namespaces:
                    i = namespaces.items()
                    try:
                        i.sort(key=lambda x: x[1]) # sort on prefix
                    except TypeError:
                        i.sort(lambda a, b: cmp(a[1], b[1])) # 2.3
                    # v is the namespace uri, k its prefix
                    for v, k in i:
                        if k:
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (
                            k.encode(encoding),
                            _escape_attrib(v, encoding)
                            ))
                # here k is the attribute name, v the attribute value
                for k, v in items:
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib(v, encoding)
                    write(" %s=\"%s\"" % (qnames[k], v))
            if text or len(elem):
                write(">")
                if text:
                    write(_escape_cdata(text, encoding))
                for e in elem:
                    _serialize_xml(write, e, encoding, qnames, None)
                write("</" + tag + ">")
            else:
                # neither text nor subelements: use the short form
                write(" />")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
979
# lower-case names of the elements that the html serializer writes
# without an end tag
HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param")

try:
    HTML_EMPTY = set(HTML_EMPTY)
except NameError:
    pass # no set builtin available; keep the tuple
987
def _serialize_html(write, elem, encoding, qnames, namespaces):
    # Writes elem, its subelements and its tail as HTML, by calling
    # write() with encoded fragments.  qnames maps tags and attribute
    # names to their serialized names.  namespaces maps uri:s to
    # prefixes; the recursive calls pass None instead, so the xmlns
    # declarations are only written on the element this function was
    # first called for.
    #
    # Unlike the xml serializer, this one never writes the " />" short
    # form, writes no end tag for the elements in HTML_EMPTY, and
    # writes the text of script and style elements without escaping
    # the markup characters.
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text, encoding))
    else:
        tag = qnames[tag]
        if tag is None:
            # no serialized name: write the content, but no markup
            if text:
                write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_html(write, e, encoding, qnames, None)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                if namespaces:
                    i = namespaces.items()
                    try:
                        i.sort(key=lambda x: x[1]) # sort on prefix
                    except TypeError:
                        i.sort(lambda a, b: cmp(a[1], b[1])) # 2.3
                    # v is the namespace uri, k its prefix
                    for v, k in i:
                        if k:
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (
                            k.encode(encoding),
                            _escape_attrib(v, encoding)
                            ))
                items.sort() # lexical order
                for k, v in items:
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib_html(v, encoding)
                    # FIXME: handle boolean attributes
                    write(" %s=\"%s\"" % (qnames[k], v))
            write(">")
            # element names are compared in lower case, but the end tag
            # is written with the same spelling as the start tag
            ltag = tag.lower()
            if text:
                if ltag == "script" or ltag == "style":
                    write(_encode(text, encoding))
                else:
                    write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_html(write, e, encoding, qnames, None)
            if ltag not in HTML_EMPTY:
                write("</" + tag + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
1042
1043def _serialize_text(write, elem, encoding):
1044    for part in elem.itertext():
1045        write(part.encode(encoding))
1046    if elem.tail:
1047        write(elem.tail.encode(encoding))
1048
1049##
1050# Registers a namespace prefix.  The registry is global, and any
1051# existing mapping for either the given prefix or the namespace URI
1052# will be removed.
1053#
1054# @param prefix Namespace prefix.
1055# @param uri Namespace uri.  Tags and attributes in this namespace
1056#     will be serialized with the given prefix, if at all possible.
1057# @exception ValueError If the prefix is reserved, or is otherwise
1058#     invalid.
1059
def register_namespace(prefix, uri):
    # prefixes of the form "ns<digits>" are generated by the serializer
    # for namespaces without a registered prefix, so they cannot be
    # registered by hand
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # drop any mapping that uses this uri or this prefix.  the loop
    # works on a snapshot, since it deletes from the dictionary.
    for k, v in list(_namespace_map.items()):
        if k == uri or v == prefix:
            del _namespace_map[k]
    _namespace_map[uri] = prefix
1067
# maps namespace uri:s to the prefixes preferred when serializing.
# read by _namespaces, modified by register_namespace.
_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}
1080
1081def _raise_serialization_error(text):
1082    raise TypeError(
1083        "cannot serialize %r (type %s)" % (text, type(text).__name__)
1084        )
1085
1086def _encode(text, encoding):
1087    try:
1088        return text.encode(encoding, "xmlcharrefreplace")
1089    except (TypeError, AttributeError):
1090        _raise_serialization_error(text)
1091
1092def _escape_cdata(text, encoding):
1093    # escape character data
1094    try:
1095        # it's worth avoiding do-nothing calls for strings that are
1096        # shorter than 500 character, or so.  assume that's, by far,
1097        # the most common case in most applications.
1098        if "&" in text:
1099            text = text.replace("&", "&amp;")
1100        if "<" in text:
1101            text = text.replace("<", "&lt;")
1102        if ">" in text:
1103            text = text.replace(">", "&gt;")
1104        return text.encode(encoding, "xmlcharrefreplace")
1105    except (TypeError, AttributeError):
1106        _raise_serialization_error(text)
1107
1108def _escape_attrib(text, encoding):
1109    # escape attribute value
1110    try:
1111        if "&" in text:
1112            text = text.replace("&", "&amp;")
1113        if "<" in text:
1114            text = text.replace("<", "&lt;")
1115        if ">" in text:
1116            text = text.replace(">", "&gt;")
1117        if "\"" in text:
1118            text = text.replace("\"", "&quot;")
1119        if "\n" in text:
1120            text = text.replace("\n", "&#10;")
1121        return text.encode(encoding, "xmlcharrefreplace")
1122    except (TypeError, AttributeError):
1123        _raise_serialization_error(text)
1124
1125def _escape_attrib_html(text, encoding):
1126    # escape attribute value
1127    try:
1128        if "&" in text:
1129            text = text.replace("&", "&amp;")
1130        if ">" in text:
1131            text = text.replace(">", "&gt;")
1132        if "\"" in text:
1133            text = text.replace("\"", "&quot;")
1134        return text.encode(encoding, "xmlcharrefreplace")
1135    except (TypeError, AttributeError):
1136        _raise_serialization_error(text)
1137
1138# --------------------------------------------------------------------
1139
1140##
1141# Generates a string representation of an XML element, including all
1142# subelements.
1143#
1144# @param element An Element instance.
1145# @return An encoded string containing the XML data.
1146# @defreturn string
1147
def tostring(element, encoding=None, method=None):
    # The tree is written to a throw-away object whose write method is
    # the append method of a list; the collected fragments are joined
    # into a single string.  encoding and method are passed on to
    # ElementTree.write.
    class collector:
        pass
    fragments = []
    sink = collector()
    sink.write = fragments.append
    tree = ElementTree(element)
    tree.write(sink, encoding, method=method)
    return "".join(fragments)
1156
1157##
1158# Generates a string representation of an XML element, including all
1159# subelements.  The string is returned as a sequence of string fragments.
1160#
1161# @param element An Element instance.
1162# @return A sequence object containing the XML data.
1163# @defreturn sequence
1164# @since 1.3
1165
def tostringlist(element, encoding=None, method=None):
    # Same as tostring, except that the fragments are returned as a
    # list and not joined.  encoding and method are passed on to
    # ElementTree.write; with method left at None this behaves exactly
    # as it did before the argument was added.
    class dummy:
        pass
    data = []
    file = dummy()
    # every write() call appends one fragment to the list
    file.write = data.append
    ElementTree(element).write(file, encoding, method=method)
    # FIXME: merge small fragments into larger parts
    return data
1175
1176##
1177# Writes an element tree or element structure to sys.stdout.  This
1178# function should be used for debugging only.
1179# <p>
1180# The exact output format is implementation dependent.  In this
1181# version, it's written as an ordinary XML file.
1182#
1183# @param elem An element tree or an individual element.
1184
def dump(elem):
    # debugging
    tree = elem
    if not isinstance(tree, ElementTree):
        # a plain element: wrap it, so it can be written
        tree = ElementTree(tree)
    tree.write(sys.stdout)
    # finish the output with a newline, unless the tail of the root
    # element already ends with one
    tail = tree.getroot().tail
    if tail and tail[-1] == "\n":
        return
    sys.stdout.write("\n")
1193
1194# --------------------------------------------------------------------
1195# parsing
1196
1197##
1198# Parses an XML document into an element tree.
1199#
1200# @param source A filename or file object containing XML data.
1201# @param parser An optional parser instance.  If not given, the
1202#     standard {@link XMLParser} parser is used.
1203# @return An ElementTree instance
1204
def parse(source, parser=None):
    # create an empty tree, and let its parse method load the document;
    # source and parser are passed on unchanged
    tree = ElementTree()
    tree.parse(source, parser)
    return tree
1209
1210##
1211# Parses an XML document into an element tree incrementally, and reports
1212# what's going on to the user.
1213#
1214# @param source A filename or file object containing XML data.
1215# @param events A list of events to report back.  If omitted, only "end"
1216#     events are reported.
1217# @param parser An optional parser instance.  If not given, the
1218#     standard {@link XMLParser} parser is used.
1219# @return A (event, elem) iterator.
1220
1221def iterparse(source, events=None, parser=None):
1222    if not hasattr(source, "read"):
1223        source = open(source, "rb")
1224    if not parser:
1225        parser = XMLParser(target=TreeBuilder())
1226    return _IterParseIterator(source, events, parser)
1227
1228class _IterParseIterator(object):
1229
1230    def __init__(self, source, events, parser):
1231        self._file = source
1232        self._events = []
1233        self._index = 0
1234        self.root = self._root = None
1235        self._parser = parser
1236        # wire up the parser for event reporting
1237        parser = self._parser._parser
1238        append = self._events.append
1239        if events is None:
1240            events = ["end"]
1241        for event in events:
1242            if event == "start":
1243                try:
1244                    parser.ordered_attributes = 1
1245                    parser.specified_attributes = 1
1246                    def handler(tag, attrib_in, event=event, append=append,
1247                                start=self._parser._start_list):
1248                        append((event, start(tag, attrib_in)))
1249                    parser.StartElementHandler = handler
1250                except AttributeError:
1251                    def handler(tag, attrib_in, event=event, append=append,
1252                                start=self._parser._start):
1253                        append((event, start(tag, attrib_in)))
1254                    parser.StartElementHandler = handler
1255            elif event == "end":
1256                def handler(tag, event=event, append=append,
1257                            end=self._parser._end):
1258                    append((event, end(tag)))
1259                parser.EndElementHandler = handler
1260            elif event == "start-ns":
1261                def handler(prefix, uri, event=event, append=append):
1262                    try:
1263                        uri = (uri or "").encode("ascii")
1264                    except UnicodeError:
1265                        pass
1266                    append((event, (prefix or "", uri or "")))
1267                parser.StartNamespaceDeclHandler = handler
1268            elif event == "end-ns":
1269                def handler(prefix, event=event, append=append):
1270                    append((event, None))
1271                parser.EndNamespaceDeclHandler = handler
1272
1273    def next(self):
1274        while 1:
1275            try:
1276                item = self._events[self._index]
1277            except IndexError:
1278                if self._parser is None:
1279                    self.root = self._root
1280                    raise StopIteration
1281                # load event buffer
1282                del self._events[:]
1283                self._index = 0
1284                data = self._file.read(16384)
1285                if data:
1286                    self._parser.feed(data)
1287                else:
1288                    self._root = self._parser.close()
1289                    self._parser = None
1290            else:
1291                self._index = self._index + 1
1292                return item
1293
1294    def __iter__(self):
1295        return self
1296
1297##
1298# Parses an XML document from a string constant.  This function can
1299# be used to embed "XML literals" in Python code.
1300#
# @param text A string containing XML data.
1302# @param parser An optional parser instance.  If not given, the
1303#     standard {@link XMLParser} parser is used.
1304# @return An Element instance.
1305# @defreturn Element
1306
def XML(text, parser=None):
    # without a usable parser, fall back on the standard one
    parser = parser or XMLParser(target=TreeBuilder())
    parser.feed(text)
    root = parser.close()
    return root
1312
1313##
1314# Parses an XML document from a string constant, and also returns
1315# a dictionary which maps from element id:s to elements.
1316#
# @param text A string containing XML data.
1318# @param parser An optional parser instance.  If not given, the
1319#     standard {@link XMLParser} parser is used.
1320# @return A tuple containing an Element instance and a dictionary.
1321# @defreturn (Element, dictionary)
1322
def XMLID(text, parser=None):
    # without a usable parser, fall back on the standard one
    parser = parser or XMLParser(target=TreeBuilder())
    parser.feed(text)
    root = parser.close()
    # elements without an "id" attribute, or with an empty one, are
    # left out; if an id occurs more than once, the last element wins
    by_id = {}
    for node in root.getiterator():
        key = node.get("id")
        if key:
            by_id[key] = node
    return root, by_id
1334
1335##
1336# Parses an XML document from a string constant.  Same as {@link #XML}.
1337#
1338# @def fromstring(text)
# @param text A string containing XML data.
1340# @return An Element instance.
1341# @defreturn Element
1342
# plain alias: both names refer to the same function object
fromstring = XML
1344
1345##
1346# Parses an XML document from a sequence of string fragments.
1347#
1348# @param sequence A list or other sequence containing XML data fragments.
1349# @param parser An optional parser instance.  If not given, the
1350#     standard {@link XMLParser} parser is used.
1351# @return An Element instance.
1352# @defreturn Element
1353# @since 1.3
1354
def fromstringlist(sequence, parser=None):
    # without a usable parser, fall back on the standard one
    parser = parser or XMLParser(target=TreeBuilder())
    # the fragments are fed one by one, in sequence order
    for fragment in sequence:
        parser.feed(fragment)
    root = parser.close()
    return root
1361
1362# --------------------------------------------------------------------
1363
1364##
1365# Generic element structure builder.  This builder converts a sequence
1366# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
1367# #TreeBuilder.end} method calls to a well-formed element structure.
1368# <p>
1369# You can use this class to build an element structure using a custom XML
1370# parser, or a parser for some other XML-like format.
1371#
1372# @param element_factory Optional element factory.  This factory
1373#    is called to create new Element instances, as necessary.
1374
class TreeBuilder(object):

    def __init__(self, element_factory=None):
        self._data = [] # data collector
        self._elem = [] # element stack
        self._last = None # last element
        self._tail = None # true if we're after an end tag
        if element_factory is None:
            element_factory = Element
        # called as factory(tag, attrs) for every start() call
        self._factory = element_factory

    ##
    # Checks that the document is complete, and returns the toplevel
    # document element.
    #
    # @return An Element instance.
    # @defreturn Element

    def close(self):
        assert len(self._elem) == 0, "missing end tags"
        # identity test; an element class may define its own comparison
        assert self._last is not None, "missing toplevel element"
        return self._last

    # Stores the collected character data as the text of the last
    # element (after a start tag), or as its tail (after an end tag),
    # and empties the collector.  Data that arrives before the first
    # element is dropped.
    def _flush(self):
        if self._data:
            if self._last is not None:
                text = "".join(self._data)
                if self._tail:
                    assert self._last.tail is None, "internal error (tail)"
                    self._last.tail = text
                else:
                    assert self._last.text is None, "internal error (text)"
                    self._last.text = text
            self._data = []

    ##
    # Adds text to the current element.
    #
    # @param data A string.  This should be either an 8-bit string
    #    containing ASCII text, or a Unicode string.

    def data(self, data):
        self._data.append(data)

    ##
    # Opens a new element.
    #
    # @param tag The element name.
    # @param attrs A dictionary containing element attributes.
    # @return The opened element.
    # @defreturn Element

    def start(self, tag, attrs):
        self._flush()
        self._last = elem = self._factory(tag, attrs)
        if self._elem:
            # not the toplevel element; attach it to its parent
            self._elem[-1].append(elem)
        self._elem.append(elem)
        self._tail = 0
        return elem

    ##
    # Closes the current element.
    #
    # @param tag The element name.
    # @return The closed element.
    # @defreturn Element

    def end(self, tag):
        self._flush()
        self._last = self._elem.pop()
        assert self._last.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   self._last.tag, tag)
        self._tail = 1
        return self._last
1451
1452##
1453# Element structure builder for XML source data, based on the
1454# <b>expat</b> parser.
1455#
1456# @keyparam target Target object.  If omitted, the builder uses an
1457#     instance of the standard {@link #TreeBuilder} class.
1458# @keyparam html Predefine HTML entities.  This flag is not supported
1459#     by the current implementation.
1460# @keyparam encoding Optional encoding.  If given, the value overrides
1461#     the encoding specified in the XML file.
1462# @see #ElementTree
1463# @see #TreeBuilder
1464
class XMLParser(object):

    def __init__(self, html=0, target=None, encoding=None):
        # html is accepted, but not used
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat
                expat = pyexpat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" is the namespace separator: qualified names are reported
        # as "uri}local" (see _fixname)
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        # exception class of whichever expat module was imported
        self._error = expat.error
        self._names = {} # name memo cache
        # callbacks
        parser.DefaultHandlerExpand = self._default
        parser.StartElementHandler = self._start
        parser.EndElementHandler = self._end
        parser.CharacterDataHandler = self._data
        # let expat do the buffering, if supported
        try:
            self._parser.buffer_text = 1
        except AttributeError:
            pass
        # use new-style attribute handling, if supported
        try:
            self._parser.ordered_attributes = 1
            self._parser.specified_attributes = 1
            parser.StartElementHandler = self._start_list
        except AttributeError:
            pass
        self._doctype = None
        self.entity = {}
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass # unknown

    def _raiseerror(self, value):
        # convert an expat error to a ParseError that carries the same
        # error code, and the position as a (line, offset) tuple
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    def _fixtext(self, text):
        # convert text string to ascii, if possible
        try:
            return text.encode("ascii")
        except UnicodeError:
            return text

    def _fixname(self, key):
        # expand qname, and convert name string to ascii, if possible
        try:
            name = self._names[key]
        except KeyError:
            name = key
            if "}" in name:
                # "uri}local" -> "{uri}local"
                name = "{" + name
            self._names[key] = name = self._fixtext(name)
        return name

    def _start(self, tag, attrib_in):
        # start tag callback; attrib_in is a dictionary
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        for key, value in attrib_in.items():
            attrib[fixname(key)] = fixtext(value)
        return self.target.start(tag, attrib)

    def _start_list(self, tag, attrib_in):
        # start tag callback; attrib_in is a flat sequence
        # [name, value, name, value, ...]
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        if attrib_in:
            for i in range(0, len(attrib_in), 2):
                attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1])
        return self.target.start(tag, attrib)

    def _data(self, text):
        return self.target.data(self._fixtext(text))

    def _end(self, tag):
        return self.target.end(self._fixname(tag))

    def _default(self, text):
        # Callback for everything no other handler takes care of.
        # Used for two things: entity references are looked up in
        # self.entity, and the pieces of a doctype declaration are
        # collected and reported to the target's doctype method, if it
        # has one.
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                self.target.data(self.entity[text[1:-1]])
            except KeyError:
                # use the error class stored by __init__, which also
                # works when the pyexpat fallback was imported
                err = self._error(
                    "undefined entity %s: line %d, column %d" %
                    (text, self._parser.ErrorLineNumber,
                    self._parser.ErrorColumnNumber)
                    )
                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = self._parser.ErrorLineNumber
                err.offset = self._parser.ErrorColumnNumber
                raise err
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = [] # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                type = self._doctype[1]
                if type == "PUBLIC" and n == 4:
                    name, type, pubid, system = self._doctype
                elif type == "SYSTEM" and n == 3:
                    name, type, system = self._doctype
                    pubid = None
                else:
                    return
                # the identifiers are reported without their quotes
                if pubid:
                    pubid = pubid[1:-1]
                if hasattr(self.target, "doctype"):
                    self.target.doctype(name, pubid, system[1:-1])
                self._doctype = None

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.

    def feed(self, data):
        try:
            self._parser.Parse(data, 0)
        except self._error:
            # sys.exc_info() is used to get at the exception, since it
            # parses on every Python version from 2.3 on
            self._raiseerror(sys.exc_info()[1])

    ##
    # Finishes feeding data to the parser.
    #
    # @return An element structure.
    # @defreturn Element

    def close(self):
        try:
            self._parser.Parse("", 1) # end of data
        except self._error:
            self._raiseerror(sys.exc_info()[1])
        tree = self.target.close()
        del self.target, self._parser # get rid of circular references
        return tree
1627
# compatibility: XMLTreeBuilder is an alias for the XMLParser class
XMLTreeBuilder = XMLParser
Note: See TracBrowser for help on using the repository browser.