source: exist/trunk/python/elementtree-1.3a6-20070212-badc/elementtree/ElementTree.py @ 3429

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/elementtree-1.3a6-20070212-badc/elementtree/ElementTree.py@4663
Revision 3429, 56.2 KB checked in by pjkersha, 12 years ago (diff)

Added latest ElementTree version (12/2/08) from Fredrik Lundh

Line 
1#
2# ElementTree
3# $Id: ElementTree.py 3375 2008-02-13 08:05:08Z fredrik $
4#
5# light-weight XML support for Python 2.3 and later.
6#
7# history:
8# 2001-10-20 fl   created (from various sources)
9# 2001-11-01 fl   return root from parse method
10# 2002-02-16 fl   sort attributes in lexical order
11# 2002-04-06 fl   TreeBuilder refactoring, added PythonDoc markup
12# 2002-05-01 fl   finished TreeBuilder refactoring
13# 2002-07-14 fl   added basic namespace support to ElementTree.write
14# 2002-07-25 fl   added QName attribute support
15# 2002-10-20 fl   fixed encoding in write
16# 2002-11-24 fl   changed default encoding to ascii; fixed attribute encoding
17# 2002-11-27 fl   accept file objects or file names for parse/write
18# 2002-12-04 fl   moved XMLTreeBuilder back to this module
19# 2003-01-11 fl   fixed entity encoding glitch for us-ascii
20# 2003-02-13 fl   added XML literal factory
21# 2003-02-21 fl   added ProcessingInstruction/PI factory
22# 2003-05-11 fl   added tostring/fromstring helpers
23# 2003-05-26 fl   added ElementPath support
24# 2003-07-05 fl   added makeelement factory method
25# 2003-07-28 fl   added more well-known namespace prefixes
26# 2003-08-15 fl   fixed typo in ElementTree.findtext (Thomas Dartsch)
27# 2003-09-04 fl   fall back on emulator if ElementPath is not installed
28# 2003-10-31 fl   markup updates
29# 2003-11-15 fl   fixed nested namespace bug
30# 2004-03-28 fl   added XMLID helper
31# 2004-06-02 fl   added default support to findtext
32# 2004-06-08 fl   fixed encoding of non-ascii element/attribute names
33# 2004-08-23 fl   take advantage of post-2.1 expat features
34# 2004-09-03 fl   made Element class visible; removed factory
35# 2005-02-01 fl   added iterparse implementation
36# 2005-03-02 fl   fixed iterparse support for pre-2.2 versions
37# 2005-11-12 fl   added tostringlist/fromstringlist helpers
38# 2006-07-05 fl   merged in selected changes from the 1.3 sandbox
39# 2006-07-05 fl   removed support for 2.1 and earlier
40# 2007-06-21 fl   added deprecation/future warnings
41# 2007-08-25 fl   added doctype hook, added parser version attribute etc
42# 2007-08-26 fl   added new serializer code (better namespace handling, etc)
43# 2007-08-27 fl   warn for broken /tag searches on tree level
44# 2007-09-02 fl   added html/text methods to serializer (experimental)
45# 2007-09-05 fl   added method argument to tostring/tostringlist
46# 2007-09-06 fl   improved error handling
47# 2007-09-13 fl   added itertext, iterfind; assorted cleanups
48# 2007-12-15 fl   added C14N hooks, copy method (experimental)
49#
50# Copyright (c) 1999-2008 by Fredrik Lundh.  All rights reserved.
51#
52# fredrik@pythonware.com
53# http://www.pythonware.com
54#
55# --------------------------------------------------------------------
56# The ElementTree toolkit is
57#
58# Copyright (c) 1999-2008 by Fredrik Lundh
59#
60# By obtaining, using, and/or copying this software and/or its
61# associated documentation, you agree that you have read, understood,
62# and will comply with the following terms and conditions:
63#
64# Permission to use, copy, modify, and distribute this software and
65# its associated documentation for any purpose and without fee is
66# hereby granted, provided that the above copyright notice appears in
67# all copies, and that both that copyright notice and this permission
68# notice appear in supporting documentation, and that the name of
69# Secret Labs AB or the author not be used in advertising or publicity
70# pertaining to distribution of the software without specific, written
71# prior permission.
72#
73# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
74# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
75# ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
76# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
77# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
78# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
79# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
80# OF THIS SOFTWARE.
81# --------------------------------------------------------------------
82
# Public symbols exported by "from ElementTree import *".
__all__ = [
    "Comment",
    "dump",
    "Element",
    "ElementTree",
    "fromstring",
    "fromstringlist",
    "iselement",
    "iterparse",
    "parse",
    "ParseError",
    "PI",
    "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring",
    "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML",
    "XMLParser",
    "XMLTreeBuilder",
]

# Toolkit version string (also exported through __all__).
VERSION = "1.3aX"
102
103##
104# The <b>Element</b> type is a flexible container object, designed to
105# store hierarchical data structures in memory. The type can be
106# described as a cross between a list and a dictionary.
107# <p>
108# Each element has a number of properties associated with it:
109# <ul>
110# <li>a <i>tag</i>. This is a string identifying what kind of data
111# this element represents (the element type, in other words).</li>
112# <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
113# <li>a <i>text</i> string.</li>
114# <li>an optional <i>tail</i> string.</li>
115# <li>a number of <i>child elements</i>, stored in a Python sequence</li>
116# </ul>
117#
118# To create an element instance, use the {@link #Element} constructor
119# or the {@link #SubElement} factory function.
120# <p>
121# The {@link #ElementTree} class can be used to wrap an element
122# structure, and convert it from and to XML.
123##
124
125import sys, re
126
class _SimpleElementPath(object):
    """Fallback path engine used when the ElementPath module is missing.

    Emulates the pre-1.2 find/findtext/findall behaviour: the "path" is
    treated as a plain tag name and compared against the direct children
    of the element.  The only path syntax understood is a leading ".//",
    which findall/iterfind forward to element.iter().  The namespaces
    argument is accepted for signature compatibility and ignored.
    """

    def find(self, element, tag, namespaces=None):
        """Return the first direct child whose tag equals tag, or None."""
        for elem in element:
            if elem.tag == tag:
                return elem
        return None

    def findtext(self, element, tag, default=None, namespaces=None):
        """Return the text of the first matching child.

        Returns default if there is no matching child, and an empty
        string if the child exists but has no text.
        """
        elem = self.find(element, tag)
        if elem is None:
            return default
        return elem.text or ""

    def findall(self, element, tag, namespaces=None):
        """Return a list of matching elements, in document order."""
        if tag[:3] == ".//":
            # descendant search; element.iter() also yields the element
            # itself when its tag matches
            return list(element.iter(tag[3:]))
        return [elem for elem in element if elem.tag == tag]

    def iterfind(self, element, tag, namespaces=None):
        """Return an iterator over the elements findall() would return.

        Element.iterfind delegates to ElementPath.iterfind, so the
        fallback has to provide this method as well.
        """
        return iter(self.findall(element, tag, namespaces))
147
148try:
149    import ElementPath
150except ImportError:
151    ElementPath = _SimpleElementPath()
152
153##
154# Parser error.  This is a subclass of <b>SyntaxError</b>.
155# <p>
156# In addition to the exception value, an exception instance contains a
157# specific exception code in the <b>code</b> attribute, and the line and
158# column of the error in the <b>position</b> attribute.
159
class ParseError(SyntaxError):
    """Raised when a document cannot be parsed.

    The class adds nothing to SyntaxError itself.  According to the
    module documentation, instances carry a ``code`` attribute and a
    ``position`` (line, column) attribute; those are expected to be
    attached by whatever code raises the error.
    """
162
163# --------------------------------------------------------------------
164
165##
166# Checks if an object appears to be a valid element object.
167#
168# @param element An element instance.
169# @return A true value if this is an element object.
170# @defreturn flag
171
def iselement(element):
    """Return a true value if the object looks like an element.

    Real Element instances always qualify; any other object is
    accepted as long as it exposes a "tag" attribute.
    """
    # FIXME: not sure about this; might be a better idea to look
    # for tag/attrib/text attributes
    if isinstance(element, Element):
        return True
    return hasattr(element, "tag")
176
177##
178# Element class.  This class defines the Element interface, and
179# provides a reference implementation of this interface.
180# <p>
181# The element name, attribute names, and attribute values can be
182# either ASCII strings (ordinary Python strings containing only 7-bit
183# ASCII characters) or Unicode strings.
184#
185# @param tag The element name.
186# @param attrib An optional dictionary, containing element attributes.
187# @param **extra Additional attributes, given as keyword arguments.
188# @see Element
189# @see SubElement
190# @see Comment
191# @see ProcessingInstruction
192
class Element(object):
    # <tag attrib>text<child/>...</tag>tail

    ##
    # (Attribute) Element tag.

    tag = None

    ##
    # (Attribute) Element attribute dictionary.  Where possible, use
    # {@link #Element.get},
    # {@link #Element.set},
    # {@link #Element.keys}, and
    # {@link #Element.items} to access
    # element attributes.

    attrib = None

    ##
    # (Attribute) Text before first subelement.  This is either a
    # string or the value None.  Note that if there was no text, this
    # attribute may be either None or an empty string, depending on
    # the parser.

    text = None

    ##
    # (Attribute) Text after this element's end tag, but before the
    # next sibling element's start tag.  This is either a string or
    # the value None.  Note that if there was no text, this attribute
    # may be either None or an empty string, depending on the parser.

    tail = None # text after end tag, if any

    ##
    # Creates an element.
    #
    # @param tag The element name.
    # @param attrib An optional dictionary, containing element attributes.
    # @param **extra Additional attributes, given as keyword arguments.

    def __init__(self, tag, attrib={}, **extra):
        # always work on a private copy, so neither the caller's
        # dictionary nor the shared default is ever modified
        attrib = attrib.copy()
        attrib.update(extra)
        self.tag = tag
        self.attrib = attrib
        self._children = []

    def __repr__(self):
        return "<Element %s at %x>" % (repr(self.tag), id(self))

    ##
    # Creates a new element object of the same type as this element.
    #
    # @param tag Element tag.
    # @param attrib Element attributes, given as a dictionary.
    # @return A new element instance.

    def makeelement(self, tag, attrib):
        return self.__class__(tag, attrib)

    ##
    # (Experimental) Copies the current element.  This creates a
    # shallow copy; subelements will be shared with the original tree.
    #
    # @return A new element instance.

    def copy(self):
        elem = self.makeelement(self.tag, self.attrib)
        elem.text = self.text
        elem.tail = self.tail
        elem[:] = self
        return elem

    ##
    # Returns the number of subelements.  Note that this only counts
    # full elements; to check if there's any content in an element, you
    # have to check both the length and the <b>text</b> attribute.
    #
    # @return The number of subelements.

    def __len__(self):
        return len(self._children)

    def __nonzero__(self):
        import warnings
        warnings.warn(
            "The behavior of this method will change in future versions. "
            "Use specific 'len(elem)' or 'elem is not None' test instead.",
            FutureWarning, stacklevel=2
            )
        return len(self._children) != 0 # emulate old behaviour, for now

    ##
    # Returns the given subelement, by index.
    #
    # @param index What subelement to return.
    # @return The given subelement.
    # @exception IndexError If the given element does not exist.

    def __getitem__(self, index):
        return self._children[index]

    ##
    # Replaces the given subelement, by index.  If the index is a slice
    # object, the value must be a sequence of elements instead.
    #
    # @param index What subelement (or slice of subelements) to replace.
    # @param element The new element value, or a sequence of elements
    #     if index is a slice.
    # @exception IndexError If the given element does not exist.
    # @exception AssertionError If element is not a valid object.

    def __setitem__(self, index, element):
        if isinstance(index, slice):
            # slice objects end up here for extended slices (and for
            # every slice on interpreters without __setslice__); the
            # value is then a sequence, so validate its members.
            # Materialize first so a one-shot iterator is not used up
            # by the validation loop.
            elements = list(element)
            for item in elements:
                assert iselement(item)
            self._children[index] = elements
        else:
            assert iselement(element)
            self._children[index] = element

    ##
    # Deletes the given subelement, by index.
    #
    # @param index What subelement to delete.
    # @exception IndexError If the given element does not exist.

    def __delitem__(self, index):
        del self._children[index]

    ##
    # Returns a list containing subelements in the given range.
    #
    # @param start The first subelement to return.
    # @param stop The first subelement that shouldn't be returned.
    # @return A sequence object containing subelements.

    def __getslice__(self, start, stop):
        return self._children[start:stop]

    ##
    # Replaces a number of subelements with elements from a sequence.
    #
    # @param start The first subelement to replace.
    # @param stop The first subelement that shouldn't be replaced.
    # @param elements A sequence object with zero or more elements.
    # @exception AssertionError If a sequence member is not a valid object.

    def __setslice__(self, start, stop, elements):
        # take a snapshot before validating; iterating a generator
        # twice would otherwise replace the range with nothing
        elements = list(elements)
        for element in elements:
            assert iselement(element)
        self._children[start:stop] = elements

    ##
    # Deletes a number of subelements.
    #
    # @param start The first subelement to delete.
    # @param stop The first subelement to leave in there.

    def __delslice__(self, start, stop):
        del self._children[start:stop]

    ##
    # Adds a subelement to the end of this element.  In document order,
    # the new element will appear after the last existing subelement (or
    # directly after the text, if it's the first subelement), but before
    # the end tag for this element.
    #
    # @param element The element to add.
    # @exception AssertionError If the element is not a valid object.

    def append(self, element):
        assert iselement(element)
        self._children.append(element)

    ##
    # Appends subelements from a sequence.
    #
    # @param elements A sequence or iterator with zero or more elements.
    # @exception AssertionError If a subelement is not a valid object.
    # @since 1.3

    def extend(self, elements):
        # take a snapshot before validating; iterating a generator
        # twice would otherwise append nothing at all
        elements = list(elements)
        for element in elements:
            assert iselement(element)
        self._children.extend(elements)

    ##
    # Inserts a subelement at the given position in this element.
    #
    # @param index Where to insert the new subelement.
    # @param element The element to insert.
    # @exception AssertionError If the element is not a valid object.

    def insert(self, index, element):
        assert iselement(element)
        self._children.insert(index, element)

    ##
    # Removes a matching subelement.  Unlike the <b>find</b> methods,
    # this method compares elements based on identity, not on tag
    # value or contents.  To remove subelements by other means, the
    # easiest way is often to use a list comprehension to select what
    # elements to keep, and use slice assignment to update the parent
    # element.
    #
    # @param element What element to remove.
    # @exception ValueError If a matching element could not be found.
    # @exception AssertionError If the element is not a valid object.

    def remove(self, element):
        assert iselement(element)
        self._children.remove(element)

    ##
    # (Deprecated) Returns all subelements.  The elements are returned
    # in document order.  Note that this hands out the internal child
    # list itself, not a copy.
    #
    # @return A list of subelements.
    # @defreturn list of Element instances

    def getchildren(self):
        import warnings
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'list(elem)' or iteration over elem instead.",
            DeprecationWarning, stacklevel=2
            )
        return self._children

    ##
    # Finds the first matching subelement, by tag name or path.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The first matching element, or None if no element was found.
    # @defreturn Element or None

    def find(self, path, namespaces=None):
        return ElementPath.find(self, path, namespaces)

    ##
    # Finds text for the first matching subelement, by tag name or path.
    #
    # @param path What element to look for.
    # @param default What to return if the element was not found.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The text content of the first matching element, or the
    #     default value if no element was found.  Note that if the
    #     element is found, but has no text content, this method
    #     returns an empty string.
    # @defreturn string

    def findtext(self, path, default=None, namespaces=None):
        return ElementPath.findtext(self, path, default, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return A list or other sequence containing all matching elements,
    #    in document order.
    # @defreturn list of Element instances

    def findall(self, path, namespaces=None):
        return ElementPath.findall(self, path, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return An iterator or sequence containing all matching elements,
    #    in document order.
    # @defreturn a generated sequence of Element instances

    def iterfind(self, path, namespaces=None):
        return ElementPath.iterfind(self, path, namespaces)

    ##
    # Resets an element.  This function removes all subelements, clears
    # all attributes, and sets the <b>text</b> and <b>tail</b> attributes
    # to None.

    def clear(self):
        self.attrib.clear()
        self._children = []
        self.text = self.tail = None

    ##
    # Gets an element attribute.  Equivalent to <b>attrib.get</b>, but
    # some implementations may handle this a bit more efficiently.
    #
    # @param key What attribute to look for.
    # @param default What to return if the attribute was not found.
    # @return The attribute value, or the default value, if the
    #     attribute was not found.
    # @defreturn string or None

    def get(self, key, default=None):
        return self.attrib.get(key, default)

    ##
    # Sets an element attribute.  Equivalent to <b>attrib[key] = value</b>,
    # but some implementations may handle this a bit more efficiently.
    #
    # @param key What attribute to set.
    # @param value The attribute value.

    def set(self, key, value):
        self.attrib[key] = value

    ##
    # Gets a list of attribute names.  The names are returned in an
    # arbitrary order (just like for an ordinary Python dictionary).
    # Equivalent to <b>attrib.keys()</b>.
    #
    # @return A list of element attribute names.
    # @defreturn list of strings

    def keys(self):
        return self.attrib.keys()

    ##
    # Gets element attributes, as a sequence.  The attributes are
    # returned in an arbitrary order.  Equivalent to <b>attrib.items()</b>.
    #
    # @return A list of (name, value) tuples for all attributes.
    # @defreturn list of (string, string) tuples

    def items(self):
        return self.attrib.items()

    ##
    # Creates a tree iterator.  The iterator loops over this element
    # and all subelements, in document order, and returns all elements
    # with a matching tag.
    # <p>
    # If the tree structure is modified during iteration, new or removed
    # elements may or may not be included.  To get a stable set, use the
    # list() function on the iterator, and loop over the resulting list.
    #
    # @param tag What tags to look for (default is to return all elements).
    # @return An iterator containing all the matching elements.
    # @defreturn iterator

    def iter(self, tag=None):
        if tag == "*":
            tag = None # "*" is a wildcard: match every element
        if tag is None or self.tag == tag:
            yield self
        for child in self._children:
            for descendant in child.iter(tag):
                yield descendant

    # compatibility
    def getiterator(self, tag=None):
        # enable in 1.4
        # import warnings
        # warnings.warn(
        #     "This method will be removed in future versions. "
        #     "Use 'elem.iter()' or 'list(elem.iter())' instead.",
        #     DeprecationWarning, stacklevel=2
        # )
        return list(self.iter(tag))

    ##
    # Creates a text iterator.  The iterator loops over this element
    # and all subelements, in document order, and returns all inner
    # text.
    #
    # @return An iterator containing all inner text.
    # @defreturn iterator

    def itertext(self):
        tag = self.tag
        # elements whose tag is neither a string nor None (the Comment
        # and ProcessingInstruction factories use the factory function
        # itself as tag) contribute no text
        if not isinstance(tag, basestring) and tag is not None:
            return
        if self.text:
            yield self.text
        for e in self:
            for s in e.itertext():
                yield s
            if e.tail:
                yield e.tail

# compatibility
_Element = _ElementInterface = Element
570
571##
572# Subelement factory.  This function creates an element instance, and
573# appends it to an existing element.
574# <p>
575# The element name, attribute names, and attribute values can be
576# either 8-bit ASCII strings or Unicode strings.
577#
578# @param parent The parent element.
579# @param tag The subelement name.
580# @param attrib An optional dictionary, containing element attributes.
581# @param **extra Additional attributes, given as keyword arguments.
582# @return An element instance.
583# @defreturn Element
584
def SubElement(parent, tag, attrib={}, **extra):
    """Create a child element and append it to parent.

    The new element is built with parent.makeelement(), so it has the
    same type as the parent.  The attribute dictionary is copied before
    the keyword attributes are merged in; the caller's dictionary (and
    the shared default) is left untouched.  Returns the new element.
    """
    attributes = attrib.copy()
    attributes.update(extra)
    child = parent.makeelement(tag, attributes)
    parent.append(child)
    return child
591
592##
593# Comment element factory.  This factory function creates a special
594# element that will be serialized as an XML comment by the standard
595# serializer.
596# <p>
597# The comment string can be either an 8-bit ASCII string or a Unicode
598# string.
599#
600# @param text A string containing the comment string.
601# @return An element instance, representing a comment.
602# @defreturn Element
603
def Comment(text=None):
    """Create an element representing an XML comment.

    The element uses this factory function itself as its tag, and
    stores the comment string in its text attribute.
    """
    comment = Element(Comment)
    comment.text = text
    return comment
608
609##
610# PI element factory.  This factory function creates a special element
611# that will be serialized as an XML processing instruction by the standard
612# serializer.
613#
614# @param target A string containing the PI target.
615# @param text A string containing the PI contents, if any.
616# @return An element instance, representing a PI.
617# @defreturn Element
618
def ProcessingInstruction(target, text=None):
    """Create an element representing an XML processing instruction.

    The element uses this factory function itself as its tag.  Its
    text attribute holds the target, followed by a single space and
    the contents when non-empty contents are given.
    """
    pi = Element(ProcessingInstruction)
    if text:
        pi.text = target + " " + text
    else:
        pi.text = target
    return pi

# short alias for the factory
PI = ProcessingInstruction
627
628##
629# QName wrapper.  This can be used to wrap a QName attribute value, in
630# order to get proper namespace handling on output.
631#
632# @param text A string containing the QName value, in the form {uri}local,
633#     or, if the tag argument is given, the URI part of a QName.
634# @param tag Optional tag.  If given, the first argument is interpreted as
635#     an URI, and this argument is interpreted as a local name.
636# @return An opaque object, representing the QName.
637
class QName(object):
    """Wrapper for a qualified name in {uri}local form.

    A QName compares and hashes like its text, so it is interchangeable
    with the equivalent plain string in comparisons and as a dictionary
    key.
    """

    def __init__(self, text_or_uri, tag=None):
        # with a tag, the first argument is the namespace URI and the
        # tag is the local name; otherwise it is the complete name
        if tag:
            text_or_uri = "{%s}%s" % (text_or_uri, tag)
        self.text = text_or_uri

    def __str__(self):
        return self.text

    def __hash__(self):
        return hash(self.text)

    def _text_of(self, other):
        # value to compare against: the text of another QName, or the
        # other object itself (typically a plain string)
        if isinstance(other, QName):
            return other.text
        return other

    # Rich comparisons.  __cmp__ alone is ignored by interpreters that
    # dropped three-way comparison, where equality would silently fall
    # back to identity and no longer agree with __hash__.

    def __eq__(self, other):
        return self.text == self._text_of(other)

    def __ne__(self, other):
        return self.text != self._text_of(other)

    def __lt__(self, other):
        return self.text < self._text_of(other)

    def __le__(self, other):
        return self.text <= self._text_of(other)

    def __gt__(self, other):
        return self.text > self._text_of(other)

    def __ge__(self, other):
        return self.text >= self._text_of(other)

    def __cmp__(self, other):
        # kept for callers that invoke it directly; written without the
        # cmp() builtin so it works everywhere
        theirs = self._text_of(other)
        return (self.text > theirs) - (self.text < theirs)
651
652# --------------------------------------------------------------------
653
654##
655# ElementTree wrapper class.  This class represents an entire element
656# hierarchy, and adds some extra support for serialization to and from
657# standard XML.
658#
659# @param element Optional root element.
660# @keyparam file Optional file handle or file name.  If given, the
661#     tree is initialized with the contents of this XML file.
662
class ElementTree(object):

    def __init__(self, element=None, file=None):
        assert element is None or iselement(element)
        self._root = element # first node
        if file:
            self.parse(file)

    ##
    # Gets the root element for this tree.
    #
    # @return An element instance.
    # @defreturn Element

    def getroot(self):
        return self._root

    ##
    # Replaces the root element for this tree.  This discards the
    # current contents of the tree, and replaces it with the given
    # element.  Use with care.
    #
    # @param element An element instance.

    def _setroot(self, element):
        assert iselement(element)
        self._root = element

    ##
    # Loads an external XML document into this element tree.
    #
    # @param source A file name or file object.  If a file object is
    #     given, it only has to implement a <b>read(n)</b> method.  A
    #     file opened here from a file name is closed again before
    #     returning; a file object passed in by the caller is left open.
    # @keyparam parser An optional parser instance.  If not given, the
    #     standard {@link XMLParser} parser is used.
    # @return The document root element.
    # @defreturn Element
    # @exception ParseError If the parser fails to parse the document.

    def parse(self, source, parser=None):
        opened = None
        if not hasattr(source, "read"):
            source = opened = open(source, "rb")
        try:
            if not parser:
                parser = XMLParser(target=TreeBuilder())
            while 1:
                data = source.read(65536)
                if not data:
                    break
                parser.feed(data)
            self._root = parser.close()
            return self._root
        finally:
            # only close what we opened ourselves
            if opened is not None:
                opened.close()

    ##
    # Creates a tree iterator for the root element.  The iterator loops
    # over all elements in this tree, in document order.
    #
    # @param tag What tags to look for (default is to return all elements)
    # @return An iterator.
    # @defreturn iterator

    def iter(self, tag=None):
        assert self._root is not None
        return self._root.iter(tag)

    getiterator = iter

    # Shared by find/findtext/findall: a path starting with "/" is
    # rewritten to "./path", and the caller of the public method is
    # warned that this behaviour will change.

    def _toplevel_path(self, path):
        if path[:1] == "/":
            path = "." + path
            import warnings
            # stacklevel 3: skip this helper and the public method, so
            # the warning points at the code that issued the search
            warnings.warn(
                "This search is broken in 1.3 and earlier, and will be "
                "fixed in a future version. If you rely on the current "
                "behaviour, change it to %r" % path,
                FutureWarning, stacklevel=3
                )
        return path

    ##
    # Finds the first toplevel element with given tag.
    # Same as getroot().find(path).
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The first matching element, or None if no element was found.
    # @defreturn Element or None

    def find(self, path, namespaces=None):
        assert self._root is not None
        path = self._toplevel_path(path)
        return self._root.find(path, namespaces)

    ##
    # Finds the element text for the first toplevel element with given
    # tag.  Same as getroot().findtext(path).
    #
    # @param path What toplevel element to look for.
    # @param default What to return if the element was not found.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The text content of the first matching element, or the
    #     default value if no element was found.  Note that if the
    #     element is found, but has no text content, this method
    #     returns an empty string.
    # @defreturn string

    def findtext(self, path, default=None, namespaces=None):
        assert self._root is not None
        path = self._toplevel_path(path)
        return self._root.findtext(path, default, namespaces)

    ##
    # Finds all toplevel elements with the given tag.
    # Same as getroot().findall(path).
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return A list or iterator containing all matching elements,
    #    in document order.
    # @defreturn list of Element instances

    def findall(self, path, namespaces=None):
        assert self._root is not None
        path = self._toplevel_path(path)
        return self._root.findall(path, namespaces)

    ##
    # Writes the element tree to a file, as XML.
    #
    # @def write(file, **options)
    # @param file A file name, or a file object opened for writing.  A
    #     file opened here from a file name is closed again before
    #     returning; a file object passed in by the caller is left open.
    # @param **options Options, given as keyword arguments.
    # @keyparam encoding Optional output encoding (default is US-ASCII,
    #     or UTF-8 for the "c14n" method).
    # @keyparam method Optional output method ("xml", "html", "text" or
    #     "c14n"; default is "xml").
    # @keyparam xml_declaration Controls if an XML declaration should
    #     be added to the file.  Use False for never, True for always,
    #     None for only if not US-ASCII or UTF-8.  None is default.
    #     The declaration is only emitted for the "xml" method.
    # @keyparam default_namespace Optional namespace URI that is handed
    #     to the namespace scanner as the default namespace.
    # @exception ValueError If the output method is not known.

    def write(self, file,
              # keyword arguments
              encoding=None,
              xml_declaration=None,
              default_namespace=None,
              method=None):
        assert self._root is not None
        if not method:
            method = "xml"
        if not encoding:
            if method == "c14n":
                encoding = "utf-8"
            else:
                encoding = "us-ascii"
        opened = None
        if not hasattr(file, "write"):
            file = opened = open(file, "wb")
        try:
            write = file.write
            # Decide on the declaration only after the encoding default
            # has been filled in, so that xml_declaration=True is also
            # honoured when no encoding was given.
            if xml_declaration is None:
                declare = encoding not in ("utf-8", "us-ascii")
            else:
                declare = xml_declaration
            if declare and method == "xml":
                write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
            if method == "text":
                _serialize_text(write, self._root, encoding)
            else:
                qnames, namespaces = _namespaces(
                    self._root, encoding, default_namespace
                    )
                if method == "xml":
                    _serialize_xml(
                        write, self._root, encoding, qnames, namespaces
                        )
                elif method == "html":
                    _serialize_html(
                        write, self._root, encoding, qnames, namespaces
                        )
                elif method == "c14n":
                    import ElementC14N
                    ElementC14N._serialize_c14n(
                        write, self._root, encoding, qnames, namespaces
                        )
                else:
                    raise ValueError("unknown method %r" % method)
        finally:
            # only close what we opened ourselves
            if opened is not None:
                opened.close()

    def write_c14n(self, file):
        # lxml.etree compatibility.  use output method instead
        return self.write(file, method="c14n")
859
860# --------------------------------------------------------------------
861# serialization support
862
def _namespaces(elem, encoding, default_namespace=None):
    # Walks the tree below (and including) elem and collects every
    # qualified name used as a tag, as an attribute name, or as a QName
    # valued attribute or text.
    #
    # Returns a (qnames, namespaces) tuple.  Raises ValueError if a
    # non-qualified name is used together with default_namespace, and a
    # serialization TypeError for names that cannot be serialized.

    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def encode(text):
        return text.encode(encoding)

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] == "{":
                uri, tag = qname[1:].split("}", 1)
                prefix = namespaces.get(uri)
                if prefix is None:
                    prefix = _namespace_map.get(uri)
                    if prefix is None:
                        # no registered prefix; invent one
                        prefix = "ns%d" % len(namespaces)
                    if prefix != "xml":
                        # the xml prefix is never recorded, so it is
                        # never declared in the output
                        namespaces[uri] = prefix
                if prefix:
                    qnames[qname] = encode("%s:%s" % (prefix, tag))
                else:
                    qnames[qname] = encode(tag) # default element
            else:
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = encode(qname)
        except TypeError:
            _raise_serialization_error(qname)

    # populate qname and namespaces table
    try:
        iterate = elem.iter
    except AttributeError:
        iterate = elem.getiterator # cET compatibility
    for elem in iterate():
        tag = elem.tag
        if isinstance(tag, QName):
            # the "already known" test must be nested: a QName tag that
            # has been seen before is valid, and must not fall through
            # to the error branch below
            if tag.text not in qnames:
                add_qname(tag.text)
        elif isinstance(tag, basestring):
            if tag not in qnames:
                add_qname(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            _raise_serialization_error(tag)
        for key, value in elem.items():
            if isinstance(key, QName):
                key = key.text
            if key not in qnames:
                add_qname(key)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        text = elem.text
        if isinstance(text, QName) and text.text not in qnames:
            add_qname(text.text)
    return qnames, namespaces
929
def _serialize_xml(write, elem, encoding, qnames, namespaces):
    # Writes elem, its subtree and its tail as XML through the write
    # callable.  qnames maps tags and attribute names to their encoded
    # form; namespaces (uri -> prefix) is only passed for the outermost
    # element, recursive calls pass None so declarations appear once.
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text, encoding))
    else:
        name = qnames[tag]
        if name is None:
            # element without a tag: emit its content only
            if text:
                write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_xml(write, child, encoding, qnames, None)
        else:
            write("<" + name)
            attributes = elem.items()
            if attributes or namespaces:
                attributes.sort() # lexical order
                if namespaces:
                    declarations = namespaces.items()
                    try:
                        # sort on prefix
                        declarations.sort(key=lambda pair: pair[1])
                    except TypeError:
                        # 2.3: no key argument
                        declarations.sort(lambda a, b: cmp(a[1], b[1]))
                    for uri, prefix in declarations:
                        if prefix:
                            prefix = ":" + prefix
                        write(" xmlns%s=\"%s\"" % (
                            prefix.encode(encoding),
                            _escape_attrib(uri, encoding)
                            ))
                for key, value in attributes:
                    if isinstance(key, QName):
                        key = key.text
                    if isinstance(value, QName):
                        value = qnames[value.text]
                    else:
                        value = _escape_attrib(value, encoding)
                    write(" %s=\"%s\"" % (qnames[key], value))
            if not (text or len(elem)):
                # nothing inside: use the short empty-element form
                write(" />")
            else:
                write(">")
                if text:
                    write(_escape_cdata(text, encoding))
                for child in elem:
                    _serialize_xml(write, child, encoding, qnames, None)
                write("</" + name + ">")
    tail = elem.tail
    if tail:
        write(_escape_cdata(tail, encoding))
981
# names of HTML elements that are written without an end tag.  note
# that every name needs its own comma; adjacent string literals are
# silently concatenated into a single name.
HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param")

# use a set for the membership tests, if the builtin is available
try:
    HTML_EMPTY = set(HTML_EMPTY)
except NameError:
    pass
989
def _serialize_html(write, elem, encoding, qnames, namespaces):
    # Writes elem, its subtree and its tail as HTML through the write
    # callable.  qnames maps tags and attribute names to their encoded
    # form; namespaces (uri -> prefix) is only passed for the outermost
    # element, recursive calls pass None.
    #
    # Compared to the XML serializer: script and style content is
    # written without markup escaping, elements listed in HTML_EMPTY get
    # no end tag, and all other elements always get an explicit end tag.
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text, encoding))
    else:
        tag = qnames[tag]
        if tag is None:
            # element without a tag: emit its content only
            if text:
                write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_html(write, e, encoding, qnames, None)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                if namespaces:
                    i = namespaces.items()
                    try:
                        i.sort(key=lambda x: x[1]) # sort on prefix
                    except TypeError:
                        i.sort(lambda a, b: cmp(a[1], b[1])) # 2.3
                    for v, k in i:
                        if k:
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (
                            k.encode(encoding),
                            _escape_attrib(v, encoding)
                            ))
                items.sort() # lexical order
                for k, v in items:
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib_html(v, encoding)
                    # FIXME: handle boolean attributes
                    write(" %s=\"%s\"" % (qnames[k], v))
            write(">")
            # the lower-cased name is used for lookups only; the end tag
            # is written exactly like the start tag, so that the two
            # always match
            ltag = tag.lower()
            if text:
                if ltag == "script" or ltag == "style":
                    write(_encode(text, encoding))
                else:
                    write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_html(write, e, encoding, qnames, None)
            if ltag not in HTML_EMPTY:
                write("</" + tag + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
1044
1045def _serialize_text(write, elem, encoding):
1046    for part in elem.itertext():
1047        write(part.encode(encoding))
1048    if elem.tail:
1049        write(elem.tail.encode(encoding))
1050
1051##
1052# Registers a namespace prefix.  The registry is global, and any
1053# existing mapping for either the given prefix or the namespace URI
1054# will be removed.
1055#
1056# @param prefix Namespace prefix.
1057# @param uri Namespace uri.  Tags and attributes in this namespace
1058#     will be serialized with the given prefix, if at all possible.
1059# @exception ValueError If the prefix is reserved, or is otherwise
1060#     invalid.
1061
1062def register_namespace(prefix, uri):
1063    if re.match("ns\d+$", prefix):
1064        raise ValueError("Prefix format reserved for internal use")
1065    for k, v in _namespace_map.items():
1066        if k == uri or v == prefix:
1067            del _namespace_map[k]
1068    _namespace_map[uri] = prefix
1069
1070_namespace_map = {
1071    # "well-known" namespace prefixes
1072    "http://www.w3.org/XML/1998/namespace": "xml",
1073    "http://www.w3.org/1999/xhtml": "html",
1074    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
1075    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
1076    # xml schema
1077    "http://www.w3.org/2001/XMLSchema": "xs",
1078    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
1079    # dublic core
1080    "http://purl.org/dc/elements/1.1/": "dc",
1081}
1082
1083def _raise_serialization_error(text):
1084    raise TypeError(
1085        "cannot serialize %r (type %s)" % (text, type(text).__name__)
1086        )
1087
1088def _encode(text, encoding):
1089    try:
1090        return text.encode(encoding, "xmlcharrefreplace")
1091    except (TypeError, AttributeError):
1092        _raise_serialization_error(text)
1093
1094def _escape_cdata(text, encoding):
1095    # escape character data
1096    try:
1097        # it's worth avoiding do-nothing calls for strings that are
1098        # shorter than 500 character, or so.  assume that's, by far,
1099        # the most common case in most applications.
1100        if "&" in text:
1101            text = text.replace("&", "&amp;")
1102        if "<" in text:
1103            text = text.replace("<", "&lt;")
1104        if ">" in text:
1105            text = text.replace(">", "&gt;")
1106        return text.encode(encoding, "xmlcharrefreplace")
1107    except (TypeError, AttributeError):
1108        _raise_serialization_error(text)
1109
1110def _escape_attrib(text, encoding):
1111    # escape attribute value
1112    try:
1113        if "&" in text:
1114            text = text.replace("&", "&amp;")
1115        if "<" in text:
1116            text = text.replace("<", "&lt;")
1117        if ">" in text:
1118            text = text.replace(">", "&gt;")
1119        if "\"" in text:
1120            text = text.replace("\"", "&quot;")
1121        if "\n" in text:
1122            text = text.replace("\n", "&#10;")
1123        return text.encode(encoding, "xmlcharrefreplace")
1124    except (TypeError, AttributeError):
1125        _raise_serialization_error(text)
1126
1127def _escape_attrib_html(text, encoding):
1128    # escape attribute value
1129    try:
1130        if "&" in text:
1131            text = text.replace("&", "&amp;")
1132        if ">" in text:
1133            text = text.replace(">", "&gt;")
1134        if "\"" in text:
1135            text = text.replace("\"", "&quot;")
1136        return text.encode(encoding, "xmlcharrefreplace")
1137    except (TypeError, AttributeError):
1138        _raise_serialization_error(text)
1139
1140# --------------------------------------------------------------------
1141
1142##
1143# Generates a string representation of an XML element, including all
1144# subelements.
1145#
1146# @param element An Element instance.
1147# @return An encoded string containing the XML data.
1148# @defreturn string
1149
def tostring(element, encoding=None, method=None):
    # Serializes element into a list of fragments through a minimal
    # file-like object, and returns the fragments joined into one string.
    class _Sink:
        pass
    fragments = []
    sink = _Sink()
    sink.write = fragments.append
    tree = ElementTree(element)
    tree.write(sink, encoding, method=method)
    return "".join(fragments)
1158
1159##
1160# Generates a string representation of an XML element, including all
1161# subelements.  The string is returned as a sequence of string fragments.
1162#
1163# @param element An Element instance.
1164# @return A sequence object containing the XML data.
1165# @defreturn sequence
1166# @since 1.3
1167
def tostringlist(element, encoding=None, method=None):
    # Serializes element through a minimal file-like object and returns
    # the list of fragments that were written.
    #
    # encoding and method are handed on to ElementTree.write; method is
    # accepted here as well, so that this function offers the same
    # options as tostring.
    class dummy:
        pass
    data = []
    file = dummy()
    file.write = data.append
    ElementTree(element).write(file, encoding, method=method)
    # FIXME: merge small fragments into larger parts
    return data
1177
1178##
1179# Writes an element tree or element structure to sys.stdout.  This
1180# function should be used for debugging only.
1181# <p>
1182# The exact output format is implementation dependent.  In this
1183# version, it's written as an ordinary XML file.
1184#
1185# @param elem An element tree or an individual element.
1186
def dump(elem):
    # debugging aid: writes the tree to sys.stdout, and makes sure that
    # the output ends with a newline
    tree = elem
    if not isinstance(tree, ElementTree):
        tree = ElementTree(tree)
    tree.write(sys.stdout)
    tail = tree.getroot().tail
    if not (tail and tail[-1] == "\n"):
        sys.stdout.write("\n")
1195
1196# --------------------------------------------------------------------
1197# parsing
1198
1199##
1200# Parses an XML document into an element tree.
1201#
1202# @param source A filename or file object containing XML data.
1203# @param parser An optional parser instance.  If not given, the
1204#     standard {@link XMLParser} parser is used.
1205# @return An ElementTree instance
1206
def parse(source, parser=None):
    # Creates an empty ElementTree, loads source into it (using parser,
    # if given), and returns the tree.
    document = ElementTree()
    document.parse(source, parser)
    return document
1211
1212##
1213# Parses an XML document into an element tree incrementally, and reports
1214# what's going on to the user.
1215#
1216# @param source A filename or file object containing XML data.
1217# @param events A list of events to report back.  If omitted, only "end"
1218#     events are reported.
1219# @param parser An optional parser instance.  If not given, the
1220#     standard {@link XMLParser} parser is used.
1221# @return A (event, elem) iterator.
1222
1223def iterparse(source, events=None, parser=None):
1224    if not hasattr(source, "read"):
1225        source = open(source, "rb")
1226    if not parser:
1227        parser = XMLParser(target=TreeBuilder())
1228    return _IterParseIterator(source, events, parser)
1229
1230class _IterParseIterator(object):
1231
1232    def __init__(self, source, events, parser):
1233        self._file = source
1234        self._events = []
1235        self._index = 0
1236        self.root = self._root = None
1237        self._parser = parser
1238        # wire up the parser for event reporting
1239        parser = self._parser._parser
1240        append = self._events.append
1241        if events is None:
1242            events = ["end"]
1243        for event in events:
1244            if event == "start":
1245                try:
1246                    parser.ordered_attributes = 1
1247                    parser.specified_attributes = 1
1248                    def handler(tag, attrib_in, event=event, append=append,
1249                                start=self._parser._start_list):
1250                        append((event, start(tag, attrib_in)))
1251                    parser.StartElementHandler = handler
1252                except AttributeError:
1253                    def handler(tag, attrib_in, event=event, append=append,
1254                                start=self._parser._start):
1255                        append((event, start(tag, attrib_in)))
1256                    parser.StartElementHandler = handler
1257            elif event == "end":
1258                def handler(tag, event=event, append=append,
1259                            end=self._parser._end):
1260                    append((event, end(tag)))
1261                parser.EndElementHandler = handler
1262            elif event == "start-ns":
1263                def handler(prefix, uri, event=event, append=append):
1264                    try:
1265                        uri = (uri or "").encode("ascii")
1266                    except UnicodeError:
1267                        pass
1268                    append((event, (prefix or "", uri or "")))
1269                parser.StartNamespaceDeclHandler = handler
1270            elif event == "end-ns":
1271                def handler(prefix, event=event, append=append):
1272                    append((event, None))
1273                parser.EndNamespaceDeclHandler = handler
1274
1275    def next(self):
1276        while 1:
1277            try:
1278                item = self._events[self._index]
1279            except IndexError:
1280                if self._parser is None:
1281                    self.root = self._root
1282                    raise StopIteration
1283                # load event buffer
1284                del self._events[:]
1285                self._index = 0
1286                data = self._file.read(16384)
1287                if data:
1288                    self._parser.feed(data)
1289                else:
1290                    self._root = self._parser.close()
1291                    self._parser = None
1292            else:
1293                self._index = self._index + 1
1294                return item
1295
1296    def __iter__(self):
1297        return self
1298
1299##
1300# Parses an XML document from a string constant.  This function can
1301# be used to embed "XML literals" in Python code.
1302#
1303# @param source A string containing XML data.
1304# @param parser An optional parser instance.  If not given, the
1305#     standard {@link XMLParser} parser is used.
1306# @return An Element instance.
1307# @defreturn Element
1308
def XML(text, parser=None):
    # Feeds text to the given parser (or to a standard XMLParser, if no
    # parser was given) and returns whatever closing the parser yields.
    builder = parser or XMLParser(target=TreeBuilder())
    builder.feed(text)
    return builder.close()
1314
1315##
1316# Parses an XML document from a string constant, and also returns
1317# a dictionary which maps from element id:s to elements.
1318#
1319# @param source A string containing XML data.
1320# @param parser An optional parser instance.  If not given, the
1321#     standard {@link XMLParser} parser is used.
1322# @return A tuple containing an Element instance and a dictionary.
1323# @defreturn (Element, dictionary)
1324
def XMLID(text, parser=None):
    # Parses text like XML does, then collects every element that has a
    # non-empty "id" attribute into a dictionary keyed on that value; if
    # a value occurs more than once, the last element wins.
    builder = parser or XMLParser(target=TreeBuilder())
    builder.feed(text)
    root = builder.close()
    by_id = {}
    for node in root.getiterator():
        key = node.get("id")
        if not key:
            continue
        by_id[key] = node
    return root, by_id
1336
1337##
1338# Parses an XML document from a string constant.  Same as {@link #XML}.
1339#
# @def fromstring(text, parser=None)
1341# @param source A string containing XML data.
1342# @return An Element instance.
1343# @defreturn Element
1344
# plain alias: both names are bound to the same function object
fromstring = XML
1346
1347##
1348# Parses an XML document from a sequence of string fragments.
1349#
1350# @param sequence A list or other sequence containing XML data fragments.
1351# @param parser An optional parser instance.  If not given, the
1352#     standard {@link XMLParser} parser is used.
1353# @return An Element instance.
1354# @defreturn Element
1355# @since 1.3
1356
def fromstringlist(sequence, parser=None):
    # Feeds the fragments to the given parser (or to a standard
    # XMLParser, if no parser was given), one at a time and in order,
    # and returns whatever closing the parser yields.
    builder = parser or XMLParser(target=TreeBuilder())
    for fragment in sequence:
        builder.feed(fragment)
    return builder.close()
1363
1364# --------------------------------------------------------------------
1365
1366##
1367# Generic element structure builder.  This builder converts a sequence
1368# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
1369# #TreeBuilder.end} method calls to a well-formed element structure.
1370# <p>
1371# You can use this class to build an element structure using a custom XML
1372# parser, or a parser for some other XML-like format.
1373#
1374# @param element_factory Optional element factory.  This factory
1375#    is called to create new Element instances, as necessary.
1376
class TreeBuilder(object):

    def __init__(self, element_factory=None):
        self._data = [] # data collector
        self._elem = [] # element stack
        self._last = None # last element
        self._tail = None # true if we're after an end tag
        if element_factory is None:
            element_factory = Element
        # called as factory(tag, attrs) for every start call
        self._factory = element_factory

    ##
    # Flushes the builder buffers, and returns the toplevel document
    # element.
    #
    # @return An Element instance.
    # @defreturn Element

    def close(self):
        assert len(self._elem) == 0, "missing end tags"
        # identity test: comparing with != would invoke the comparison
        # operators of whatever the element factory returns
        assert self._last is not None, "missing toplevel element"
        return self._last

    def _flush(self):
        # moves collected character data to the last element seen: to
        # its tail if that element has been closed, to its text otherwise
        if self._data:
            if self._last is not None:
                text = "".join(self._data)
                if self._tail:
                    assert self._last.tail is None, "internal error (tail)"
                    self._last.tail = text
                else:
                    assert self._last.text is None, "internal error (text)"
                    self._last.text = text
            # data seen before the first element is dropped
            self._data = []

    ##
    # Adds text to the current element.
    #
    # @param data A string.  This should be either an 8-bit string
    #    containing ASCII text, or a Unicode string.

    def data(self, data):
        self._data.append(data)

    ##
    # Opens a new element.
    #
    # @param tag The element name.
    # @param attrs A dictionary containing element attributes.
    # @return The opened element.
    # @defreturn Element

    def start(self, tag, attrs):
        self._flush()
        self._last = elem = self._factory(tag, attrs)
        if self._elem:
            # attach to the innermost open element
            self._elem[-1].append(elem)
        self._elem.append(elem)
        self._tail = 0
        return elem

    ##
    # Closes the current element.
    #
    # @param tag The element name.
    # @return The closed element.
    # @defreturn Element

    def end(self, tag):
        self._flush()
        self._last = self._elem.pop()
        assert self._last.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   self._last.tag, tag)
        self._tail = 1
        return self._last
1453
1454##
1455# Element structure builder for XML source data, based on the
1456# <b>expat</b> parser.
1457#
1458# @keyparam target Target object.  If omitted, the builder uses an
1459#     instance of the standard {@link #TreeBuilder} class.
1460# @keyparam html Predefine HTML entities.  This flag is not supported
1461#     by the current implementation.
1462# @keyparam encoding Optional encoding.  If given, the value overrides
1463#     the encoding specified in the XML file.
1464# @see #ElementTree
1465# @see #TreeBuilder
1466
1467class XMLParser(object):
1468
1469    def __init__(self, html=0, target=None, encoding=None):
1470        try:
1471            from xml.parsers import expat
1472        except ImportError:
1473            try:
1474                import pyexpat; expat = pyexpat
1475            except ImportError:
1476                raise ImportError(
1477                    "No module named expat; use SimpleXMLTreeBuilder instead"
1478                    )
1479        parser = expat.ParserCreate(encoding, "}")
1480        if target is None:
1481            target = TreeBuilder()
1482        # underscored names are provided for compatibility only
1483        self.parser = self._parser = parser
1484        self.target = self._target = target
1485        self._error = expat.error
1486        self._names = {} # name memo cache
1487        # callbacks
1488        parser.DefaultHandlerExpand = self._default
1489        parser.StartElementHandler = self._start
1490        parser.EndElementHandler = self._end
1491        parser.CharacterDataHandler = self._data
1492        # let expat do the buffering, if supported
1493        try:
1494            self._parser.buffer_text = 1
1495        except AttributeError:
1496            pass
1497        # use new-style attribute handling, if supported
1498        try:
1499            self._parser.ordered_attributes = 1
1500            self._parser.specified_attributes = 1
1501            parser.StartElementHandler = self._start_list
1502        except AttributeError:
1503            pass
1504        self._doctype = None
1505        self.entity = {}
1506        try:
1507            self.version = "Expat %d.%d.%d" % expat.version_info
1508        except AttributeError:
1509            pass # unknown
1510
1511    def _raiseerror(self, value):
1512        err = ParseError(value)
1513        err.code = value.code
1514        err.position = value.lineno, value.offset
1515        raise err
1516
1517    def _fixtext(self, text):
1518        # convert text string to ascii, if possible
1519        try:
1520            return text.encode("ascii")
1521        except UnicodeError:
1522            return text
1523
1524    def _fixname(self, key):
1525        # expand qname, and convert name string to ascii, if possible
1526        try:
1527            name = self._names[key]
1528        except KeyError:
1529            name = key
1530            if "}" in name:
1531                name = "{" + name
1532            self._names[key] = name = self._fixtext(name)
1533        return name
1534
1535    def _start(self, tag, attrib_in):
1536        fixname = self._fixname
1537        fixtext = self._fixtext
1538        tag = fixname(tag)
1539        attrib = {}
1540        for key, value in attrib_in.items():
1541            attrib[fixname(key)] = fixtext(value)
1542        return self.target.start(tag, attrib)
1543
1544    def _start_list(self, tag, attrib_in):
1545        fixname = self._fixname
1546        fixtext = self._fixtext
1547        tag = fixname(tag)
1548        attrib = {}
1549        if attrib_in:
1550            for i in range(0, len(attrib_in), 2):
1551                attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1])
1552        return self.target.start(tag, attrib)
1553
1554    def _data(self, text):
1555        return self.target.data(self._fixtext(text))
1556
1557    def _end(self, tag):
1558        return self.target.end(self._fixname(tag))
1559
1560    def _default(self, text):
1561        prefix = text[:1]
1562        if prefix == "&":
1563            # deal with undefined entities
1564            try:
1565                self.target.data(self.entity[text[1:-1]])
1566            except KeyError:
1567                from xml.parsers import expat
1568                err = expat.error(
1569                    "undefined entity %s: line %d, column %d" %
1570                    (text, self._parser.ErrorLineNumber,
1571                    self._parser.ErrorColumnNumber)
1572                    )
1573                err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
1574                err.lineno = self._parser.ErrorLineNumber
1575                err.offset = self._parser.ErrorColumnNumber
1576                raise err
1577        elif prefix == "<" and text[:9] == "<!DOCTYPE":
1578            self._doctype = [] # inside a doctype declaration
1579        elif self._doctype is not None:
1580            # parse doctype contents
1581            if prefix == ">":
1582                self._doctype = None
1583                return
1584            text = text.strip()
1585            if not text:
1586                return
1587            self._doctype.append(text)
1588            n = len(self._doctype)
1589            if n > 2:
1590                type = self._doctype[1]
1591                if type == "PUBLIC" and n == 4:
1592                    name, type, pubid, system = self._doctype
1593                elif type == "SYSTEM" and n == 3:
1594                    name, type, system = self._doctype
1595                    pubid = None
1596                else:
1597                    return
1598                if pubid:
1599                    pubid = pubid[1:-1]
1600                if hasattr(self.target, "doctype"):
1601                    self.target.doctype(name, pubid, system[1:-1])
1602                self._doctype = None
1603
1604    ##
1605    # Feeds data to the parser.
1606    #
1607    # @param data Encoded data.
1608
1609    def feed(self, data):
1610        try:
1611            self._parser.Parse(data, 0)
1612        except self._error, v:
1613            self._raiseerror(v)
1614
1615    ##
1616    # Finishes feeding data to the parser.
1617    #
1618    # @return An element structure.
1619    # @defreturn Element
1620
1621    def close(self):
1622        try:
1623            self._parser.Parse("", 1) # end of data
1624        except self._error, v:
1625            self._raiseerror(v)
1626        tree = self.target.close()
1627        del self.target, self._parser # get rid of circular references
1628        return tree
1629
1630# compatibility
1631XMLTreeBuilder = XMLParser
Note: See TracBrowser for help on using the repository browser.