source: exist/trunk/python/elementtree-1.3/selftest.py @ 3150

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/elementtree-1.3/selftest.py@3150
Revision 3150, 30.6 KB checked in by lawrence, 12 years ago (diff)

woops, we didn't include elementtree itself.

Line 
1# $Id: selftest.py 3276 2007-09-12 06:52:30Z fredrik $
2# -*- coding: iso-8859-1 -*-
3# elementtree selftest program
4
5# this test script uses Python's "doctest" module to check that the
6# *test script* works as expected.
7
8# TODO: add more elementtree method tests
9# TODO: add xml/html parsing tests
10# TODO: etc
11
12import sys, string, StringIO
13
14from elementtree import ElementTree
15from elementtree import ElementPath
16from elementtree import ElementInclude
17from elementtree import HTMLTreeBuilder
18from elementtree import SimpleXMLWriter
19
# Short alias for the ElementTree module; some tests below use ET.* and
# others spell out ElementTree.* for the same module.
ET = ElementTree
21
def serialize(elem, **options):
    """Return *elem* written out as a string.

    The element is wrapped in an ElementTree and written to an in-memory
    buffer; any keyword *options* are passed through to write() unchanged.
    """
    import StringIO
    buf = StringIO.StringIO()
    ElementTree.ElementTree(elem).write(buf, **options)
    return buf.getvalue()
28
def summarize(elem):
    """Return the tag of *elem*, used as a compact summary in the doctests."""
    tag = elem.tag
    return tag
31
def summarize_list(seq):
    """Return a list with the summarize() result for every item of *seq*."""
    return [summarize(item) for item in seq]
34
def normalize_crlf(tree):
    """Replace CRLF with LF in the text and tail of every element of *tree*.

    Elements are taken from ``tree.getiterator()`` and modified in place;
    empty or missing text/tail values are left untouched.  Returns None.
    """
    for elem in tree.getiterator():
        # str.replace() does the same job as the deprecated
        # string.replace() module function.
        if elem.text:
            elem.text = elem.text.replace("\r\n", "\n")
        if elem.tail:
            elem.tail = elem.tail.replace("\r\n", "\n")
39
# Shared fixture for the find tests: a <body> with two direct <tag> children
# and a <section> holding one nested <tag> that carries an id attribute.
SAMPLE_XML = ElementTree.XML("""
<body>
  <tag class='a'>text</tag>
  <tag class='b' />
  <section>
    <tag class='b' id='inner'>subtext</tag>
  </section>
</body>
""")
49
50#
51# interface tests
52
def check_string(string):
    """Check that *string* behaves like a string.

    Exercises len(), iteration, concatenation and slicing; an exception
    from any of them is the failure signal.  A diagnostic line is printed
    for every item produced by iteration that is not of length one.
    Returns None.
    """
    len(string)
    for char in string:
        if len(char) != 1:
            # single-argument print(...) is valid on Python 2 and Python 3
            print("expected one-character string, got %r" % char)
    # Only the operations matter here; their results are discarded.
    string + ""
    string + " "
    string[:0]
61
def check_string_or_none(value):
    """Run check_string() on *value*, unless it is None (which is accepted)."""
    if value is not None:
        return check_string(value)
66
def check_mapping(mapping):
    """Check that *mapping* behaves like a mutable mapping.

    Exercises len(), keys(), items() and item lookup for every key, then
    stores "value" under "key" and prints a diagnostic if a different
    value is read back.  Note that *mapping* is modified: the "key" entry
    is left in place.  Returns None.
    """
    len(mapping)
    keys = mapping.keys()
    mapping.items()
    for key in keys:
        # lookup only; the result is not needed
        mapping[key]
    mapping["key"] = "value"
    stored = mapping["key"]
    if stored != "value":
        # single-argument print(...) is valid on Python 2 and Python 3
        print("expected value string, got %r" % stored)
76
def check_element(element):
    """Check that *element* and all its descendants look like elements.

    Prints a diagnostic for each of the tag/attrib/text/tail members that
    is missing, then validates the member values with check_string(),
    check_mapping() and check_string_or_none(), and finally recurses into
    every child obtained by iterating over *element*.
    """
    for name in ("tag", "attrib", "text", "tail"):
        if not hasattr(element, name):
            # single-argument print(...) is valid on Python 2 and Python 3
            print("no %s member" % name)
    check_string(element.tag)
    check_mapping(element.attrib)
    check_string_or_none(element.text)
    check_string_or_none(element.tail)
    for child in element:
        check_element(child)
92
def check_element_tree(tree):
    """Apply check_element() to the root element of *tree*."""
    root = tree.getroot()
    check_element(root)
95
96# --------------------------------------------------------------------
97# element tree tests
98
def sanity():
    """
    Import sanity check: a star-import of each elementtree module listed
    below must succeed without printing anything.

    >>> from elementtree.ElementTree import *
    >>> from elementtree.ElementInclude import *
    >>> from elementtree.ElementPath import *
    >>> from elementtree.HTMLTreeBuilder import *
    >>> from elementtree.SimpleXMLWriter import *
    >>> from elementtree.TidyTools import *
    """
108
def version():
    """
    Check the version string reported by the ElementTree module.

    >>> ElementTree.VERSION
    '1.3a2'
    """
114
def interface():
    """
    Test element tree interface.

    Runs the check_element helpers on a bare element and on a tree that
    wraps it; any diagnostic they print makes the doctest fail.

    >>> element = ElementTree.Element("tag")
    >>> check_element(element)
    >>> tree = ElementTree.ElementTree(element)
    >>> check_element_tree(tree)
    """
124
def simpleops():
    """
    Test basic child manipulation: append, remove, insert and extend.

    >>> elem = ElementTree.XML("<body><tag/></body>")
    >>> serialize(elem)
    '<body><tag /></body>'
    >>> e = ElementTree.Element("tag2")
    >>> elem.append(e)
    >>> serialize(elem)
    '<body><tag /><tag2 /></body>'
    >>> elem.remove(e)
    >>> serialize(elem)
    '<body><tag /></body>'
    >>> elem.insert(0, e)
    >>> serialize(elem)
    '<body><tag2 /><tag /></body>'
    >>> elem.remove(e)
    >>> elem.extend([e])
    >>> serialize(elem)
    '<body><tag /><tag2 /></body>'
    >>> elem.remove(e)
    """
146
def simplefind():
    """
    Test find methods using the elementpath fallback.

    ElementTree.ElementPath is replaced by a _SimpleElementPath instance
    for the duration of the test; the last example puts the saved module
    back.

    >>> CurrentElementPath = ElementTree.ElementPath
    >>> ElementTree.ElementPath = ElementTree._SimpleElementPath()
    >>> elem = SAMPLE_XML
    >>> elem.find("tag").tag
    'tag'
    >>> ElementTree.ElementTree(elem).find("tag").tag
    'tag'
    >>> elem.findtext("tag")
    'text'
    >>> elem.findtext("tog")
    >>> elem.findtext("tog", "default")
    'default'
    >>> ElementTree.ElementTree(elem).findtext("tag")
    'text'
    >>> summarize_list(elem.findall("tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag']

    Path syntax doesn't work in this case.

    >>> elem.find("section/tag")
    >>> elem.findtext("section/tag")
    >>> elem.findall("section/tag")
    []

    >>> ElementTree.ElementPath = CurrentElementPath
    """
179
def find():
    """
    Test find methods (including xpath syntax).

    All queries run against the module-level SAMPLE_XML fixture, either
    directly on the element or through an ElementTree wrapper.

    >>> elem = SAMPLE_XML
    >>> elem.find("tag").tag
    'tag'
    >>> ElementTree.ElementTree(elem).find("tag").tag
    'tag'
    >>> elem.find("section/tag").tag
    'tag'
    >>> ElementTree.ElementTree(elem).find("section/tag").tag
    'tag'
    >>> elem.findtext("tag")
    'text'
    >>> elem.findtext("tog")
    >>> elem.findtext("tog", "default")
    'default'
    >>> ElementTree.ElementTree(elem).findtext("tag")
    'text'
    >>> elem.findtext("section/tag")
    'subtext'
    >>> ElementTree.ElementTree(elem).findtext("section/tag")
    'subtext'
    >>> summarize_list(elem.findall("tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall("*"))
    ['tag', 'tag', 'section']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag']
    >>> summarize_list(elem.findall("section/tag"))
    ['tag']
    >>> summarize_list(elem.findall("section//tag"))
    ['tag']
    >>> summarize_list(elem.findall("section/*"))
    ['tag']
    >>> summarize_list(elem.findall("section//*"))
    ['tag']
    >>> summarize_list(elem.findall("section/.//*"))
    ['tag']
    >>> summarize_list(elem.findall("*/*"))
    ['tag']
    >>> summarize_list(elem.findall("*//*"))
    ['tag']
    >>> summarize_list(elem.findall("*/tag"))
    ['tag']
    >>> summarize_list(elem.findall("*/./tag"))
    ['tag']
    >>> summarize_list(elem.findall("./tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag']
    >>> summarize_list(elem.findall("././tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@class]"))
    ['tag', 'tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@class='a']"))
    ['tag']
    >>> summarize_list(elem.findall(".//tag[@class='b']"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@id]"))
    ['tag']
    >>> summarize_list(elem.findall(".//section[tag]"))
    ['section']
    >>> summarize_list(elem.findall(".//section[element]"))
    []
    >>> summarize_list(elem.findall("../tag"))
    []
    >>> summarize_list(elem.findall("section/../tag"))
    ['tag', 'tag']
    >>> summarize_list(ElementTree.ElementTree(elem).findall("./tag"))
    ['tag', 'tag']

    FIXME: ET's Path module handles this case incorrectly; this gives
    a warning in 1.3, and the behaviour will be modified in 1.4.

    >>> summarize_list(ElementTree.ElementTree(elem).findall("/tag"))
    ['tag', 'tag']
    """
259
def bad_find():
    """
    Check bad or unsupported path expressions.

    Both calls below are expected to raise SyntaxError: an absolute path
    used on an element, and a path that ends in "//".

    >>> elem = SAMPLE_XML
    >>> elem.findall("/tag")
    Traceback (most recent call last):
    SyntaxError: cannot use absolute path on element
    >>> elem.findall("section//")
    Traceback (most recent call last):
    SyntaxError: invalid path
    """
272
def parsefile():
    """
    Test parsing from file.

    Reads samples/simple.xml and samples/simple-ns.xml (paths relative to
    the current working directory); normalize_crlf() is applied before
    each tree is written back to stdout.

    >>> tree = ElementTree.parse("samples/simple.xml")
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout)
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    >>> tree = ElementTree.parse("samples/simple-ns.xml")
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout)
    <ns0:root xmlns:ns0="namespace">
       <ns0:element key="value">text</ns0:element>
       <ns0:element>text</ns0:element>tail
       <ns0:empty-element />
    </ns0:root>
    """
294
def parsehtml():
    """
    Test HTML parsing.

    The input opens <p> twice but closes it once; the expected
    serialization contains a single <p> element.

    >>> p = HTMLTreeBuilder.TreeBuilder()
    >>> p.feed("<p><p>spam<b>egg</b></p>")
    >>> serialize(p.close())
    '<p>spam<b>egg</b></p>'
    """
304
def parseliteral():
    r"""
    Test parsing from string literals (XML, fromstring, fromstringlist),
    serializing back with tostring/tostringlist, and id collection with
    XMLID.

    >>> element = ElementTree.XML("<html><body>text</body></html>")
    >>> ElementTree.ElementTree(element).write(sys.stdout)
    <html><body>text</body></html>
    >>> element = ElementTree.fromstring("<html><body>text</body></html>")
    >>> ElementTree.ElementTree(element).write(sys.stdout)
    <html><body>text</body></html>
    >>> sequence = ["<html><body>", "text</bo", "dy></html>"]
    >>> element = ElementTree.fromstringlist(sequence)
    >>> ElementTree.ElementTree(element).write(sys.stdout)
    <html><body>text</body></html>
    >>> print ElementTree.tostring(element)
    <html><body>text</body></html>
    >>> print "".join(ElementTree.tostringlist(element))
    <html><body>text</body></html>
    >>> print ElementTree.tostring(element, "ascii")
    <?xml version='1.0' encoding='ascii'?>
    <html><body>text</body></html>
    >>> _, ids = ElementTree.XMLID("<html><body>text</body></html>")
    >>> len(ids)
    0
    >>> _, ids = ElementTree.XMLID("<html><body id='body'>text</body></html>")
    >>> len(ids)
    1
    >>> ids["body"].tag
    'body'
    """
333
def iterparse():
    """
    Test iterparse interface.

    Covers the default event set, an empty event tuple (passed both
    positionally and by keyword, yielding nothing), explicit start/end
    events, and the start-ns/end-ns namespace events.

    >>> iterparse = ElementTree.iterparse

    >>> context = iterparse("samples/simple.xml")
    >>> for action, elem in context:
    ...   print action, elem.tag
    end element
    end element
    end empty-element
    end root
    >>> context.root.tag
    'root'

    >>> context = iterparse("samples/simple-ns.xml")
    >>> for action, elem in context:
    ...   print action, elem.tag
    end {namespace}element
    end {namespace}element
    end {namespace}empty-element
    end {namespace}root

    >>> events = ()
    >>> context = iterparse("samples/simple.xml", events)
    >>> for action, elem in context:
    ...   print action, elem.tag

    >>> events = ()
    >>> context = iterparse("samples/simple.xml", events=events)
    >>> for action, elem in context:
    ...   print action, elem.tag

    >>> events = ("start", "end")
    >>> context = iterparse("samples/simple.xml", events)
    >>> for action, elem in context:
    ...   print action, elem.tag
    start root
    start element
    end element
    start element
    end element
    start empty-element
    end empty-element
    end root

    >>> events = ("start", "end", "start-ns", "end-ns")
    >>> context = iterparse("samples/simple-ns.xml", events)
    >>> for action, elem in context:
    ...   if action in ("start", "end"):
    ...     print action, elem.tag
    ...   else:
    ...     print action, elem
    start-ns ('', 'namespace')
    start {namespace}root
    start {namespace}element
    end {namespace}element
    start {namespace}element
    end {namespace}element
    start {namespace}empty-element
    end {namespace}empty-element
    end {namespace}root
    end-ns None

    """
400
def writefile():
    """
    Test writing an element with text and a subelement through the
    serialize() helper.

    >>> elem = ElementTree.Element("tag")
    >>> elem.text = "text"
    >>> serialize(elem)
    '<tag>text</tag>'
    >>> ElementTree.SubElement(elem, "subtag").text = "subtext"
    >>> serialize(elem)
    '<tag>text<subtag>subtext</subtag></tag>'

    Test tag suppression
    >>> elem.tag = None
    >>> serialize(elem)
    'text<subtag>subtext</subtag>'
    """
416
def writestring():
    """
    Test ElementTree.tostring() on elements parsed with XML() and
    fromstring().

    >>> elem = ElementTree.XML("<html><body>text</body></html>")
    >>> ElementTree.tostring(elem)
    '<html><body>text</body></html>'
    >>> elem = ElementTree.fromstring("<html><body>text</body></html>")
    >>> ElementTree.tostring(elem)
    '<html><body>text</body></html>'
    """
426
def encoding():
    r"""
    Test encoding issues.

    Each group below serializes the same element four times: with the
    default encoding, utf-8, us-ascii and iso-8859-1.  The groups vary the
    content: plain text, reserved characters in text and in an attribute,
    and non-ASCII characters in text and in an attribute.

    >>> elem = ElementTree.Element("tag")
    >>> elem.text = u"abc"
    >>> serialize(elem)
    '<tag>abc</tag>'
    >>> serialize(elem, encoding="utf-8")
    '<tag>abc</tag>'
    >>> serialize(elem, encoding="us-ascii")
    '<tag>abc</tag>'
    >>> serialize(elem, encoding="iso-8859-1")
    "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>abc</tag>"

    >>> elem.text = "<&\"\'>"
    >>> serialize(elem)
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, encoding="utf-8")
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, encoding="us-ascii") # cdata characters
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, encoding="iso-8859-1")
    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag>&lt;&amp;"\'&gt;</tag>'

    >>> elem.attrib["key"] = "<&\"\'>"
    >>> elem.text = None
    >>> serialize(elem)
    '<tag key="&lt;&amp;&quot;\'&gt;" />'
    >>> serialize(elem, encoding="utf-8")
    '<tag key="&lt;&amp;&quot;\'&gt;" />'
    >>> serialize(elem, encoding="us-ascii")
    '<tag key="&lt;&amp;&quot;\'&gt;" />'
    >>> serialize(elem, encoding="iso-8859-1")
    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="&lt;&amp;&quot;\'&gt;" />'

    >>> elem.text = u'\xe5\xf6\xf6<>'
    >>> elem.attrib.clear()
    >>> serialize(elem)
    '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
    >>> serialize(elem, encoding="utf-8")
    '<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>'
    >>> serialize(elem, encoding="us-ascii")
    '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
    >>> serialize(elem, encoding="iso-8859-1")
    "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>\xe5\xf6\xf6&lt;&gt;</tag>"

    >>> elem.attrib["key"] = u'\xe5\xf6\xf6<>'
    >>> elem.text = None
    >>> serialize(elem)
    '<tag key="&#229;&#246;&#246;&lt;&gt;" />'
    >>> serialize(elem, encoding="utf-8")
    '<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />'
    >>> serialize(elem, encoding="us-ascii")
    '<tag key="&#229;&#246;&#246;&lt;&gt;" />'
    >>> serialize(elem, encoding="iso-8859-1")
    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="\xe5\xf6\xf6&lt;&gt;" />'

    """
486
def methods():
    r"""
    Test serialization methods.

    Serializes one element with the default method, method=None, "xml",
    "html" and "text" and compares the output of each.

    >>> e = ET.XML("<html><link/><script>1 &lt; 2</script></html>")
    >>> e.tail = "\n"
    >>> serialize(e)
    '<html><link /><script>1 &lt; 2</script></html>\n'
    >>> serialize(e, method=None)
    '<html><link /><script>1 &lt; 2</script></html>\n'
    >>> serialize(e, method="xml")
    '<html><link /><script>1 &lt; 2</script></html>\n'
    >>> serialize(e, method="html")
    '<html><link><script>1 < 2</script></html>\n'
    >>> serialize(e, method="text")
    '1 < 2\n'

    """
505
def iterators():
    """
    Test iterators.

    Covers iter() and itertext(), each called on the root element and on
    the <body> subelement.

    >>> e = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
    >>> summarize_list(e.iter())
    ['html', 'body', 'i']
    >>> summarize_list(e.find("body").iter())
    ['body', 'i']
    >>> "".join(e.itertext())
    'this is a paragraph...'
    >>> "".join(e.find("body").itertext())
    'this is a paragraph.'
    """
520
# Input for entity(): the DTD only references an external parameter entity
# (user-entities.xml); &entity; is not declared anywhere in this text.
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""
528
def entity():
    """
    Test entity handling.

    Both examples are expected to raise ParseError: the first document
    has no DTD at all, the second (ENTITY_XML) has one that does not
    declare the entity in its visible text.

    1) bad entities

    >>> ElementTree.XML("<document>&entity;</document>")
    Traceback (most recent call last):
    ParseError: undefined entity: line 1, column 10

    >>> ElementTree.XML(ENTITY_XML)
    Traceback (most recent call last):
    ParseError: undefined entity &entity;: line 5, column 10

    (add more tests here)

    """
546
def error(xml):
    """
    Test error handling.

    Helper and test in one: parses *xml* and returns the ParseError that
    was raised, or None if the text parsed without error.

    >>> error("foo").position
    (1, 0)
    >>> error("<tag>&foo;</tag>").position
    (1, 5)
    >>> error("foobar<").position
    (1, 6)

    """
    try:
        ET.XML(xml)
    except ET.ParseError:
        # sys.exc_info() replaces the deprecated sys.exc_value global,
        # which is not thread-safe; this form also needs no
        # "except ... as" syntax.
        return sys.exc_info()[1]
563
def namespace():
    """
    Test namespace issues.

    The expected output shows the xml, rdf and html prefixes surviving a
    parse/serialize round trip, while the soap prefix in 2.3 is replaced
    by a generated ns0 prefix.

    1) xml namespace

    >>> elem = ElementTree.XML("<tag xml:lang='en' />")
    >>> serialize(elem) # 1.1
    '<tag xml:lang="en" />'

    2) other "well-known" namespaces

    >>> elem = ElementTree.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
    >>> serialize(elem) # 2.1
    '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />'

    >>> elem = ElementTree.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
    >>> serialize(elem) # 2.2
    '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />'

    >>> elem = ElementTree.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
    >>> serialize(elem) # 2.3
    '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />'

    3) unknown namespaces

    (no examples written for this case yet)

    """
591
def qname():
    """
    Test QName handling.

    Example 3.3 appends the same subelement twice and expects every
    namespace to be declared exactly once, on the root element.

    1) decorated tags

    >>> elem = ElementTree.Element("{uri}tag")
    >>> serialize(elem) # 1.1
    '<ns0:tag xmlns:ns0="uri" />'
    >>> elem = ElementTree.Element(ElementTree.QName("{uri}tag"))
    >>> serialize(elem) # 1.2
    '<ns0:tag xmlns:ns0="uri" />'
    >>> elem = ElementTree.Element(ElementTree.QName("uri", "tag"))
    >>> serialize(elem) # 1.3
    '<ns0:tag xmlns:ns0="uri" />'

    2) decorated attributes

    >>> elem.clear()
    >>> elem.attrib["{uri}key"] = "value"
    >>> serialize(elem) # 2.1
    '<ns0:tag ns0:key="value" xmlns:ns0="uri" />'

    >>> elem.clear()
    >>> elem.attrib[ElementTree.QName("{uri}key")] = "value"
    >>> serialize(elem) # 2.2
    '<ns0:tag ns0:key="value" xmlns:ns0="uri" />'

    3) decorated values are not converted by default, but the
       QName wrapper can be used for values

    >>> elem.clear()
    >>> elem.attrib["{uri}key"] = "{uri}value"
    >>> serialize(elem) # 3.1
    '<ns0:tag ns0:key="{uri}value" xmlns:ns0="uri" />'

    >>> elem.clear()
    >>> elem.attrib["{uri}key"] = ElementTree.QName("{uri}value")
    >>> serialize(elem) # 3.2
    '<ns0:tag ns0:key="ns0:value" xmlns:ns0="uri" />'

    >>> elem.clear()
    >>> subelem = ElementTree.Element("tag")
    >>> subelem.attrib["{uri1}key"] = ElementTree.QName("{uri2}value")
    >>> elem.append(subelem)
    >>> elem.append(subelem)
    >>> serialize(elem) # 3.3
    '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2"><tag ns1:key="ns2:value" /><tag ns1:key="ns2:value" /></ns0:tag>'

    """
642
def xpath_tokenizer(p):
    """
    Test the XPath tokenizer.

    Helper and test in one: flattens the (op, tag) pairs produced by
    ElementPath.xpath_tokenizer into a list, keeping the op when it is
    non-empty and the tag otherwise.

    >>> # tests from the xml specification
    >>> xpath_tokenizer("*")
    ['*']
    >>> xpath_tokenizer("text()")
    ['text', '()']
    >>> xpath_tokenizer("@name")
    ['@', 'name']
    >>> xpath_tokenizer("@*")
    ['@', '*']
    >>> xpath_tokenizer("para[1]")
    ['para', '[', '1', ']']
    >>> xpath_tokenizer("para[last()]")
    ['para', '[', 'last', '()', ']']
    >>> xpath_tokenizer("*/para")
    ['*', '/', 'para']
    >>> xpath_tokenizer("/doc/chapter[5]/section[2]")
    ['/', 'doc', '/', 'chapter', '[', '5', ']', '/', 'section', '[', '2', ']']
    >>> xpath_tokenizer("chapter//para")
    ['chapter', '//', 'para']
    >>> xpath_tokenizer("//para")
    ['//', 'para']
    >>> xpath_tokenizer("//olist/item")
    ['//', 'olist', '/', 'item']
    >>> xpath_tokenizer(".")
    ['.']
    >>> xpath_tokenizer(".//para")
    ['.', '//', 'para']
    >>> xpath_tokenizer("..")
    ['..']
    >>> xpath_tokenizer("../@lang")
    ['..', '/', '@', 'lang']
    >>> xpath_tokenizer("chapter[title]")
    ['chapter', '[', 'title', ']']
    >>> xpath_tokenizer("employee[@secretary and @assistant]")
    ['employee', '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']

    >>> # additional tests
    >>> xpath_tokenizer("{http://spam}egg")
    ['{http://spam}egg']
    >>> xpath_tokenizer("./spam.egg")
    ['.', '/', 'spam.egg']
    >>> xpath_tokenizer(".//{http://spam}egg")
    ['.', '//', '{http://spam}egg']
    """
    return [op or tag for op, tag in ElementPath.xpath_tokenizer(p)]
695
696#
697# xinclude tests (samples from appendix C of the xinclude specification)
698
# In-memory resource table for the xinclude tests: maps an href to the text
# of that resource.  Looked up by xinclude_loader().
XINCLUDE = {}

# C.1: document that includes another XML document (disclaimer.xml).
XINCLUDE["C1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml"/>
</document>
"""

# Target of the include in C1.xml.
XINCLUDE["disclaimer.xml"] = """\
<?xml version='1.0'?>
<disclaimer>
  <p>The opinions represented herein represent those of the individual
  and should not be interpreted as official policy endorsed by this
  organization.</p>
</disclaimer>
"""

# C.2: document that includes a plain-text resource (parse="text").
XINCLUDE["C2.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>This document has been accessed
  <xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

# Target of the include in C2.xml.
XINCLUDE["count.txt"] = "324387"

# C.3: document that includes the source of an XML file as text.
XINCLUDE["C3.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source of the "data.xml" resource:</p>
  <example><xi:include href="data.xml" parse="text"/></example>
</document>
"""

# Target of the include in C3.xml.
XINCLUDE["data.xml"] = """\
<?xml version='1.0'?>
<data>
  <item><![CDATA[Brooks & Shields]]></item>
</data>
"""

# C.5: nested xi:fallback elements; neither example.txt nor
# fallback-example.txt has an entry in this table.
XINCLUDE["C5.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
  <xi:include href="example.txt" parse="text">
    <xi:fallback>
      <xi:include href="fallback-example.txt" parse="text">
        <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
      </xi:include>
    </xi:fallback>
  </xi:include>
</div>
"""

# Includes a real file (samples/simple.xml); used by xinclude_default(),
# which calls ElementInclude.include() without a custom loader.
XINCLUDE["default.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>Example.</p>
  <xi:include href="samples/simple.xml"/>
</document>
"""
763
def xinclude_loader(href, parse="xml", encoding=None):
    """Loader for ElementInclude that serves resources from XINCLUDE.

    Returns a parsed element when *parse* is "xml" and the raw text
    otherwise.  Raises IOError("resource not found") when *href* has no
    entry in the table.  *encoding* is accepted but not used.
    """
    if href not in XINCLUDE:
        raise IOError("resource not found")
    data = XINCLUDE[href]
    if parse != "xml":
        return data
    return ElementTree.XML(data)
772
def xinclude():
    r"""
    Test ElementInclude.include() with xinclude_loader, which serves the
    in-memory XINCLUDE samples.

    Basic inclusion example (XInclude C.1)

    >>> document = xinclude_loader("C1.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print serialize(document) # C1
    <document>
      <p>120 Mz is adequate for an average home user.</p>
      <disclaimer>
      <p>The opinions represented herein represent those of the individual
      and should not be interpreted as official policy endorsed by this
      organization.</p>
    </disclaimer>
    </document>

    Textual inclusion example (XInclude C.2)

    >>> document = xinclude_loader("C2.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print serialize(document) # C2
    <document>
      <p>This document has been accessed
      324387 times.</p>
    </document>

    Textual inclusion of XML example (XInclude C.3)

    >>> document = xinclude_loader("C3.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print serialize(document) # C3
    <document>
      <p>The following is the source of the "data.xml" resource:</p>
      <example>&lt;?xml version='1.0'?&gt;
    &lt;data&gt;
      &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;
    &lt;/data&gt;
    </example>
    </document>

    Fallback example (XInclude C.5)
    Note! Fallback support is not yet implemented

    >>> document = xinclude_loader("C5.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    Traceback (most recent call last):
    IOError: resource not found
    >>> # print serialize(document) # C5

    """
823
def xinclude_default():
    """
    Test ElementInclude.include() without a custom loader: the include in
    default.xml refers to the file samples/simple.xml.

    >>> document = xinclude_loader("default.xml")
    >>> ElementInclude.include(document)
    >>> print serialize(document) # default
    <document>
      <p>Example.</p>
      <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    </document>
    """
838
839#
840# xmlwriter
841
def xmlwriter():
    r"""
    Test SimpleXMLWriter: builds a small HTML document with start(),
    element(), data(), end() and close(), including reserved and
    non-ASCII characters in element text.

    >>> file = StringIO.StringIO()
    >>> w = SimpleXMLWriter.XMLWriter(file)
    >>> html = w.start("html")
    >>> x = w.start("head")
    >>> w.element("title", "my document")
    >>> w.data("\n")
    >>> w.element("meta", name="hello", value="goodbye")
    >>> w.data("\n")
    >>> w.end()
    >>> x = w.start("body")
    >>> w.element("h1", "this is a heading")
    >>> w.data("\n")
    >>> w.element("p", u"this is a paragraph")
    >>> w.data("\n")
    >>> w.element("p", u"reserved characters: <&>")
    >>> w.data("\n")
    >>> w.element("p", u"detta är också ett stycke")
    >>> w.data("\n")
    >>> w.close(html)
    >>> print file.getvalue()
    <html><head><title>my document</title>
    <meta name="hello" value="goodbye" />
    </head><body><h1>this is a heading</h1>
    <p>this is a paragraph</p>
    <p>reserved characters: &lt;&amp;&gt;</p>
    <p>detta &#228;r ocks&#229; ett stycke</p>
    </body></html>
    """
872
873# --------------------------------------------------------------------
874# reported bugs
875
def bug_xmltoolkit21():
    """
    marshaller gives obscure errors for non-string values

    An int used as tag, text, tail, attribute key or attribute value must
    make serialization raise TypeError with the same clear message.

    >>> elem = ElementTree.Element(123)
    >>> serialize(elem) # tag
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ElementTree.Element("elem")
    >>> elem.text = 123
    >>> serialize(elem) # text
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ElementTree.Element("elem")
    >>> elem.tail = 123
    >>> serialize(elem) # tail
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ElementTree.Element("elem")
    >>> elem.set(123, "123")
    >>> serialize(elem) # attribute key
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ElementTree.Element("elem")
    >>> elem.set("123", 123)
    >>> serialize(elem) # attribute value
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)

    """
906
def bug_xmltoolkit25():
    """
    typo in ElementTree.findtext

    Regression test: findtext() called on an ElementTree wrapper must
    work for both a plain tag and a path.

    >>> tree = ElementTree.ElementTree(SAMPLE_XML)
    >>> tree.findtext("tag")
    'text'
    >>> tree.findtext("section/tag")
    'subtext'
    """
917
def bug_xmltoolkit28():
    """
    .//tag causes exceptions

    Regression test: a descendant search must return an empty list for a
    tag that is absent and the match for one that is present.

    >>> tree = ElementTree.XML("<doc><table><tbody/></table></doc>")
    >>> summarize_list(tree.findall(".//thead"))
    []
    >>> summarize_list(tree.findall(".//tbody"))
    ['tbody']
    """
928
def bug_xmltoolkitX1():
    """
    dump() doesn't flush the output buffer

    Regression test: text written to sys.stdout right after dump() must
    appear after the dumped document.

    >>> tree = ElementTree.XML("<doc><table><tbody/></table></doc>")
    >>> ElementTree.dump(tree); sys.stdout.write("tail")
    <doc><table><tbody /></table></doc>
    tail
    """
938
def bug_xmltoolkit39():
    """
    non-ascii element and attribute names doesn't work

    Covers non-ASCII names coming from parsed iso-8859-1 documents and
    from elements built in code; each result is serialized as utf-8.

    >>> tree = ElementTree.XML("<?xml version='1.0' encoding='iso-8859-1'?><täg />")
    >>> ElementTree.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g />'

    >>> tree = ElementTree.XML("<?xml version='1.0' encoding='iso-8859-1'?><tag ättr='v&#228;lue' />")
    >>> tree.attrib
    {u'\\xe4ttr': u'v\\xe4lue'}
    >>> ElementTree.tostring(tree, "utf-8")
    '<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />'

    >>> tree = ElementTree.XML("<?xml version='1.0' encoding='iso-8859-1'?><täg>text</täg>")
    >>> ElementTree.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g>text</t\\xc3\\xa4g>'

    >>> tree = ElementTree.Element(u"täg")
    >>> ElementTree.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g />'

    >>> tree = ElementTree.Element("tag")
    >>> tree.set(u"ättr", u"välue")
    >>> ElementTree.tostring(tree, "utf-8")
    '<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />'

    """
967
def bug_xmltoolkit45():
    """
    problems parsing mixed unicode/non-ascii html documents

    Every variant below is expected to serialize the non-ASCII letter as
    the character reference &#228;.

    latin-1 text
    >>> p = HTMLTreeBuilder.TreeBuilder()
    >>> p.feed("<p>välue</p>")
    >>> serialize(p.close())
    '<p>v&#228;lue</p>'

    utf-8 text
    >>> p = HTMLTreeBuilder.TreeBuilder(encoding="utf-8")
    >>> p.feed("<p>v\xc3\xa4lue</p>")
    >>> serialize(p.close())
    '<p>v&#228;lue</p>'

    utf-8 text using meta tag
    >>> p = HTMLTreeBuilder.TreeBuilder()
    >>> p.feed("<html><meta http-equiv='Content-Type' content='text/html; charset=utf-8'><p>v\xc3\xa4lue</p></html>")
    >>> serialize(p.close().find("p"))
    '<p>v&#228;lue</p>'

    latin-1 character references
    >>> p = HTMLTreeBuilder.TreeBuilder()
    >>> p.feed("<p>v&#228;lue</p>")
    >>> serialize(p.close())
    '<p>v&#228;lue</p>'

    latin-1 character entities
    >>> p = HTMLTreeBuilder.TreeBuilder()
    >>> p.feed("<p>v&auml;lue</p>")
    >>> serialize(p.close())
    '<p>v&#228;lue</p>'

    mixed latin-1 text and unicode entities
    >>> p = HTMLTreeBuilder.TreeBuilder()
    >>> p.feed("<p>&#8221;välue&#8221;</p>")
    >>> serialize(p.close())
    '<p>&#8221;v&#228;lue&#8221;</p>'

    mixed unicode and latin-1 entities
    >>> p = HTMLTreeBuilder.TreeBuilder()
    >>> p.feed("<p>&#8221;v&auml;lue&#8221;</p>")
    >>> serialize(p.close())
    '<p>&#8221;v&#228;lue&#8221;</p>'

    """
1015
def bug_xmltoolkit46():
    """
    problems parsing open BR tags

    Regression test: an unclosed <br> inside a paragraph must come out as
    an empty element, with the text after it kept in the paragraph.

    >>> p = HTMLTreeBuilder.TreeBuilder()
    >>> p.feed("<p>key<br>value</p>")
    >>> serialize(p.close())
    '<p>key<br />value</p>'

    """
1026
def bug_xmltoolkit54():
    """
    problems handling internally defined entities

    The entity is declared in the internal DTD subset as the hexadecimal
    reference &#x8230;, which serializes as decimal &#33328;.

    >>> e = ElementTree.XML("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]><doc>&ldots;</doc>")
    >>> serialize(e)
    '<doc>&#33328;</doc>'
    """
1035
def bug_xmltoolkit55():
    """
    make sure we're reporting the first error, not the last

    The document uses three undefined entities; the ParseError must name
    the first one (&ldots;).

    >>> e = ElementTree.XML("<!DOCTYPE doc SYSTEM 'doc.dtd'><doc>&ldots;&ndots;&rdots;</doc>")
    Traceback (most recent call last):
    ParseError: undefined entity &ldots;: line 1, column 36
    """
1044
1045# --------------------------------------------------------------------
1046
def bug_200708_version():
    """
    The parser must expose a version string and still parse normally.

    Only the "Expat " prefix of the version is checked: the number that
    follows depends on the expat library the interpreter was built
    against, so pinning an exact release makes the test fail elsewhere.

    >>> parser = ET.XMLParser()
    >>> parser.version.startswith("Expat ")
    True
    >>> parser.feed(open("samples/simple.xml").read())
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    """
1060
def bug_200708_newline():
    r"""

    Preserve newlines in attributes.

    Newlines in an attribute value are expected to be written as &#10;
    character references, so the value survives a serialize/parse round
    trip unchanged.

    >>> e = ET.Element('SomeTag', text="def _f():\n  return 3\n")
    >>> ET.tostring(e)
    '<SomeTag text="def _f():&#10;  return 3&#10;" />'
    >>> ET.XML(ET.tostring(e)).get("text")
    'def _f():\n  return 3\n'
    >>> ET.tostring(ET.XML(ET.tostring(e)))
    '<SomeTag text="def _f():&#10;  return 3&#10;" />'
    """
1074
def bug_200709_default_namespace():
    """
    Test the default_namespace serialization option: (1) all names in the
    default namespace, (2) one name in another namespace, which gets a
    generated prefix, and (3) an unqualified name, which must raise
    ValueError.

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> serialize(e, default_namespace="default") # 1
    '<elem xmlns="default"><elem /></elem>'

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> s = ET.SubElement(e, "{not-default}elem")
    >>> serialize(e, default_namespace="default") # 2
    '<elem xmlns="default" xmlns:ns1="not-default"><elem /><ns1:elem /></elem>'

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> s = ET.SubElement(e, "elem") # unprefixed name
    >>> serialize(e, default_namespace="default") # 3
    Traceback (most recent call last):
    ValueError: cannot use non-qualified names with default_namespace option

    """
1097
1098# --------------------------------------------------------------------
1099
if __name__ == "__main__":
    # Run every doctest in this file, imported by name as "selftest", and
    # report how many examples passed.
    import doctest
    import selftest
    results = doctest.testmod(selftest)
    failed, tested = results
    print("%d tests ok." % (tested - failed))
Note: See TracBrowser for help on using the repository browser.