source: exist/trunk/python/elementtree-1.3a6-20070220-badc/unittests/test_html.py @ 3578

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/elementtree-1.3a6-20070220-badc/unittests/test_html.py@3578
Revision 3578, 2.1 KB checked in by pjkersha, 11 years ago (diff)

Latest releases from Fredrik Lundh. 10 March release has exclusive C14N support with namespace prefixes.

Line 
# $Id: test_html.py 3298 2007-09-13 14:38:02Z fredrik $
# -*- coding: iso-8859-1 -*-
3
4from support import *
5from elementtree import HTMLTreeBuilder
6
def parsehtml():
    """
    Test HTML parsing.

    The function has no body of its own; it only carries the doctest
    below.  The example feeds markup with a repeated opening ``<p>`` tag
    to ``HTMLTreeBuilder.TreeBuilder`` and serializes the tree returned
    by ``close()``; the expected output contains a single ``<p>``
    element.  ``serialize`` is not defined in this file -- presumably it
    is provided by the ``support`` star import.

    >>> p = HTMLTreeBuilder.TreeBuilder()
    >>> p.feed("<p><p>spam<b>egg</b></p>")
    >>> serialize(p.close())
    '<p>spam<b>egg</b></p>'
    """
16
# --------------------------------------------------------------------
18
def bug_xmltoolkit45():
    """
    problems parsing mixed unicode/non-ascii html documents

    The function has no body of its own; it only carries the doctests
    below.  Each section feeds one spelling of the same non-ASCII
    character (raw text, UTF-8 bytes, a numeric character reference or a
    named entity) to ``HTMLTreeBuilder.TreeBuilder`` and expects the
    serialized tree to contain the numeric reference ``&#228;``.
    ``serialize`` is not defined in this file -- presumably it is
    provided by the ``support`` star import.

    NOTE(review): the UTF-8 sections pass escaped byte sequences inside
    plain string literals, which looks like Python 2 byte-string usage --
    confirm before running these examples under Python 3.

    latin-1 text
    >>> p = HTMLTreeBuilder.TreeBuilder()
    >>> p.feed("<p>välue</p>")
    >>> serialize(p.close())
    '<p>v&#228;lue</p>'

    utf-8 text
    >>> p = HTMLTreeBuilder.TreeBuilder(encoding="utf-8")
    >>> p.feed("<p>v\xc3\xa4lue</p>")
    >>> serialize(p.close())
    '<p>v&#228;lue</p>'

    utf-8 text using meta tag
    >>> p = HTMLTreeBuilder.TreeBuilder()
    >>> p.feed("<html><meta http-equiv='Content-Type' content='text/html; charset=utf-8'><p>v\xc3\xa4lue</p></html>")
    >>> serialize(p.close().find("p"))
    '<p>v&#228;lue</p>'

    latin-1 character references
    >>> p = HTMLTreeBuilder.TreeBuilder()
    >>> p.feed("<p>v&#228;lue</p>")
    >>> serialize(p.close())
    '<p>v&#228;lue</p>'

    latin-1 character entities
    >>> p = HTMLTreeBuilder.TreeBuilder()
    >>> p.feed("<p>v&auml;lue</p>")
    >>> serialize(p.close())
    '<p>v&#228;lue</p>'

    mixed latin-1 text and unicode entities
    >>> p = HTMLTreeBuilder.TreeBuilder()
    >>> p.feed("<p>&#8221;välue&#8221;</p>")
    >>> serialize(p.close())
    '<p>&#8221;v&#228;lue&#8221;</p>'

    mixed unicode and latin-1 entities
    >>> p = HTMLTreeBuilder.TreeBuilder()
    >>> p.feed("<p>&#8221;v&auml;lue&#8221;</p>")
    >>> serialize(p.close())
    '<p>&#8221;v&#228;lue&#8221;</p>'

    """
66
def bug_xmltoolkit46():
    """
    problems parsing open BR tags

    The function has no body of its own; it only carries the doctest
    below.  The example feeds a paragraph containing an unclosed ``<br>``
    tag to ``HTMLTreeBuilder.TreeBuilder`` and expects the serialized
    tree to show it as the empty element ``<br />`` with the text on
    either side preserved.  ``serialize`` is not defined in this file --
    presumably it is provided by the ``support`` star import.

    >>> p = HTMLTreeBuilder.TreeBuilder()
    >>> p.feed("<p>key<br>value</p>")
    >>> serialize(p.close())
    '<p>key<br />value</p>'

    """
Note: See TracBrowser for help on using the repository browser.