source: TI07-MOLES/trunk/StubB/XSLT/browse/portal/cgi/ETxmlView.py @ 1097

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI07-MOLES/trunk/StubB/XSLT/browse/portal/cgi/ETxmlView.py@1097
Revision 1097, 3.6 KB checked in by lawrence, 14 years ago (diff)

This has the code to support a temporary patch for
the Unicode and orphan HTML character problems. Further
details are in ticket:311.

Line 
1# Copyright Bryan Lawrence, Rutherford Appleton Laboratory, CCLRC, 2006
2#
3# This code is made available under the GPL, if you don't know what
4# that means, you have no rights to copy it or use it!
5#
6# Code to support xml and elementTree viewing as text and html
7
8import ElementTree as ET
9import re
10from sub_orphan import *
11
def et2text(elem, indent='', html=0, space='   '):
    '''Lightweight pretty printing of elementTree elements.

    Returns a string with one element per line. Each nesting level is
    indented by one more copy of ``space``; ``indent`` is the prefix used
    for ``elem`` itself. Leading and trailing whitespace of text and tail
    content is stripped. No XML escaping is applied to the output.

    ``html`` is accepted for backward compatibility; it is not used by the
    formatting and is only handed on to the recursive calls.
    '''
    def estrip(text):
        ''' Just want to get rid of unwanted whitespace '''
        if text is None:
            return ''
        return text.strip()

    strAttrib = ''.join([' %s="%s"' % (att, elem.attrib[att])
                         for att in elem.attrib])
    result = '%s<%s%s>%s' % (indent, elem.tag, strAttrib, estrip(elem.text))

    children = list(elem)
    if not children:
        return result + '</%s>' % (elem.tag,)

    childIndent = indent + space
    last = children[-1]
    for item in children:
        # Hand the caller's indent step down, otherwise deeper levels fall
        # back to the default three spaces.
        result += '\n' + et2text(item, indent=childIndent, html=html,
                                 space=space)
        if item is not last:
            # Text that follows a child but precedes the next sibling would
            # otherwise be lost; emit it on its own line at child depth.
            tail = estrip(item.tail)
            if tail:
                result += '\n%s%s' % (childIndent, tail)
    # The tail of the last child sits directly in front of the closing tag.
    result += '\n%s%s</%s>' % (indent, estrip(last.tail), elem.tag)
    return result
32
33
def et2html(elem, matchList=None, number=0):
    #this method is NDG code ... copyright CCLRC ...
    '''Lightweight HTML pretty printing of elementTree elements + highlight
    any words in the element text (and tails) which occur in matchList,
    formatted using a css something like this:
    ===
    DIV.xmlElem {PADDING-LEFT: 20px;}
    .xmlAttrVal {COLOR:Red; }
    .xmlAttrTyp {COLOR:Green; }
    .xmlElemTag {COLOR:Blue; }
    .highlight {BACKGROUND-COLOR:Yellow; }
    ===
    Returns the markup as a string: one <div class="xmlElem"> per element,
    nested to mirror the tree.

    matchList is an iterable of literal words (None or empty means no
    highlighting). Words are matched literally, not as regular expressions.
    Tag names, attribute names and values, text and tails are HTML-escaped
    before being placed in the output.

    Line number is not yet implemented; the number argument is ignored.
    '''
    from xml.sax.saxutils import escape

    def span(x, c):
        return '<span class="%s">%s</span>' % (c, x)

    def div(x, c):
        return '<div class="%s">%s</div>' % (c, x)

    # One alternation over all words, longest first, so the text is scanned
    # exactly once: markup inserted for one word can never be re-matched by
    # another, and regex metacharacters in a word are harmless.
    words = sorted([w for w in (matchList or ()) if w], key=len, reverse=True)
    pattern = None
    if words:
        pattern = re.compile('|'.join([re.escape(w) for w in words]))

    def match(x):
        '''Escape x for HTML and wrap every matchList word in a highlight span.'''
        if x is None:
            return ''
        if pattern is None:
            return escape(x)
        pieces = []
        pos = 0
        for hit in pattern.finditer(x):
            pieces.append(escape(x[pos:hit.start()]))
            pieces.append(span(escape(hit.group(0)), 'highlight'))
            pos = hit.end()
        pieces.append(escape(x[pos:]))
        return ''.join(pieces)

    lt, gt = '<b>&lt;</b>', '<b>&gt;</b>'

    def render(node):
        '''Render node and, recursively, its children as nested divs.'''
        # '%s' % ... first, so non-string tags are stringified as before.
        tag = span(escape('%s' % (node.tag,)), 'xmlElemTag')
        strAttrib = ''
        for att in node.attrib:
            name = escape('%s' % (att,))
            value = escape('%s' % (node.attrib[att],), {'"': '&quot;'})
            strAttrib += ' %s="%s"' % (span(name, 'xmlAttrTyp'),
                                       span(value, 'xmlAttrVal'))
        result = '%s%s%s%s%s' % (lt, tag, strAttrib, gt, match(node.text))
        for item in node:
            # Emit each child's tail right after the child, so text between
            # siblings is kept (not only the tail of the last child).
            result += render(item) + match(item.tail)
        result += '%s/%s%s' % (lt, tag, gt)
        return div(result, 'xmlElem')

    return render(elem)
67   
def loadET(inputString):
    ''' This method returns an elementtree object after some cleaning
    of the string, essentially a hack to make sure that xml doesn't contain any
    naughty & characters alone (typically from URL copies), or unescaped orphan
    < or > signs ... and that the unicode has been processed to something that
    might work.

    Steps:
      1. Every & that does not start a named entity (&name;) or a numeric
         character reference (&#65; / &#x41;) is rewritten to &amp;.
      2. The result is parsed directly; if that works it is returned.
      3. Otherwise the string is passed through subAI().sub (presumably the
         orphan < / > repair from sub_orphan -- confirm there), then encoded
         with each candidate encoding in turn and parsed again.

    Returns the parsed element, or None if no candidate encoding could be
    applied. A parse failure in step 3 writes the offending document to
    stdout as an HTML paragraph and re-raises the parser's exception.
    '''
    import sys

    # Leave real entities and character references alone; the numeric forms
    # must be listed explicitly because '#' is not matched by \w.
    inputString = re.sub(r'&(?!(?:\w+|#\d+|#x[0-9a-fA-F]+);)', '&amp;',
                         inputString)

    # first just try and do it so we don't waste time if we don't need to ...
    try:
        return ET.fromstring(inputString)
    except Exception:
        # deliberate best effort: fall through to the heavier clean-up
        pass

    #ok, let's deal with orphan > and < signs then ...
    subtool = subAI()
    s = subtool.sub(inputString)

    #now let's sort out an encoding
    encodings = ['utf-8', 'latin-1', 'iso-8859-1', 'ascii']
    elem = None
    for option in encodings:
        try:
            encoded = s.encode(option, 'replace')
            try:
                elem = ET.fromstring(encoded)
            except Exception:
                # Show the document that failed, line by line, then let the
                # parser error propagate. Where encode() yields a native str
                # (Python 2) the encoded form is shown, as before; otherwise
                # the un-encoded text is used so the substitution and the
                # write both operate on text.
                if isinstance(encoded, str):
                    shown = encoded
                else:
                    shown = s
                shown = re.sub('\n', '<br/>LINE: ', shown)
                sys.stdout.write('<p>%s</p>\n' % shown)
                raise
        except UnicodeError:
            # this encoding cannot represent the input; try the next one
            pass
        else:
            break
    return elem
106
def xml2text(xmlString):
    '''Parse xmlString with loadET and return the et2text rendering of the
    resulting element.'''
    return et2text(loadET(xmlString))
110
def xml2HTML(xmlString, **kw):
    '''Parse xmlString with loadET and return the HTML rendering of the
    resulting element.

    Keyword arguments are forwarded unchanged to et2html.
    '''
    tree = loadET(xmlString)
    # The renderer in this module is spelt et2html; calling et2HTML raised
    # NameError on every use.
    return et2html(tree, **kw)
Note: See TracBrowser for help on using the repository browser.