source: TI07-MOLES/trunk/PythonCode/wsgi/ETxmlView.py @ 2045

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI07-MOLES/trunk/PythonCode/wsgi/ETxmlView.py@2045
Revision 2045, 6.5 KB checked in by lawrence, 12 years ago (diff)

Sundry modes to wsgi browse/discovery on the road to deploying on
glue (mostly about the many possible locations of elementtree)

Line 
1# Copyright Bryan Lawrence, Rutherford Appleton Laboratory, CCLRC, 2006
2#
3# This code is made available under the GPL, if you don't know what
4# that means, you have no rights to copy it or use it!
5#
6# Code to support xml and elementTree viewing as text and html
7try: #python 2.5
8    from xml.etree import ElementTree as ET
9except ImportError:
10    try:
11        # if you've installed it yourself it comes this way
12        import ElementTree as ET
13    except ImportError:
14        # if you've egged it this is the way it comes
15        from elementtree import ElementTree as ET
16import re
17
class subAI:
    ''' Alan Iwi's substitute and replace code for orphan < and > signs:
    brackets that do not pair up as markup are rewritten as the
    entities &lt; and &gt; '''
    def __init__(self):
        # a '<' that meets another '<' (or the end of the string) before any '>'
        self.r1 = re.compile('<([^>]*(<|$))')
        # a '>' with no '<' since the previous '>' (or the start of the string)
        self.r2 = re.compile('((^|>)[^<]*)>')

    def sub(self, s):
        ''' Return s with orphan brackets escaped; both substitutions are
        applied repeatedly until a pass leaves the string unchanged '''
        while True:
            escaped = self.r2.sub(r'\1&gt;', self.r1.sub(r'&lt;\1', s))
            if escaped == s:
                return s
            s = escaped
30       
class nsdumb:
    ''' Provides an xpath interface to element tree nodes which
    is namespace agnostic: callers give plain tag paths and each tag
    step is qualified with the namespace taken from the root element. '''
    def __init__(self,root=None):
        ''' Provide a root element with namespace definitions when
        instantiating.

        The namespace comes from the first of the root attributes
        "xmlns" or xsi:schemaLocation that is present, using the first
        whitespace separated token of its value. With no root, or with
        neither attribute, the namespace is empty. '''
        self.xmlns=''
        if root is None:
            return
        ns=['xmlns','{http://www.w3.org/2001/XMLSchema-instance}schemaLocation']
        for i in ns:
            value=root.get(i)
            if value is not None:
                # split() rather than split(' '), so that leading or
                # repeated whitespace cannot yield an empty '{}' namespace
                tokens=value.split()
                if tokens: self.xmlns='{%s}'%tokens[0]
                break

    def __str__(self):
        return 'Element Tree namespace helper with namespace: [%s]'%self.xmlns

    def __distributens(self,xpathExpression):
        ''' Prefix the namespace onto every tag step of a '/' separated
        path. Actually we only support tag finding in this; the steps
        '', '.' and '..' are path syntax rather than tags and are left
        alone so that expressions such as './/tag' still work. '''
        if not self.xmlns: return xpathExpression
        steps=[]
        for t in xpathExpression.split('/'):
            if t in ('','.','..'):
                steps.append(t)
            else:
                steps.append(self.xmlns+t)
        return '/'.join(steps)

    def getText(self,elem,xpathExpression,multiple=0):
        ''' Get a text object sensibly.

        With multiple false, returns the text of the first match as a
        string ('' when elem is None, nothing matches, or the match has
        no text). With multiple true, returns a list with one string per
        match ([''] when elem is None). '''
        if elem is None:
            if multiple: return ['',]
            return ''
        xe=self.__distributens(xpathExpression)
        if multiple:
            r=elem.findall(xe)
        else:
            r=[elem.find(xe),]
        try:
            rr=[]
            for i in r:
                t=i.text
                if t is None: t=''
                rr.append(t)
        except AttributeError:
            # find() gave None (no match), which has no .text
            rr=['',]
        if multiple: return rr
        return rr[0]

    def find(self,elem,xpathExpression):
        ''' Return relevant subelement (whatever elem.find gives for the
        namespace qualified path).
        NOTE: returns '' rather than None when elem is None; kept as is
        since existing callers may depend on it. '''
        if elem is None: return ''
        xe=self.__distributens(xpathExpression)
        return elem.find(xe)

    def findall(self,elem,xpathExpression):
        ''' Return all relevant subelements ([] when elem is None) '''
        if elem is None: return []
        xe=self.__distributens(xpathExpression)
        return elem.findall(xe)

    def strip(self,tag):
        ''' Given a tag, strip the default namespace '''
        # str.lstrip removes any leading characters found in its argument,
        # not a prefix, so it also ate the start of the local name
        # (e.g. 'title' -> 'itle'); remove the exact prefix instead.
        if self.xmlns and tag.startswith(self.xmlns):
            return tag[len(self.xmlns):]
        return tag
95
def et2text(elem,indent='',html=0,space='   ',helper=None):
    '''Lightweight pretty printing of elementTree elements.

    elem   -- the element to print, together with its subtree
    indent -- string written before this element's opening tag (and
              before its closing tag when it has children)
    html   -- not used by this function; handed on to nested calls
    space  -- extra indentation added for each level of nesting
    helper -- object whose strip(tag) removes the namespace from a tag;
              an nsdumb built from elem is used when none is given

    Returns one string with a line per element. Element text is
    stripped of surrounding whitespace; nothing is xml-escaped.
    '''
    def estrip(text):
        ''' Just want to get rid of unwanted whitespace '''
        if text is None:
            return ''
        return text.strip()
    ns=helper
    if ns is None: ns=nsdumb(elem)
    tag=ns.strip(elem.tag)
    strAttrib=''.join([' %s="%s"'%(att,elem.attrib[att]) for att in elem.attrib])
    result='%s<%s%s>%s'%(indent,tag,strAttrib,estrip(elem.text))
    if len(elem):
        for item in elem:
            # hand space (and html) down as well: previously only the first
            # level honoured a caller supplied space, deeper levels fell
            # back to the default
            result+='\n'+et2text(item,indent=indent+space,html=html,
                                 space=space,helper=ns)
        # NOTE: only the tail of the last child is written; the tails of
        # earlier children are not part of the output
        result+='\n%s%s</%s>'%(indent,estrip(item.tail),tag)
    else:
        result+='</%s>'%tag
    return result
118
119
def et2html(elem,matchList=None,number=0,helper=None):
    #this method is NDG code ... copyright CCLRC ...
    '''Lightweight HTML pretty printing of elementTree elements + highlight
    any words which occur in the element text (and tails) which occur in matchList,
    and formatted using a css something like this:
    ===
    DIV.xmlElem {PADDING-LEFT: 20px;}
    .xmlAttrVal {COLOR:Red; }
    .xmlAttrTyp {COLOR:Green; }
    .xmlElemTag {COLOR:Blue; }
    .highlight {BACKGROUND-COLOR:Yellow; }
    ===
    Line number is not yet implemented.

    elem      -- the element to render, together with its subtree
    matchList -- words to highlight; they are matched literally (not as
                 regular expressions); None or empty means no highlighting
    number    -- not used
    helper    -- object whose strip(tag) removes the namespace from a tag;
                 an nsdumb built from elem is used when none is given

    Returns a string holding one <div class="xmlElem"> for the element,
    with the divs of its children nested inside. Tags, attribute names
    and values, text and tails are html-escaped so that characters such
    as < and & display literally.
    '''
    from xml.sax.saxutils import escape
    def span(x,c): return '<span class="%s">%s</span>'%(c,x)
    def div(x,c): return '<div class="%s">%s</div>'%(c,x)
    # One alternation of the escaped words, longest first so that a word
    # wins over its own prefixes. Matching in a single pass keeps a later
    # word from matching inside the markup inserted for an earlier one.
    words=[w for w in (matchList or []) if w]
    words.sort(key=len,reverse=True)
    pattern=None
    if words: pattern=re.compile('|'.join([re.escape(w) for w in words]))
    def match(x):
        ''' Escape x for html and wrap each matched word in a highlight span '''
        if x is None: return ''
        if pattern is None: return escape(x)
        pieces=[]
        pos=0
        for m in pattern.finditer(x):
            pieces.append(escape(x[pos:m.start()]))
            pieces.append(span(escape(m.group(0)),'highlight'))
            pos=m.end()
        pieces.append(escape(x[pos:]))
        return ''.join(pieces)
    lt,gt='<b>&lt;</b>','<b>&gt;</b>'
    ns=helper
    if ns is None: ns=nsdumb(elem)
    tag=span(escape(ns.strip(elem.tag)),'xmlElemTag')
    strAttrib=''
    for att in elem.attrib:
        strAttrib+=' %s="%s"'%(span(escape(att),'xmlAttrTyp'),
                               span(escape(elem.attrib[att]),'xmlAttrVal'))
    result='%s%s%s%s%s'%(lt,tag,strAttrib,gt,match(elem.text))
    if len(elem):
        for item in elem:
            result+=et2html(item,matchList,helper=ns)
        # NOTE: only the tail of the last child is written; the tails of
        # earlier children are not part of the output
        result+='%s%s/%s%s'%(match(item.tail),lt,tag,gt)
    else:
        result+='%s/%s%s'%(lt,tag,gt)
    return div(result,'xmlElem')
155   
def loadET(inputString):
    ''' This method returns an elementtree object after some cleaning
    of the string, essentially a hack to make sure that xml doesn't contain any
    naughty & characters alone (typically from URL copies), or unescaped orphan
    < or > signs ... and that the unicode has been processed to something that
    might work.

    The steps are:
      1. every & which does not start an entity or character reference
         (&name; &#65; &#x41;) is rewritten as &amp;
      2. the result is parsed as it stands; if that works we are done
      3. otherwise orphan < and > are escaped with subAI, the text is
         encoded with each of a short list of encodings in turn, and
         the first one that can be applied is parsed

    Returns None when inputString is None, or when none of the encodings
    could be applied. If the parse in step 3 fails, the cleaned text is
    logged and the parser's exception is re-raised. '''
    import logging

    if inputString is None: return None
    # '#?' keeps numeric character references such as &#160; intact; without
    # it they were rewritten to &amp;#160; and came out as literal text
    inputString=re.sub(r'&(?!#?\w+;)', '&amp;', inputString)

    # first just try and do it so we don't waste time if we don't need to ...
    try:
        return ET.fromstring(inputString)
    except Exception:
        # not parseable as it stands: carry on with the heavier clean up
        pass

    #ok, let's deal with orphan > and < signs then ...
    s=subAI().sub(inputString)

    #now let's sort out an encoding
    encodings=['utf-8','latin-1','iso-8859-1','ascii',]
    elem=None
    for option in encodings:
        try:
            # s itself stays text so that it can still be reported below
            data=s.encode(option,'replace')
            try:
                elem=ET.fromstring(data)
            except Exception:
                # report through logging instead of printing html to stdout
                logging.getLogger(__name__).error(
                    'loadET: cannot parse cleaned xml (encoded as %s):\n%s',
                    option,s)
                raise
        except UnicodeError:
            # this encoding could not be applied, try the next one
            continue
        break
    return elem
195
def xml2text(xmlString):
    ''' Clean and parse xmlString with loadET and return the resulting
    element pretty printed as text by et2text '''
    return et2text(loadET(xmlString))
199
def xml2HTML(xmlString,**kw):
    ''' Clean and parse xmlString with loadET and return the resulting
    element rendered as html by et2html. Any keyword arguments are
    passed on to et2html unchanged. '''
    tree=loadET(xmlString)
    # the renderer in this file is et2html; the previous spelling et2HTML
    # is not defined here, so every call raised NameError
    return et2html(tree,**kw)
Note: See TracBrowser for help on using the repository browser.