source: TI07-MOLES/trunk/StubB/XSLT/browse/portal/cgi/browse/ETxmlView.py @ 1182

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI07-MOLES/trunk/StubB/XSLT/browse/portal/cgi/browse/ETxmlView.py@1182
Revision 1182, 5.4 KB checked in by lawrence, 14 years ago (diff)

A large number of small modifications to browse
dealing with a range of bugs ...

Line 
1# Copyright Bryan Lawrence, Rutherford Appleton Laboratory, CCLRC, 2006
2#
3# This code is made available under the GPL, if you don't know what
4# that means, you have no rights to copy it or use it!
5#
6# Code to support xml and elementTree viewing as text and html
7
8import ElementTree as ET
9import re
10from sub_orphan import *
11
class nsdumb:
    ''' Provides an xpath interface to element tree nodes which
    is namespace agnostic.

    The helper remembers a single default namespace in ElementTree's
    "{uri}" notation (or '' when none is known) and prefixes it to the
    path expressions handed to getText, find and findall. Only the start
    of the expression is prefixed; later steps of a multi-step path are
    passed through untouched. '''

    def __init__(self, root=None):
        ''' Provide a root element with namespace definitions when
        instantiating.

        The namespace is the first space separated token of the root's
        "xmlns" attribute or, failing that, of its xsi:schemaLocation
        attribute. With no root, or with neither attribute present, the
        namespace is the empty string. '''
        self.xmlns = ''
        if root is None:
            return
        ns = ['xmlns', '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation']
        for i in ns:
            if i in root.keys():
                self.xmlns = '{%s}' % root.attrib[i].split(' ')[0]
                break

    def __str__(self):
        return 'Element Tree namespace helper with namespace: [%s]' % self.xmlns

    def getText(self, elem, xpathExpression, multiple=0):
        ''' Get a text object sensibly.

        With multiple false, return the text of the first match as a
        string, or '' when nothing matches or the match has no text.
        With multiple true, return a list with one string per match
        (empty text becomes ''); the list is empty when nothing matches. '''
        xe = self.xmlns + xpathExpression
        if multiple:
            rr = []
            for i in elem.findall(xe):
                t = i.text
                if t is None:
                    t = ''
                rr.append(t)
            return rr
        # find() returns None for "no such element": test for that
        # explicitly instead of relying on a catch-all except clause.
        node = elem.find(xe)
        if node is None or node.text is None:
            return ''
        return node.text

    def find(self, elem, xpathExpression):
        ''' Return relevant subelement (None if there is no match) '''
        xe = self.xmlns + xpathExpression
        return elem.find(xe)

    def findall(self, elem, xpathExpression):
        ''' Return all relevant subelements '''
        xe = self.xmlns + xpathExpression
        return elem.findall(xe)

    def strip(self, tag):
        ''' Given a tag, strip the default namespace.

        Tags that do not start with the default namespace are returned
        unchanged. '''
        # str.lstrip() removes any leading characters drawn from its
        # argument, so it also ate the start of the local name (e.g. the
        # "t" of "title"); remove the exact prefix instead.
        if self.xmlns and tag.startswith(self.xmlns):
            return tag[len(self.xmlns):]
        return tag
62
def et2text(elem, indent='', html=0, space='   ', helper=None):
    '''Lightweight pretty printing of elementTree elements.

    elem   -- the element to render (with all of its descendants)
    indent -- string placed in front of this element's opening tag
    html   -- not used by this function; only handed on to the
              recursive calls
    space  -- indentation unit added for each level of nesting
    helper -- object whose strip(tag) removes the default namespace
              from a tag; when None an nsdumb built from elem is used

    Returns the rendering as a single string, one element per line.
    Leading and trailing whitespace of text and tails is removed, and
    neither text nor attribute values are re-escaped. '''
    def estrip(text):
        ''' Just want to get rid of unwanted whitespace '''
        if text is None:
            return ''
        return text.strip()

    ns = helper
    if ns is None:
        ns = nsdumb(elem)
    tag = ns.strip(elem.tag)
    strAttrib = ''.join([' %s="%s"' % (att, elem.attrib[att])
                         for att in elem.attrib])
    result = '%s<%s%s>%s' % (indent, tag, strAttrib, estrip(elem.text))

    children = list(elem)
    if not children:
        return result + '</%s>' % tag

    childIndent = indent + space
    last = children[-1]
    for item in children:
        # Hand space (and html) on as well; otherwise every level below
        # the first falls back to the default indentation unit.
        result += '\n' + et2text(item, indent=childIndent, html=html,
                                 space=space, helper=ns)
        if item is not last:
            # Text following a child that is not the last one used to be
            # dropped; show it on a line of its own at child depth.
            tail = estrip(item.tail)
            if tail:
                result += '\n%s%s' % (childIndent, tail)
    # The last child's tail stays where it always was: directly in front
    # of the closing tag.
    result += '\n%s%s</%s>' % (indent, estrip(last.tail), tag)
    return result
85
86
def et2html(elem, matchList=None, number=0, helper=None):
    #this method is NDG code ... copyright CCLRC ...
    '''Lightweight HTML pretty printing of elementTree elements + highlight
    any words in the element text (and tails) which occur in matchList,
    and formatted using a css something like this:
    ===
    DIV.xmlElem {PADDING-LEFT: 20px;}
    .xmlAttrVal {COLOR:Red; }
    .xmlAttrTyp {COLOR:Green; }
    .xmlElemTag {COLOR:Blue; }
    .highlight {BACKGROUND-COLOR:Yellow; }
    ===
    Line number is not yet implemented (number is accepted but unused).

    elem      -- the element to render (with all of its descendants)
    matchList -- words to highlight; they are matched literally and
                 case sensitively. None or empty means no highlighting.
    helper    -- object whose strip(tag) removes the default namespace
                 from a tag; when None an nsdumb built from elem is used

    Returns the HTML as one string, each element wrapped in a
    <div class="xmlElem">. Tag names, attribute names and values, text
    and tails are HTML escaped.
    '''
    from xml.sax.saxutils import escape

    def span(x, c): return '<span class="%s">%s</span>' % (c, x)
    def div(x, c): return '<div class="%s">%s</div>' % (c, x)

    # One literal alternation for all words, longest first so that a
    # word is preferred over any other word that is a prefix of it.
    # Matching in a single pass means a word can never match inside the
    # markup inserted for another one, and regex metacharacters in a
    # word (e.g. "C++") no longer break or change the search.
    words = [w for w in (matchList or []) if w]
    words.sort(key=len, reverse=True)
    finder = None
    if words:
        finder = re.compile('(%s)' % '|'.join([re.escape(w) for w in words]))

    def match(x):
        ''' Escape x for HTML and wrap every highlighted word in a span '''
        if x is None: return ''
        if finder is None: return escape(x)
        # split() with one capturing group alternates plain text (even
        # positions) with the matched words (odd positions).
        pieces = []
        for i, piece in enumerate(finder.split(x)):
            if i % 2:
                pieces.append(span(escape(piece), 'highlight'))
            else:
                pieces.append(escape(piece))
        return ''.join(pieces)

    lt, gt = '<b>&lt;</b>', '<b>&gt;</b>'
    ns = helper
    if ns is None: ns = nsdumb(elem)

    def render(node):
        ''' Render node and its subtree as one xmlElem div '''
        tag = span(escape(ns.strip(node.tag)), 'xmlElemTag')
        strAttrib = ''
        for att in node.attrib:
            strAttrib += ' %s="%s"' % (span(escape(att), 'xmlAttrTyp'),
                                       span(escape(node.attrib[att]), 'xmlAttrVal'))
        result = '%s%s%s%s%s' % (lt, tag, strAttrib, gt, match(node.text))
        for item in node:
            # Emit each child's tail right after that child; previously
            # only the tail of the last child made it into the output.
            result += render(item) + match(item.tail)
        result += '%s/%s%s' % (lt, tag, gt)
        return div(result, 'xmlElem')

    return render(elem)
122   
def loadET(inputString):
    ''' This method returns an elementtree object after some cleaning
    of the string, essentially a hack to make sure that xml doesn't contain any
    naughty & characters alone (typically from URL copies), or unescaped orphan
    < or > signs ... and that the unicode has been processed to something that
    might work.

    Returns None when inputString is None, or when none of the candidate
    encodings could be applied. If the cleaned and encoded string still
    does not parse, the string is printed (as an HTML paragraph, one
    "LINE: " marker per line) and the parser's exception is re-raised. '''

    if inputString is None: return None
    # Escape every & that does not start a reference. Numeric character
    # references (&#65; / &#x41;) have to be recognised as well as named
    # ones, otherwise they are turned into the literal text "&#65;".
    inputString = re.sub(r'&(?!(?:#[0-9]+|#x[0-9a-fA-F]+|\w+);)', '&amp;',
                         inputString)

    # first just try and do it so we don't waste time if we don't need to ...
    try:
        return ET.fromstring(inputString)
    except Exception:
        # any failure here just means we need the slower clean-up below
        pass

    #ok, let's deal with orphan > and < signs then ...
    # (subAI is not defined in this file; presumably it comes from the
    # sub_orphan star import -- confirm)
    subtool = subAI()
    s = subtool.sub(inputString)

    #now let's sort out an encoding
    encodings = ['utf-8', 'latin-1', 'iso-8859-1', 'ascii', ]
    elem = None
    for option in encodings:
        try:
            # Encode into a separate name so that a later attempt starts
            # from the same text rather than from already encoded bytes.
            encoded = s.encode(option, 'replace')
            try:
                elem = ET.fromstring(encoded)
            except Exception:
                # show what we tried to parse, then let the error out
                print('<p>%s</p>' % re.sub('\n', '<br/>LINE: ', encoded))
                raise
        except UnicodeError:
            # this encoding is no good, try the next one
            pass
        else:
            break
    return elem
162
def xml2text(xmlString):
    ''' Parse xmlString with loadET and return the et2text rendering
    of the resulting element '''
    return et2text(loadET(xmlString))
166
def xml2HTML(xmlString, **kw):
    ''' Parse xmlString with loadET and return the et2html rendering of
    the resulting element. Keyword arguments are passed straight through
    to et2html. '''
    tree = loadET(xmlString)
    # The renderer defined in this module is et2html (lower case); the
    # previous spelling, et2HTML, is not defined here.
    return et2html(tree, **kw)
Note: See TracBrowser for help on using the repository browser.