source: TI07-MOLES/trunk/StubB/XSLT/browse/portal/cgi/browse/ETxmlView.py @ 1175

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI07-MOLES/trunk/StubB/XSLT/browse/portal/cgi/browse/ETxmlView.py@1175
Revision 1175, 4.9 KB checked in by lawrence, 14 years ago (diff)

Sundry problems fixed with respect to namespaces etc

Line 
1# Copyright Bryan Lawrence, Rutherford Appleton Laboratory, CCLRC, 2006
2#
3# This code is made available under the GPL, if you don't know what
4# that means, you have no rights to copy it or use it!
5#
6# Code to support xml and elementTree viewing as text and html
7
8import ElementTree as ET
9import re
10from sub_orphan import *
11
class nsdumb:
    ''' Provides an xpath interface to element tree nodes which
    is namespace agnostic.

    The namespace is worked out once, from the root element given to the
    constructor, and is then prepended (in the ``{uri}`` form) to every
    path expression handed to getText, find and findall.  Only the start
    of the expression is prefixed, so the later steps of a multi-step
    path such as ``a/b`` have to be qualified by the caller. '''

    def __init__(self,root=None):
        ''' Provide a root element with namespace definitions when
        instantiating.

        The namespace uri is the first space separated token of the first
        of these root attributes that is present: ``xmlns``, then the
        XMLSchema-instance ``schemaLocation``.  With no root, or with
        neither attribute, the prefix is the empty string. '''
        self.xmlns=''
        if root is None:
            return
        # NOTE(review): parsers usually consume xmlns declarations instead
        # of exposing them as attributes, so schemaLocation is presumably
        # the entry that matches in practice - confirm against real input.
        ns=['xmlns','{http://www.w3.org/2001/XMLSchema-instance}schemaLocation']
        for i in ns:
            if i in root.keys():
                self.xmlns='{%s}'%root.attrib[i].split(' ')[0]
                break

    def __str__(self):
        return 'Element Tree namespace helper with namespace: [%s]'%self.xmlns

    def getText(self,elem,xpathExpression,multiple=0):
        ''' Get a text object sensibly.

        With multiple false (the default) return the text of the first
        match, or '' when nothing matches.  With multiple true return a
        list holding the text of every match (empty when nothing matches).
        The text of a matched but empty element is None, as it is on the
        element itself. '''
        xe=self.xmlns+xpathExpression
        if multiple:
            return [i.text for i in elem.findall(xe)]
        found=elem.find(xe)
        if found is None:
            # no match is an expected outcome here, not an error
            return ''
        return found.text

    def find(self,elem,xpathExpression):
        ''' Return relevant subelement (None when there is no match) '''
        xe=self.xmlns+xpathExpression
        return elem.find(xe)

    def findall(self,elem,xpathExpression):
        ''' Return all relevant subelements '''
        xe=self.xmlns+xpathExpression
        return elem.findall(xe)
53
def et2text(elem,indent='',html=0,space='   '):
    '''Lightweight pretty printing of elementTree elements.

    elem   - the element to print, children are printed recursively
    indent - string put in front of this element's opening and closing tag
    html   - accepted and handed down to the children, otherwise unused
    space  - what is added to indent for each level of nesting

    Returns the text with one line per element.  Leading and trailing
    whitespace of element text is stripped.  Only the tail of the last
    child is printed (in front of the closing tag), tails of the other
    children are dropped.  Text and attribute values are written as they
    are, nothing is escaped. '''
    def estrip(text):
        ''' Just want to get rid of unwanted whitespace '''
        if text is None:
            return ''
        return text.strip()

    strAttrib=''.join([' %s="%s"'%(att,elem.attrib[att]) for att in elem.attrib])
    result='%s<%s%s>%s'%(indent,elem.tag,strAttrib,estrip(elem.text))
    if len(elem):
        for item in elem:
            # hand the caller's settings down, otherwise every level below
            # the first falls back to the default spacing
            result+='\n'+et2text(item,indent=indent+space,html=html,space=space)
        # item is the last child here, its tail sits before the closing tag
        result+='\n%s%s</%s>'%(indent,estrip(item.tail),elem.tag)
    else:
        result+='</%s>'%(elem.tag)
    return result
74
75
def et2html(elem,matchList=None,number=0):
    #this method is NDG code ... copyright CCLRC ...
    '''Lightweight HTML pretty printing of elementTree elements + highlight
    any words which occur in the element text (and tails) which occur in matchList,
    and formatted using a css something like this:
    ===
    DIV.xmlElem {PADDING-LEFT: 20px;}
    .xmlAttrVal {COLOR:Red; }
    .xmlAttrTyp {COLOR:Green; }
    .xmlElemTag {COLOR:Blue; }
    .highlight {BACKGROUND-COLOR:Yellow; }
    ===
    Line number is not yet implemented (number is accepted and ignored).

    elem      - the element to render, children are rendered recursively
    matchList - words to highlight, taken literally (not as regular
                expressions); None or an empty list highlights nothing

    Returns one <div class="xmlElem"> per element, nested like the
    elements.  Tags, attribute names, attribute values, text and tails are
    html escaped so that they show up as written in the xml.  Only the
    tail of the last child is rendered, as in et2text.
    '''
    from xml.sax.saxutils import escape

    def span(x,c): return '<span class="%s">%s</span>'%(c,x)
    def div(x,c): return '<div class="%s">%s</div>'%(c,x)

    # One alternation of escaped literals: all words are found in a single
    # pass over the raw text, so a word can never match inside the markup
    # added for another word, and regex characters in a word are harmless.
    words=[w for w in (matchList or []) if w]
    if words:
        finder=re.compile('(%s)'%'|'.join([re.escape(w) for w in words]))
    else:
        finder=None

    def match(x):
        if x is None: return ''
        if finder is None: return escape(x)
        # splitting on a single capturing group gives plain text at the
        # even positions and the matched words at the odd positions
        out=[]
        for pos,piece in enumerate(finder.split(x)):
            if pos%2:
                out.append(span(escape(piece),'highlight'))
            else:
                out.append(escape(piece))
        return ''.join(out)

    lt,gt='<b>&lt;</b>','<b>&gt;</b>'

    def render(node):
        tag=span(escape(node.tag),'xmlElemTag')
        strAttrib=''
        for att in node.attrib:
            strAttrib+=' %s="%s"'%(span(escape(att),'xmlAttrTyp'),
                                   span(escape(node.attrib[att]),'xmlAttrVal'))
        result='%s%s%s%s%s'%(lt,tag,strAttrib,gt,match(node.text))
        if len(node):
            for item in node:
                result+=render(item)
            # item is the last child here
            result+='%s%s/%s%s'%(match(item.tail),lt,tag,gt)
        else:
            result+='%s/%s%s'%(lt,tag,gt)
        return div(result,'xmlElem')

    return render(elem)
109   
def loadET(inputString):
    ''' This method returns an elementtree object after some cleaning
    of the string, essentially a hack to make sure that xml doesn't contain any
    naughty & characters alone (typically from URL copies), or unescaped orphan
    < or > signs ... and that the unicode has been processed to something that
    might work.

    Returns None for a None input, and None when the cleaned string could
    not be encoded with any of the encodings tried.  If the cleaned and
    encoded string still does not parse, a dump of it is written to
    standard output and the parser's exception is raised again. '''
    import sys

    if inputString is None: return None
    # escape every & that does not already start an entity (&name;) or a
    # character reference (&#65; or &#x41;) - those must be left alone
    inputString=re.sub(r'&(?!(?:\w+|#[0-9]+|#x[0-9a-fA-F]+);)','&amp;',inputString)

    # first just try and do it so we don't waste time if we don't need to ...
    try:
        return ET.fromstring(inputString)
    except Exception:
        # deliberately ignored: the slower clean up below is the fallback
        pass

    #ok, let's deal with orphan > and < signs then ...
    subtool=subAI()
    s=subtool.sub(inputString)

    #now let's sort out an encoding
    encodings=['utf-8','latin-1','iso-8859-1','ascii',]
    for option in encodings:
        try:
            encoded=s.encode(option,'replace')
        except UnicodeError:
            # only a failure to encode moves us on to the next encoding
            continue
        try:
            return ET.fromstring(encoded)
        except Exception:
            # show what was actually parsed, then let the error through
            # NOTE(review): the dump treats the encoded string as text,
            # which presumably relies on python 2 byte strings - confirm
            dump=re.sub('\n','<br/>LINE: ',encoded)
            sys.stdout.write('<p>%s</p>\n'%dump)
            raise
    return None
149
def xml2text(xmlString):
    ''' Parse xmlString with loadET and hand the resulting element to
    et2text, returning the pretty printed text. '''
    return et2text(loadET(xmlString))
153
def xml2HTML(xmlString,**kw):
    ''' Parse xmlString with loadET and hand the resulting element to
    et2html, returning the html.  Keyword arguments (matchList, number)
    are passed on to et2html unchanged. '''
    tree=loadET(xmlString)
    # the renderer is spelt et2html, there is no et2HTML in this module
    return et2html(tree,**kw)
Note: See TracBrowser for help on using the repository browser.