source: TI07-MOLES/trunk/PythonCode/browse/portal/cgi/browse/ETxmlView.py @ 1615

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI07-MOLES/trunk/PythonCode/browse/portal/cgi/browse/ETxmlView.py@1615
Revision 1615, 5.6 KB checked in by lawrence, 14 years ago (diff)

Modifications to discovery (and browse) to support ndgDoc as an interim
measure ...

Line 
1# Copyright Bryan Lawrence, Rutherford Appleton Laboratory, CCLRC, 2006
2#
3# This code is made available under the GPL, if you don't know what
4# that means, you have no rights to copy it or use it!
5#
6# Code to support xml and elementTree viewing as text and html
7
8try: #python 2.5
9    import xml.etree.ElementTree as ET
10except:
11    import ElementTree as ET
12import re
13from sub_orphan import *
14
class nsdumb:
    ''' Provides an xpath interface to element tree nodes which
    is namespace agnostic.

    The helper remembers a single namespace prefix of the form '{uri}'
    (or '' when none was found) in self.xmlns.  That prefix is prepended
    once to every xpath expression handed to getText/find/findall, and
    removed from tags by strip. '''

    def __init__(self,root=None):
        ''' Provide a root element with namespace definitions when
        instantiating.

        The namespace is taken from the first of these attributes found on
        root: 'xmlns', then the XMLSchema-instance schemaLocation (only the
        first whitespace separated token of the value is used).  With no
        root, or with neither attribute present, the namespace is empty. '''
        self.xmlns=''
        if root is None:
            return
        candidates=['xmlns',
                    '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation']
        for name in candidates:
            if name in root.keys():
                self.xmlns='{%s}'%root.attrib[name].split(' ')[0]
                break

    def __str__(self):
        return 'Element Tree namespace helper with namespace: [%s]'%self.xmlns

    def getText(self,elem,xpathExpression,multiple=0):
        ''' Get a text object sensibly.

        Returns the text of the first match as a string, or, if multiple
        is true, a list with the text of every match.  Missing text is
        reported as ''.  A missing element gives '' (single) or an empty
        list (multiple); an elem of None gives '' or ['']. '''
        if elem is None:
            if multiple:
                return ['',]
            return ''
        path=self.xmlns+xpathExpression
        if multiple:
            texts=[]
            for node in elem.findall(path):
                if node.text is None:
                    texts.append('')
                else:
                    texts.append(node.text)
            return texts
        node=elem.find(path)
        # find() gives None when nothing matches; report that as no text
        if node is None or node.text is None:
            return ''
        return node.text

    def find(self,elem,xpathExpression):
        ''' Return relevant subelement (the first match, or None when
        nothing matches).  Note that an elem of None yields '' rather
        than None. '''
        if elem is None: return ''
        return elem.find(self.xmlns+xpathExpression)

    def findall(self,elem,xpathExpression):
        ''' Return all relevant subelements as a list (empty when elem
        is None) '''
        if elem is None: return []
        return elem.findall(self.xmlns+xpathExpression)

    def strip(self,tag):
        ''' Given a tag, strip the default namespace.

        Only an exact leading '{uri}' prefix equal to self.xmlns is removed;
        tags in another namespace, or with no namespace, come back
        unchanged. '''
        # str.lstrip() would treat the namespace as a *set of characters*
        # and also eat leading letters of the local name, so test for and
        # slice off the prefix explicitly.
        if self.xmlns and tag.startswith(self.xmlns):
            return tag[len(self.xmlns):]
        return tag
71
def et2text(elem,indent='',html=0,space='   ',helper=None):
    '''Lightweight pretty printing of elementTree elements.

    elem   - the element to print (its children are printed recursively)
    indent - string placed in front of this element's opening/closing tag
    html   - accepted for compatibility; it does not affect the output
    space  - extra indentation added for every level of nesting
    helper - object whose strip(tag) removes the namespace from a tag;
             when omitted an nsdumb built from elem is used

    Returns the text as a single string, one element per line.  Leading
    and trailing whitespace of element text is dropped, and of the tails
    only that of the last child of each element is kept. '''
    def estrip(text):
        ''' Just want to get rid of unwanted whitespace '''
        if text is None:
            return ''
        return text.strip()

    ns=helper
    if ns is None: ns=nsdumb(elem)
    tag=ns.strip(elem.tag)

    attributes=[]
    for att in elem.attrib:
        attributes.append(' %s="%s"'%(att,elem.attrib[att]))
    pieces=['%s<%s%s>%s'%(indent,tag,''.join(attributes),estrip(elem.text))]

    if len(elem):
        for item in elem:
            pieces.append('\n')
            # forward space (and html) so that a caller supplied indentation
            # unit is honoured at every depth, not only the first level
            pieces.append(et2text(item,indent=indent+space,html=html,
                                  space=space,helper=ns))
        # item is now the last child; its tail precedes our closing tag
        pieces.append('\n%s%s</%s>'%(indent,estrip(item.tail),tag))
    else:
        pieces.append('</%s>'%tag)
    return ''.join(pieces)
94
95
def et2html(elem,matchList=None,number=0,helper=None):
    #this method is NDG code ... copyright CCLRC ...
    '''Lightweight HTML pretty printing of elementTree elements + highlight
    any words which occur in the element text (and tails) which occur in matchList,
    and formatted using a css something like this:
    ===
    DIV.xmlElem {PADDING-LEFT: 20px;}
    .xmlAttrVal {COLOR:Red; }
    .xmlAttrTyp {COLOR:Green; }
    .xmlElemTag {COLOR:Blue; }
    .highlight {BACKGROUND-COLOR:Yellow; }
    ===
    elem      - the element to render (children are rendered recursively)
    matchList - words to highlight; they are matched literally and case
                sensitively (None or empty means no highlighting)
    number    - line numbering is not yet implemented; the value is ignored
    helper    - object whose strip(tag) removes the namespace from a tag;
                when omitted an nsdumb built from elem is used

    Returns a string of nested <div class="xmlElem"> blocks.  Tags,
    attribute names/values, text and tails are HTML escaped.  Of the tails
    only that of the last child of each element is shown.
    '''
    # function scope import: available in the standard library of both old
    # and new Pythons, and keeps this block self contained
    from xml.sax.saxutils import escape

    def span(x,c): return '<span class="%s">%s</span>'%(c,x)
    def div(x,c): return '<div class="%s">%s</div>'%(c,x)

    # Build one pattern for all the words.  The words are escaped so that
    # they are matched literally, and a single pass is used so that markup
    # inserted for one word can never be matched by a later word.  Longer
    # words go first so they are not shadowed by their own prefixes.
    words=[]
    for w in (matchList or []):
        if w: words.append(w)
    words.sort(key=len,reverse=True)
    pattern=None
    if words:
        pattern=re.compile('(%s)'%'|'.join([re.escape(w) for w in words]))

    def match(x):
        ''' Escape x for HTML, wrapping the matched words in a highlight span '''
        if x is None: return ''
        if pattern is None: return escape(x)
        out=[]
        # splitting on a single capturing group puts the matched words at
        # the odd positions and the text in between at the even ones
        for i,piece in enumerate(pattern.split(x)):
            if i%2:
                out.append(span(escape(piece),'highlight'))
            else:
                out.append(escape(piece))
        return ''.join(out)

    lt,gt='<b>&lt;</b>','<b>&gt;</b>'
    quote={'"':'&quot;'}
    ns=helper
    if ns is None: ns=nsdumb(elem)

    def render(node):
        ''' Render node and, recursively, its children as one div '''
        tag=span(escape(ns.strip(node.tag)),'xmlElemTag')
        attributes=[]
        for att in node.attrib:
            attributes.append(' %s="%s"'%(
                span(escape(att),'xmlAttrTyp'),
                span(escape(node.attrib[att],quote),'xmlAttrVal')))
        parts=[lt,tag,''.join(attributes),gt,match(node.text)]
        last=None
        for item in node:
            parts.append(render(item))
            last=item
        if last is not None:
            # the tail of the last child sits just before our closing tag
            parts.append(match(last.tail))
        parts.extend([lt,'/',tag,gt])
        return div(''.join(parts),'xmlElem')

    return render(elem)
131   
def loadET(inputString):
    ''' This method returns an elementtree object after some cleaning
    of the string, essentially a hack to make sure that xml doesn't contain any
    naughty & characters alone (typically from URL copies), or unescaped orphan
    < or > signs ... and that the unicode has been processed to something that
    might work.

    Returns None when inputString is None, otherwise the parsed root
    element.  If the string still cannot be parsed after cleaning, the
    offending text is printed wrapped in an HTML paragraph and the parser's
    exception is re-raised. '''

    if inputString is None: return None
    # Escape every & that does not start a reference.  Named entities
    # (&amp;) and numeric character references (&#65; &#x41;) are left alone.
    inputString=re.sub(r'&(?!(?:\w+|#\d+|#x[0-9a-fA-F]+);)','&amp;',inputString)

    # first just try and do it so we don't waste time if we don't need to ...
    try:
        return ET.fromstring(inputString)
    except Exception:
        # fall through to the heavier clean up below
        pass

    #ok, let's deal with orphan > and < signs then ...
    subtool=subAI()
    s=subtool.sub(inputString)

    #now let's sort out an encoding
    encodings=['utf-8','latin-1','iso-8859-1','ascii',]
    elem=None
    for option in encodings:
        try:
            # always encode the cleaned text itself, never the result of a
            # previous attempt
            encoded=s.encode(option,'replace')
            try:
                elem=ET.fromstring(encoded)
            except Exception:
                # show what we tried to parse, one "LINE:" per source line;
                # parenthesised so it is valid as print statement or function
                print('<p>%s</p>'%re.sub('\n','<br/>LINE: ',encoded))
                raise
        except UnicodeError:
            # this encoding did not work, try the next one
            pass
        else:
            break
    return elem
171
def xml2text(xmlString):
    ''' Parse xmlString with loadET and hand the resulting element
    straight to et2text, returning the pretty printed text '''
    return et2text(loadET(xmlString))
175
def xml2HTML(xmlString,**kw):
    ''' Parse xmlString with loadET and return the et2html rendering of
    the resulting element.  Any keyword arguments are passed on to
    et2html unchanged. '''
    tree=loadET(xmlString)
    # the renderer defined in this module is et2html (all lower case);
    # the former call to et2HTML named a function that is not defined here
    return et2html(tree,**kw)
Note: See TracBrowser for help on using the repository browser.