source: TI05-delivery/ows_framework/trunk/ows_server/ows_server/models/ETxmlView.py @ 2615

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI05-delivery/ows_framework/trunk/ows_server/ows_server/models/ETxmlView.py@2760
Revision 2615, 8.0 KB checked in by lawrence, 13 years ago (diff)

Moving some of the wsgi stack into the ows stack.
At this point an xml dif retrieve works in ows_common.

Line 
1# Copyright Bryan Lawrence, Rutherford Appleton Laboratory, CCLRC, 2006
2#
3# This code is made available under the GPL, if you don't know what
4# that means, you have no rights to copy it or use it!
5#
6# Code to support xml and elementTree viewing as text and html
7try: #python 2.5
8    from xml.etree import ElementTree as ET
9except ImportError:
10    try:
11        # if you've installed it yourself it comes this way
12        import ElementTree as ET
13    except ImportError:
14        # if you've egged it this is the way it comes
15        from elementtree import ElementTree as ET
16import re
17
class subAI:
    ''' Alan Iwi's substitute and replace code for orphan angle brackets:
    a "<" with no ">" before the next "<" (or the end of the string) and a
    ">" with no "<" since the previous ">" (or the start of the string)
    are rewritten as &lt; and &gt;. '''
    def __init__(self):
        # r1 finds an orphan "<", r2 finds an orphan ">"
        self.r1 = re.compile('<([^>]*(<|$))')
        self.r2 = re.compile('((^|>)[^<]*)>')

    def sub(self, s):
        ''' Return s with orphan brackets escaped. Both substitutions are
        repeated until a pass leaves the string unchanged, because each
        match consumes the bracket that delimits the next candidate. '''
        while True:
            before = s
            s = self.r2.sub(r'\1&gt;', self.r1.sub(r'&lt;\1', before))
            if s == before:
                return s
30       
class nsdumb:
    ''' Provides an xpath style interface to element tree nodes which
    is namespace agnostic: callers give plain tag paths and the helper
    prefixes each step with the default namespace it worked out from a
    root element. '''
    def __init__(self,root=None,encoding='utf-8'):
        ''' Provide a root element with namespace definitions when
        instantiating.

        root     -- element the default namespace is taken from; with None
                    the namespace is the empty string.
        encoding -- stored as self.encoding; not otherwise used by this class.
        '''
        self.encoding=encoding
        self.xmlns=''
        if root is None:
            return
        # A declaration found in the root attributes wins; only the first
        # space separated token of the attribute value is used.
        declarations=['xmlns','{http://www.w3.org/2001/XMLSchema-instance}schemaLocation']
        for name in declarations:
            if name in root.attrib:
                self.xmlns='{%s}'%root.attrib[name].split(' ')[0]
                return
        # Otherwise use the namespace embedded in the root tag, which is
        # spelt '{uri}local'; a tag without a leading '{' has no namespace.
        if root.tag.startswith('{'):
            self.xmlns='{%s}'%root.tag[1:].split('}',1)[0]

    def __str__(self):
        return 'Element Tree namespace helper with namespace: [%s]'%self.xmlns

    def __distributens(self,xpathExpression):
        ''' Prefix every '/' separated step of the expression with the
        default namespace. Actually we only support tag finding in this. '''
        return '/'.join([self.xmlns+step for step in xpathExpression.split('/')])

    def getText(self,elem,xpathExpression,multiple=0):
        ''' Get a text object sensibly.

        Returns the text of the first matching subelement, or a list with
        the text of every match when multiple is true. Missing text is
        reported as '' and so is a single lookup that matches nothing; a
        None elem gives '' (or [''] when multiple is true). '''
        if elem is None:
            if multiple:
                return ['',]
            return ''
        path=self.__distributens(xpathExpression)
        if multiple:
            found=elem.findall(path)
        else:
            found=[elem.find(path),]
        try:
            texts=[node.text or '' for node in found]
        except AttributeError:
            # find() returned None, i.e. nothing matched
            texts=['',]
        if multiple:
            return texts
        return texts[0]

    def find(self,elem,xpathExpression):
        ''' Return relevant subelement (note: '' rather than None when elem is None) '''
        if elem is None: return ''
        return elem.find(self.__distributens(xpathExpression))

    def findall(self,elem,xpathExpression):
        ''' Return all relevant subelements '''
        if elem is None: return []
        return elem.findall(self.__distributens(xpathExpression))

    def strip(self,tag):
        ''' Given a tag, strip the default namespace '''
        # str.lstrip() removes a *set of characters*, not a prefix, and so
        # also ate leading letters of the local name; slice the prefix off.
        if self.xmlns and tag.startswith(self.xmlns):
            return tag[len(self.xmlns):]
        return tag
102
def et2text(elem,indent='',html=0,space='   ',helper=None):
    '''Lightweight pretty printing of elementTree elements.

    elem   -- element to render; its children are rendered recursively,
              one per line.
    indent -- prefix written before the opening tag of this element and
              before the closing tag when the element has children.
    html   -- accepted but not used.
    space  -- extra indentation added for each level of nesting.
    helper -- object whose strip(tag) removes the default namespace from a
              tag; an nsdumb built from elem is used when omitted.

    Text and tails are whitespace stripped and written unescaped; of the
    child tails only the last child's tail is written.
    '''
    def estrip(text):
        ''' Just want to get rid of unwanted whitespace '''
        if text is None:
            return ''
        return text.strip()
    ns=helper
    if ns is None: ns=nsdumb(elem)
    tag=ns.strip(elem.tag)
    attributes=''.join([' %s="%s"'%(name,elem.attrib[name]) for name in elem.attrib])
    pieces=['%s<%s%s>%s'%(indent,tag,attributes,estrip(elem.text))]
    children=list(elem)
    if children:
        for item in children:
            # pass space on, otherwise nested levels fall back to the default step
            pieces.append('\n'+et2text(item,indent=indent+space,space=space,helper=ns))
        pieces.append('\n%s%s</%s>'%(indent,estrip(children[-1].tail),tag))
    else:
        pieces.append('</%s>'%tag)
    return ''.join(pieces)
125
126
def et2html(elem,matchList=[],number=0,helper=None):
    #this method is NDG code ... copyright CCLRC ...
    '''Lightweight HTML pretty printing of elementTree elements + highlight
    any words which occur in the element text (and tails) which occur in matchList,
    and formatted using a css something like this:
    ===
    DIV.xmlElem {PADDING-LEFT: 20px;}
    .xmlAttrVal {COLOR:Red; }
    .xmlAttrTyp {COLOR:Green; }
    .xmlElemTag {COLOR:Blue; }
    .highlight {BACKGROUND-COLOR:Yellow; }
    ===
    Line number is not yet implemented.

    elem      -- element to render; children are rendered recursively as
                 nested divs.
    matchList -- words to highlight; they are matched literally and case
                 sensitively. The list is never modified.
    number    -- accepted but not used.
    helper    -- object whose strip(tag) removes the default namespace from
                 a tag; an nsdumb built from elem is used when omitted.

    Tag names, attribute names and values, text and tails are HTML escaped.
    Of the child tails only the last child's tail is written.
    '''
    # local import keeps the block self contained; stdlib on python 2 and 3
    from xml.sax.saxutils import escape
    def span(x,c): return '<span class="%s">%s</span>'%(c,x)
    def div(x,c): return '<div class="%s">%s</div>'%(c,x)
    # One alternation of the escaped words, longest first so that a word is
    # preferred over a shorter word it starts with. Matching in a single
    # pass means markup inserted for one word is never rescanned for another.
    words=[w for w in matchList if w]
    words.sort(key=len,reverse=True)
    pattern=None
    if words:
        pattern=re.compile('(%s)'%'|'.join([re.escape(w) for w in words]))
    def match(x):
        if x is None: return ''
        if pattern is None: return escape(x)
        # split() with a capturing group alternates plain text (even
        # positions) and matched words (odd positions)
        pieces=pattern.split(x)
        out=[]
        for position in range(len(pieces)):
            if position%2:
                out.append(span(escape(pieces[position]),'highlight'))
            else:
                out.append(escape(pieces[position]))
        return ''.join(out)
    lt,gt='<b>&lt;</b>','<b>&gt;</b>'
    ns=helper
    if ns is None: ns=nsdumb(elem)
    tag=span(escape(ns.strip(elem.tag)),'xmlElemTag')
    strAttrib=''
    for att in elem.attrib:
        strAttrib+=' %s="%s"'%(span(escape(att),'xmlAttrTyp'),
                               span(escape(elem.attrib[att]),'xmlAttrVal'))
    result='%s%s%s%s%s'%(lt,tag,strAttrib,gt,match(elem.text))
    children=list(elem)
    if children:
        for item in children:
            result+=et2html(item,matchList,helper=ns)
        result+='%s%s/%s%s'%(match(children[-1].tail),lt,tag,gt)
    else:
        result+='%s/%s%s'%(lt,tag,gt)
    return div(result,'xmlElem')
162   
def loadET(inputString):
    ''' This method returns an elementtree object after some cleaning
    of the string, essentially a hack to make sure that xml doesn't contain any
    naughty & characters alone (typically from URL copies), or unescaped orphan
    < or > signs ... and that the unicode has been processed to something that
    might work.

    Returns None when inputString is None, or when none of the candidate
    encodings could encode the cleaned string. If the cleaned and encoded
    string still does not parse, it is printed (wrapped in <p>, with
    newlines turned into '<br/>LINE: ') and the parser error is re-raised.
    '''
    if inputString is None: return None
    # Escape a bare '&', but leave named entities (&amp;) and numeric
    # character references (&#169; &#xA9;) alone.
    inputString=re.sub(r'&(?!#?\w+;)', '&amp;', inputString)

    # first just try and do it so we don't waste time if we don't need to ...
    try:
        return ET.fromstring(inputString)
    except Exception:
        # The exception type depends on the ElementTree version and on the
        # string type, so anything raised here just means "try harder".
        pass

    #ok, let's deal with orphan > and < signs then ...
    subtool=subAI()
    s=subtool.sub(inputString)

    #now let's sort out an encoding
    encodings=['utf-8','latin-1','iso-8859-1','ascii',]
    elem=None
    for option in encodings:
        try:
            encoded=s.encode(option,'replace')
        except UnicodeError:
            continue
        try:
            elem=ET.fromstring(encoded)
        except Exception:
            # Report the encoded form where it is a native string (python 2)
            # and the text form otherwise, so the report itself cannot fail
            # on a bytes/str mismatch.
            if isinstance(encoded,str):
                shown=encoded
            else:
                shown=s
            print('<p>%s</p>'%re.sub('\n','<br/>LINE: ',shown))
            raise
        break
    return elem
202
def xml2text(xmlString):
    ''' Parse xmlString with loadET and return the et2text rendering of
    the resulting element. '''
    return et2text(loadET(xmlString))
206
def xml2HTML(xmlString,**kw):
    ''' Parse xmlString with loadET and return the et2html rendering of
    the resulting element; keyword arguments are passed on to et2html. '''
    tree=loadET(xmlString)
    # the renderer is spelt et2html; the old et2HTML spelling raised NameError
    return et2html(tree,**kw)
210
211import unittest
212from os.path import splitext
213
class TestCase(unittest.TestCase):
    """ Checks loadET and nsdumb against the example metadata files, which
    are read from the examples directory relative to the working directory.
    """
    def setUp(self):
        ''' Load example files for testing, and get pointers to their dataset identifiers '''
        inputFiles = ['examples/neodc.eg1.dif','examples/ukho.eg1.mdip',
                      'examples/bodc.eg2.edmed.dif']
        indexes={'.dif':'Entry_ID','.mdip':'DatasetIdentifier'}
        self.results=['NOCSDAT192','RSDRA2006000377384','grid.bodc.nerc.ac.uk__DIF__EDMED1048006']
        # the file extension selects the element that holds the identifier
        self.ids=[indexes[splitext(f)[1]] for f in inputFiles]
        self.strings=[]
        for name in inputFiles:
            # close each file as soon as it has been read
            handle=open(name,'r')
            try:
                self.strings.append(handle.read())
            finally:
                handle.close()

    def testEntries(self):
        ''' Testing the file objects can be loaded using loadET'''
        for s in self.strings:
            tree=loadET(s)
            self.assertTrue(tree is not None)

    def testnsDump(self):
        ''' Test that nsdumb can mediate access to identifiers '''
        for xml,tag,expected in zip(self.strings,self.ids,self.results):
            tree=loadET(xml)
            helper=nsdumb(tree)
            self.assertEqual(helper.getText(tree,tag),expected)
244       
245
# Script entry point: run the unit tests in this module when the file is
# executed directly rather than imported.
if __name__=="__main__":
    unittest.main()
Note: See TracBrowser for help on using the repository browser.