# Source: TI02-CSML/trunk/csml/webparser/parseTest.py @ 4044
#
# Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI02-CSML/trunk/csml/webparser/parseTest.py@4044
# Revision 4044, 6.9 KB, checked in by domlowe, 12 years ago.
#
# Commit message: removing out of date line
#
# Property svn:executable set to *

import cgi
# NOTE: the wildcard imports deliberately stay ahead of the explicit imports
# below, so the explicitly imported names win over anything the wildcards export.
from cElementTree import *
from parser import *
from xml.dom.minidom import parseString
import sys
import elementtree.ElementTree as etree
import traceback
import re
import tempfile
from xml.sax.saxutils import escape
"""Test for GML/CSML parsing code.
"""
def PrettyPrintHTML(elem, matchList=None):
    '''Lightweight HTML pretty printing of ElementTree elements.

    Renders elem and all of its descendants as nested <div> blocks and
    highlights any words from matchList that occur in element text or tails.
    The markup is meant to be styled with a css something like this:
    ===
    DIV.xmlElem {PADDING-LEFT: 20px;}
    .xmlAttrVal {COLOR:Red; }
    .xmlAttrTyp {COLOR:Green; }
    .xmlElemTag {COLOR:Blue; }
    .highlight {BACKGROUND-COLOR:Yellow; }
    ===

    elem      -- the element to render (anything with tag, attrib, text,
                 tail and iteration over children, as ElementTree provides).
    matchList -- optional iterable of words (treated as regular expression
                 patterns) to highlight; None or empty means no highlighting.

    Returns the HTML as a single string.  Tag names, attribute names and
    values, text and tails are HTML-escaped before being embedded.
    '''
    def span(x, c): return '<span class="%s">%s</span>' % (c, x)
    def div(x, c): return '<div class="%s">%s</div>' % (c, x)

    def plain(x):
        # '%s' formatting accepts any object, so non-string tags still render.
        return escape('%s' % (x,))

    # One combined pattern, applied in a single pass, so that a later word can
    # never match inside the markup inserted for an earlier one.
    words = [w for w in (matchList or ()) if w]
    pattern = None
    if words:
        pattern = re.compile('|'.join(['(?:%s)' % w for w in words]))

    def match(x):
        if x is None:
            return ''
        if pattern is None:
            return escape(x)
        pieces = []
        pos = 0
        for m in pattern.finditer(x):
            if m.end() == m.start():
                # A pattern that matches the empty string highlights nothing.
                continue
            pieces.append(escape(x[pos:m.start()]))
            pieces.append(span(escape(m.group(0)), 'highlight'))
            pos = m.end()
        pieces.append(escape(x[pos:]))
        return ''.join(pieces)

    lt, gt = '<b>&lt;</b>', '<b>&gt;</b>'

    def render(node):
        tag = span(plain(node.tag), 'xmlElemTag')
        parts = [lt, tag]
        for att in node.attrib:
            parts.append(' %s="%s"' % (span(plain(att), 'xmlAttrTyp'),
                                       span(plain(node.attrib[att]), 'xmlAttrVal')))
        parts.append(gt)
        parts.append(match(node.text))
        for child in node:
            parts.append(render(child))
            # Every child's tail is emitted right after that child, so text
            # between sibling elements is not lost.
            parts.append(match(child.tail))
        parts.append('%s/%s%s' % (lt, tag, gt))
        return div(''.join(parts), 'xmlElem')

    return render(elem)
42
43
def fixSpecial(file):
    """Placeholder for handling the 5 'special' characters in a CSML file.

    The handling was never implemented: there is no code that builds a fixed
    string, so this raises NotImplementedError to make any call fail with a
    clear message.
    NOTE(review): presumably the five characters are the XML special
    characters (& < > " ') -- confirm the intended behaviour before
    implementing.

    file -- the input to be fixed (unused until the function is implemented).
    """
    raise NotImplementedError(
        "fixSpecial is a stub: handling of special characters is not implemented")
48
# fixing up namespaces:
def removeInlineNS(csmlstring):
    """Remove "inline" namespaces and declare them on the Dataset element.

    Applies a fixed sequence of plain substring replacements to csmlstring:
    fully qualified '{uri}' namespaces become prefixes (the CSML namespace
    becomes the default, i.e. no prefix), 'csml:' tag prefixes are dropped,
    namespace declarations are added to every '<Dataset' occurrence, and the
    auto-generated 'ns0'/'ns1' prefixes are renamed.

    Also updates the ElementTree namespace map as a side effect.

    TODO: This whole thing is hardcoded and needs reviewing.

    csmlstring -- the serialised CSML document (a string).
    Returns the rewritten string.
    """
    # first need to update the ElementTree namespace map:
    etree._namespace_map.update({
        'http://www.opengis.net/om': 'om',
        'http://www.opengis.net/gml': 'gml',
        'http://ndg.nerc.ac.uk/csml': 'csml',
        'http://www.w3.org/1999/xlink': 'xlink',
    })

    datasetDeclaration = (
        '<Dataset xmlns="http://ndg.nerc.ac.uk/csml"'
        '  xmlns:gml="http://www.opengis.net/gml"'
        ' xmlns:om="http://www.opengis.net/om"'
        ' xmlns:swe="http://www.opengis.net/swe"'
        '  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
        ' xmlns:moles="http://ndg.nerc.ac.uk/moles"'
        '  xmlns:xlink="http://www.w3.org/1999/xlink"'
        ' xsi:schemaLocation="http://ndg.nerc.ac.uk/csml/XMLSchemas/CSMLAppSchema.xsd"'
    )

    # The order matters: later replacements rely on earlier ones having run
    # (e.g. 'ns1:href' must be rewritten before the catch-all 'ns1:').
    replacements = (
        # replace any fully qualified namespaces
        ('{http://www.opengis.net/gml}', 'gml:'),
        ('{http://ndg.nerc.ac.uk/csml}', ''),
        ('{http://www.w3.org/1999/xlink}', 'xlink:'),
        ('{http://www.opengis.net/om}', 'om:'),
        ('{http://www.opengis.net/swe}', 'swe:'),
        ('{http://ndg.nerc.ac.uk/moles}', 'moles:'),
        # remove csml: prefixes
        ('<csml:', '<'),
        ('</csml:', '</'),
        # add namespace declarations at top of document
        ('<Dataset', datasetDeclaration),
        # this is only used when creating a new csml document
        ('ns0', 'gml'),
        ('xmlns:ns1', 'xmlns:xlink'),
        ('ns1:href', 'xlink:href'),
        ('ns1:moles', 'xlink:moles'),
        # the rest of the ns1s are CSML namespaces... due to the way it has
        # been parsed.
        ('ns1:', ''),
    )
    for old, new in replacements:
        csmlstring = csmlstring.replace(old, new)
    # what about SWE - need to check this ********TODO*****
    return csmlstring
80
81
82
def main():
    """CGI entry point: show an upload form, or round-trip an uploaded CSML file.

    With no form data, an HTML upload form is written to stdout.  When a
    "filename" upload is present, the file is parsed with ElementTree, read
    into a Dataset via fromXML, written back out via toXML, and the result is
    pretty-printed as HTML.  Each of those three steps prints a traceback and
    an error paragraph and then exits if it fails.

    stderr is redirected to stdout so tracebacks appear in the page.
    """
    def out(line=""):
        sys.stdout.write(line + "\n")

    def fail(message):
        # Must be called from inside an except block: prints the active
        # traceback (to the redirected stderr), the message, then exits.
        traceback.print_exc()
        out(message)
        sys.exit()

    sys.stderr = sys.stdout
    out("Content-type: text/html")
    out()
    out("<HEAD>")
    out('<STYLE TYPE="text/css">')
    out('DIV.xmlElem {PADDING-LEFT: 20px;}')
    #out('.highlight {BACKGROUND-COLOR:Yellow; }')
    out('.xmlAttrVal {COLOR:Red; }')
    out('.xmlAttrTyp {COLOR:Green; }')
    out('.xmlElemTag {COLOR:Blue; }')
    out("</STYLE>")
    out("<title>CSML Test Parser</title>")
    # Close the head before any visible content, on every code path.
    out("</HEAD>")
    out("<h1>CSML Version 2 Test Parser</h1>")
    out("<p>Please note this is experimental, any questions please contact d.lowe@rl.ac.uk</p>")
    out("<p>Parser version - 28 February 2007 - includes NDG security elements, fixed unicode encoding</p>")
    out("<h2>Load a CSML file to test:</h2>")

    form = cgi.FieldStorage()
    if not form:
        out("""<form action="http://glue.badc.rl.ac.uk/cgi-bin/csml2/parseTest.py" method="POST" enctype="multipart/form-data">  <input type="file" name="filename"> <input type="submit"></form>""")
        return
    if "filename" not in form:
        return
    item = form["filename"]
    if not item.file:
        return
    data = item.file.read()

    #NOW TRY AND PARSE THE FILE
    #this is a fix to the ElementTree namespace problem that namespaces are
    #usually represented as ns0, ns1, ns2 etc.
    etree._namespace_map.update({
        'http://www.opengis.net/om': 'om',
        'http://www.opengis.net/gml': 'gml',
        'http://ndg.nerc.ac.uk/csml': 'csml',
        'http://www.w3.org/1999/xlink': 'xlink',
    })

    #######################################################
    # ElementTree-based parser test ROUND TRIP
    #######################################################
    # 1. Test parsing from CSML file
    # The upload goes to a private, uniquely named temporary file rather than
    # a fixed path, so concurrent requests cannot overwrite each other's data.
    # The file is removed when closed.  Re-opening it by name while it is
    # still open assumes a POSIX host.
    upload = tempfile.NamedTemporaryFile(suffix=".xml")
    try:
        upload.write(data)
        upload.flush()
        try:
            tree = ElementTree(file=upload.name)
        except Exception:
            fail("<p>Could not parse CSML, problem reading from file.</p>")
    finally:
        upload.close()

    dataset = Dataset()

    #Calling the fromXML method reads the CSML into memory.
    try:
        dataset.fromXML(tree.getroot())
    except Exception:
        fail("<p>Could not parse CSML, problem reading XML into objects.</p>")

    #This creates a new CSML document string from the CSML objects in memory.
    try:
        csml = dataset.toXML()
    except Exception:
        fail("<p>Could not parse CSML, problem writing XML from objects.</p>")

    strCSML = PrettyPrintHTML(csml)
    strCSML = removeInlineNS(strCSML)

    # Print the first encoding that works; report it if none does.
    for encoding in ('utf-8', 'utf-16', 'iso-8859-1'):
        try:
            encoded = str(strCSML.encode(encoding))
        except UnicodeError:
            continue
        out(encoded)
        break
    else:
        out("<p>Could not encode the CSML output for display.</p>")
# Run the CGI handler only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

# Note: See TracBrowser for help on using the repository browser.