source: TI02-CSML/trunk/csml/webparser/parseTest.py @ 2217

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI02-CSML/trunk/csml/webparser/parseTest.py@2217
Revision 2217, 6.9 KB checked in by domlowe, 13 years ago (diff)

online v2 parser code working

  • Property svn:executable set to *
Line 
1#!/usr/local/NDG/ActivePython-2.4/bin/python2.4
2
3import cgi
4from cElementTree import *
5from parser import *
6from xml.dom.minidom import parseString
7import sys
8import elementtree.ElementTree as etree
9import traceback
10"""Test for GML/CSML parsing code
11"""
def PrettyPrintHTML(elem,matchList=None):
    '''Lightweight HTML pretty printing of elementTree elements + highlight
    any words from matchList which occur in the element text (and tails).

    The output is meant to be formatted using a css something like this:
    ===
    DIV.xmlElem {PADDING-LEFT: 20px;}
    .xmlAttrVal {COLOR:Red; }
    .xmlAttrTyp {COLOR:Green; }
    .xmlElemTag {COLOR:Blue; }
    .highlight {BACKGROUND-COLOR:Yellow; }
    ===

    elem      -- element to render; it is walked recursively.
    matchList -- optional sequence of literal words to highlight. The words
                 are matched literally (not as regular expressions) in the
                 text and tail of elem and of every descendant. Empty words
                 are ignored.

    Returns a string holding one <div class="xmlElem"> for elem, with one
    nested div per descendant element. Tag names, attribute names/values and
    character data are HTML-escaped, so markup characters in the source XML
    are displayed rather than interpreted by the browser.
    '''
    # Local imports: the file does not import these names explicitly.
    import re
    from xml.sax.saxutils import escape

    # Build a single alternation for all words so the text is scanned once
    # and a later word can never match inside markup inserted for an
    # earlier one. Longest words first, so overlapping words prefer the
    # longer match.
    words=[re.escape(w) for w in (matchList or []) if w]
    words.sort(key=len,reverse=True)
    pattern=None
    if words:
        pattern=re.compile('(%s)'%'|'.join(words))

    quoteEntities={'"':'&quot;'}
    lt,gt='<b>&lt;</b>','<b>&gt;</b>'

    def span(x,c): return '<span class="%s">%s</span>'%(c,x)
    def div(x,c): return '<div class="%s">%s</div>'%(c,x)
    def esc(x): return escape('%s'%(x,))
    def match(x):
        # Escape character data and wrap every highlighted word in a span.
        if x is None: return ''
        if pattern is None: return escape(x)
        pieces=[]
        # split() with one capturing group alternates plain text (even
        # indexes) and matched words (odd indexes).
        for i,piece in enumerate(pattern.split(x)):
            if i%2:
                pieces.append(span(escape(piece),'highlight'))
            else:
                pieces.append(escape(piece))
        return ''.join(pieces)

    def render(node):
        tag=span(esc(node.tag),'xmlElemTag')
        parts=[lt,tag]
        for att in node.attrib:
            value=escape('%s'%(node.attrib[att],),quoteEntities)
            parts.append(' %s="%s"'%(span(esc(att),'xmlAttrTyp'),span(value,'xmlAttrVal')))
        parts.append(gt)
        parts.append(match(node.text))
        for child in node:
            parts.append(render(child))
            # Every child's tail belongs right after that child, not only
            # the tail of the last one.
            parts.append(match(child.tail))
        parts.append('%s/%s%s'%(lt,tag,gt))
        return div(''.join(parts),'xmlElem')

    return render(elem)
43
44
def fixSpecial(file):
    #Looks for the 5 'special' characters
    #
    # NOTE(review): this is an unfinished stub. The 'file' argument is never
    # used and 'strXMLFixed' is not assigned anywhere in this function, so
    # unless one of the star imports at the top of the file happens to
    # provide that name, calling fixSpecial raises NameError.
    # Presumably the intent is to return the XML with the five special
    # characters (& < > " ') fixed up -- TODO confirm what 'file' is meant
    # to be (path, file object or string) before implementing.
    return strXMLFixed
49
50# fixing up namespaces:
def removeInlineNS(csmlstring):
    # removeInlineNS: strips "inline" namespaces from a serialised CSML
    # string and declares them once, on the Dataset element, instead.
    # Returns the rewritten string.
    # TODO: This whole thing is hardcoded and needs reviewing.

    # Teach ElementTree the preferred prefixes first.
    preferredPrefixes={
        'http://www.opengis.net/om': 'om',
        'http://www.opengis.net/gml': 'gml',
        'http://ndg.nerc.ac.uk/csml' : 'csml',
        'http://www.w3.org/1999/xlink':'xlink',
    }
    etree._namespace_map.update(preferredPrefixes)

    # Namespace declarations that get attached to the Dataset start tag.
    datasetTag=('<Dataset xmlns="http://ndg.nerc.ac.uk/csml"'
                '  xmlns:gml="http://www.opengis.net/gml"'
                ' xmlns:om="http://www.opengis.net/om"'
                ' xmlns:swe="http://www.opengis.net/swe"'
                '  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
                ' xmlns:moles="http://ndg.nerc.ac.uk/moles"'
                '  xmlns:xlink="http://www.w3.org/1999/xlink"'
                ' xsi:schemaLocation="http://ndg.nerc.ac.uk/csml/XMLSchemas/CSMLAppSchema.xsd"')

    # (old, new) pairs, applied strictly in this order: later rules rely on
    # what earlier ones have already rewritten.
    rewrites=(
        # fully qualified {uri} names -> prefixed names (csml is the default)
        ('{http://www.opengis.net/gml}', 'gml:'),
        ('{http://ndg.nerc.ac.uk/csml}', ''),
        ('{http://www.w3.org/1999/xlink}', 'xlink:'),
        ('{http://www.opengis.net/om}', 'om:'),
        ('{http://www.opengis.net/swe}', 'swe:'),
        ('{http://ndg.nerc.ac.uk/moles}', 'moles:'),
        # drop csml: prefixes from start and end tags
        ('<csml:', '<'),
        ('</csml:', '</'),
        # declare the namespaces at the top of the document
        ('<Dataset', datasetTag),
        # the remaining rules only matter when creating a new csml document
        ('ns0', 'gml'),
        ('xmlns:ns1', 'xmlns:xlink'),
        ('ns1:href', 'xlink:href'),
        ('ns1:moles', 'xlink:moles'),
        # the rest of the ns1s are CSML namespaces... due to the way it
        # has been parsed.
        ('ns1:', ''),
    )
    # what about SWE - need to check this ********TODO*****
    for old,new in rewrites:
        csmlstring=csmlstring.replace(old,new)
    return csmlstring
81
82
83
def main():
    '''CGI entry point for the CSML version 2 round-trip test page.

    Writes a complete HTML page to stdout. With no form data it shows the
    upload form. With an uploaded file in the "filename" field it parses
    the file with ElementTree, loads it into a Dataset via fromXML(),
    serialises it again via toXML() and prints the result pretty-printed
    as HTML. A failure at any stage is reported in the page together with
    the traceback, and the page is still closed properly.

    The upload is parsed straight from the file object provided by
    cgi.FieldStorage, so no shared scratch file is written and concurrent
    requests cannot overwrite each other's data.
    '''
    # Local import: the file does not import this name explicitly.
    from xml.sax.saxutils import escape

    def emit(line=''):
        # Write one line of the response.
        sys.stdout.write(line+'\n')

    def fail(message):
        # Report the exception currently being handled. The traceback is
        # escaped so that it is displayed rather than interpreted as HTML.
        emit('<pre>%s</pre>'%escape(traceback.format_exc()))
        emit('<p>%s</p>'%message)

    def roundTrip(upload):
        # Parse the upload, load it into CSML objects, write it back out
        # and print the pretty-printed result.

        #this is a fix to the  ElementTree namespace problem that namespaces are usually represented as ns0, ns1, ns2 etc.
        etree._namespace_map.update({
                'http://www.opengis.net/om': 'om',  'http://www.opengis.net/gml': 'gml','http://ndg.nerc.ac.uk/csml' : 'csml', 'http://www.w3.org/1999/xlink':'xlink'})

        # The bare excepts below are deliberate: this is the top-level CGI
        # boundary and any failure at all should be shown to the user.

        # 1. Test parsing from the uploaded CSML file
        try:
            tree = ElementTree(file=upload)
        except:
            fail("Could not parse CSML")
            return

        # 2. Calling the fromXML method reads the CSML into memory.
        # NOTE(review): Dataset is presumably supplied by
        # 'from parser import *' -- confirm.
        dataset = Dataset()
        try:
            dataset.fromXML(tree.getroot())
        except:
            fail("Could not parse CSML")
            return

        # 3. This creates a new CSML document from the CSML objects in memory.
        try:
            csml = dataset.toXML()
        except:
            fail("Could not serialise CSML")
            return

        strCSML = removeInlineNS(PrettyPrintHTML(csml))
        # Print using the first encoding that can represent the text.
        for encoding in ('utf-8', 'utf-16', 'iso-8859-1'):
            try:
                encoded = str(strCSML.encode(encoding))
            except UnicodeError:
                continue
            emit(encoded)
            break
        else:
            # No encoding worked: say so instead of printing nothing.
            emit("<p>Could not encode the CSML for display</p>")

    uploadForm = """<form action="http://glue.badc.rl.ac.uk/cgi-bin/csml2/parseTest.py" method="POST" enctype="multipart/form-data">  <input type="file" name="filename"> <input type="submit"></form>"""

    # Send errors to the browser as well.
    sys.stderr = sys.stdout
    emit("Content-type: text/html")
    emit()
    emit("<HTML>")
    emit("<HEAD>")
    emit('<STYLE TYPE="text/css">')
    emit('DIV.xmlElem {PADDING-LEFT: 20px;}')
    # The '.highlight {BACKGROUND-COLOR:Yellow; }' rule is currently disabled.
    emit('.xmlAttrVal {COLOR:Red; }')
    emit('.xmlAttrTyp {COLOR:Green; }')
    emit('.xmlElemTag {COLOR:Blue; }')
    emit("</STYLE>")
    emit("<title>CSML Test Parser</title>")
    # Close the head before any visible content is written.
    emit("</HEAD>")
    emit("<BODY>")
    emit("<h1>CSML Version 2 Test Parser</h1>")
    emit("<p>Please note this is experimental, any questions please contact d.lowe@rl.ac.uk</p>")
    emit("<p>Parser version - 28 February 2007 - includes NDG security elements, fixed unicode encoding</p>")
    emit("<h2>Load a CSML file totest:</h2>")

    form = cgi.FieldStorage()
    if not form:
        emit(uploadForm)
    else:
        item = None
        if form.has_key("filename"):
            item = form["filename"]
        # getattr: a repeated field comes back as a list, which has no .file
        upload = getattr(item, 'file', None)
        if upload:
            roundTrip(upload)
        else:
            # Something was submitted, but not a usable file upload.
            emit("<p>No CSML file was uploaded</p>")
            emit(uploadForm)
    emit("</BODY>")
    emit("</HTML>")


# Only produce a response when run as the CGI script, not when imported.
if __name__ == "__main__":
    main()
166
Note: See TracBrowser for help on using the repository browser.