source: TI02-CSML/trunk/csml/webparser/parseTest.py @ 4055

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI02-CSML/trunk/csml/webparser/parseTest.py@4055
Revision 4055, 7.3 KB checked in by domlowe, 12 years ago (diff)

minor config updates to online parser

  • Property svn:executable set to *
Line 
1#!/usr/bin/python
2import logging
3logging.basicConfig(level=logging.DEBUG, filename='logs/parser2.log')
4import cgi
5from cElementTree import *
6from parser import *
7from xml.dom.minidom import parseString
8import sys
9import elementtree.ElementTree as etree
10import traceback
11logging.debug('imports all ok')
12
13"""Test for GML/CSML parsing code
14"""
def PrettyPrintHTML(elem, matchList=None):
    '''Render an ElementTree element and its whole subtree as nested HTML.

    Every element becomes a ``<div class="xmlElem">`` containing its opening
    tag, attributes, text, rendered children (each followed by its tail
    text) and closing tag.  The markup is meant to be styled with CSS
    along these lines:
    ===
    DIV.xmlElem {PADDING-LEFT: 20px;}
    .xmlAttrVal {COLOR:Red; }
    .xmlAttrTyp {COLOR:Green; }
    .xmlElemTag {COLOR:Blue; }
    .highlight {BACKGROUND-COLOR:Yellow; }
    ===

    elem      -- element exposing ``tag``, ``attrib``, ``text``, ``tail``
                 and iteration over its children.
    matchList -- optional sequence of words to highlight wherever they
                 occur in element text or tails.  As before, each entry is
                 used as a regular-expression pattern.  ``None`` or an
                 empty sequence disables highlighting.

    Returns the HTML fragment as a string.

    Text, tails and attribute values are HTML-escaped because they come
    straight from the parsed document.  Tag and attribute names are
    emitted unchanged.
    '''
    # Imported locally: no visible import in this file binds ``re`` by name,
    # and ``escape`` is available in the standard library of Python 2 and 3.
    import re
    from xml.sax.saxutils import escape

    lt, gt = '<b>&lt;</b>', '<b>&gt;</b>'

    def span(x, c):
        return '<span class="%s">%s</span>' % (c, x)

    def div(x, c):
        return '<div class="%s">%s</div>' % (c, x)

    def esc(x):
        # Also escape double quotes so attribute values cannot break out of
        # the quoted rendering.
        return escape(x, {'"': '&quot;'})

    # One combined pattern, compiled once for the whole tree.  A single pass
    # means a later word can never match inside the highlight markup that
    # was inserted for an earlier word.
    patterns = ['(?:%s)' % w for w in (matchList or ()) if w]
    highlighter = None
    if patterns:
        highlighter = re.compile('|'.join(patterns))

    def match(x):
        # Escape *x* and wrap every highlighted occurrence in a span.
        if x is None:
            return ''
        if highlighter is None:
            return esc(x)
        pieces = []
        pos = 0
        for hit in highlighter.finditer(x):
            if hit.start() == hit.end():
                # A pattern that matches the empty string has nothing to mark.
                continue
            pieces.append(esc(x[pos:hit.start()]))
            pieces.append(span(esc(hit.group(0)), 'highlight'))
            pos = hit.end()
        pieces.append(esc(x[pos:]))
        return ''.join(pieces)

    def render(node):
        attrs = ''.join([
            ' %s="%s"' % (span(name, 'xmlAttrTyp'),
                          span(esc(node.attrib[name]), 'xmlAttrVal'))
            for name in node.attrib])
        tag = span(node.tag, 'xmlElemTag')
        parts = ['%s%s%s%s%s' % (lt, tag, attrs, gt, match(node.text))]
        for child in node:
            parts.append(render(child))
            # Each child's tail belongs directly after that child; emitting
            # only the last one loses mixed-content text.
            parts.append(match(child.tail))
        parts.append('%s/%s%s' % (lt, tag, gt))
        return div(''.join(parts), 'xmlElem')

    return render(elem)
46
47
def fixSpecial(file):
    """Unimplemented placeholder for fixing up 'special' characters.

    NOTE(review): the body does no work and returns ``strXMLFixed``, a name
    that is never assigned in this function or anywhere in the visible file.
    Unless one of the star-imports happens to provide it, calling this
    function raises NameError.  Nothing in the visible file calls it.
    """
    #Looks for the 5 'special' characters
    # (presumably the five XML predefined entities & < > " ' -- TODO confirm)
    #
    return strXMLFixed
52
53# fixing up namespaces:
def removeInlineNS(csmlstring):
    """Rewrite inline namespaces in *csmlstring* as prefixes and declare
    them once on the Dataset element.

    The given text goes through a fixed, ordered series of plain-text
    substitutions and the resulting string is returned.  As a side effect
    the prefix table of the module-level ``etree`` import is extended with
    the om/gml/csml/xlink namespaces.

    TODO: This whole thing is hardcoded and needs reviewing.
    TODO: what about SWE prefixes generated as nsN - need to check this.
    """
    # Teach ElementTree the preferred prefixes before touching the text.
    preferred_prefixes = {
        'http://www.opengis.net/om': 'om',
        'http://www.opengis.net/gml': 'gml',
        'http://ndg.nerc.ac.uk/csml': 'csml',
        'http://www.w3.org/1999/xlink': 'xlink',
    }
    etree._namespace_map.update(preferred_prefixes)

    dataset_with_declarations = (
        '<Dataset xmlns="http://ndg.nerc.ac.uk/csml"'
        '  xmlns:gml="http://www.opengis.net/gml"'
        ' xmlns:om="http://www.opengis.net/om"'
        ' xmlns:swe="http://www.opengis.net/swe"'
        '  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
        ' xmlns:moles="http://ndg.nerc.ac.uk/moles"'
        '  xmlns:xlink="http://www.w3.org/1999/xlink"'
        ' xsi:schemaLocation="http://ndg.nerc.ac.uk/csml/XMLSchemas/CSMLAppSchema.xsd"'
    )

    # Order matters: later entries rely on what earlier ones left behind
    # (e.g. the specific ns1:... rewrites must run before the bare 'ns1:').
    substitutions = (
        # fully qualified {uri} names -> prefixed names (csml is the default)
        ('{http://www.opengis.net/gml}', 'gml:'),
        ('{http://ndg.nerc.ac.uk/csml}', ''),
        ('{http://www.w3.org/1999/xlink}', 'xlink:'),
        ('{http://www.opengis.net/om}', 'om:'),
        ('{http://www.opengis.net/swe}', 'swe:'),
        ('{http://ndg.nerc.ac.uk/moles}', 'moles:'),
        # drop explicit csml: prefixes on opening and closing tags
        ('<csml:', '<'),
        ('</csml:', '</'),
        # namespace declarations at the top of the document
        ('<Dataset', dataset_with_declarations),
        # generated ns0/ns1 prefixes; only seen when creating a new document
        ('ns0', 'gml'),
        ('xmlns:ns1', 'xmlns:xlink'),
        ('ns1:href', 'xlink:href'),
        ('ns1:moles', 'xlink:moles'),
        # the remaining ns1 prefixes are CSML, which is the default namespace
        ('ns1:', ''),
    )
    for needle, replacement in substitutions:
        csmlstring = csmlstring.replace(needle, replacement)
    return csmlstring
84
85
86
def _report_failure(message):
    """Print the active exception's traceback and *message*, then exit.

    Must be called from inside an ``except`` block.  The traceback goes to
    sys.stderr, which main() has pointed at sys.stdout so it reaches the
    browser.
    """
    traceback.print_exc()
    print(message)
    sys.exit()


def _round_trip_upload(data):
    """Parse uploaded CSML *data*, regenerate it and print it as HTML.

    The bytes are written to temp/tempcsml.xml, parsed with ElementTree,
    loaded into a Dataset via fromXML(), written back out with toXML() and
    printed through PrettyPrintHTML / removeInlineNS.  Any failure in one of
    the three parsing stages prints the traceback plus a short message and
    exits the script.
    """
    # NOTE(review): this fixed path is shared by every request, so two
    # simultaneous uploads can overwrite each other's file -- confirm whether
    # a per-request temporary file would be acceptable here.
    handle = open("temp/tempcsml.xml", "wb")
    try:
        handle.write(data)
    finally:
        # Close the handle even when the write itself fails.
        handle.close()

    #this is a fix to the  ElementTree namespace problem that namespaces are usually represented as ns0, ns1, ns2 etc.
    etree._namespace_map.update({
        'http://www.opengis.net/om': 'om',
        'http://www.opengis.net/gml': 'gml',
        'http://ndg.nerc.ac.uk/csml': 'csml',
        'http://www.w3.org/1999/xlink': 'xlink'})

    #######################################################
    # ElementTree-based parser test ROUND TRIP
    #######################################################
    # 1. Read the CSML file into an element tree.
    try:
        tree = ElementTree(file='temp/tempcsml.xml')
    except Exception:
        _report_failure("<p>Could not parse CSML, problem reading from file.</p>")

    # 2. Calling the fromXML method reads the CSML into memory.
    dataset = Dataset()
    try:
        dataset.fromXML(tree.getroot())
    except Exception:
        _report_failure("<p>Could not parse CSML, problem reading XML into objects.</p>")

    # 3. This creates a new CSML document from the CSML objects in memory.
    try:
        csml = dataset.toXML()
    except Exception:
        _report_failure("<p>Could not parse CSML, problem writing XML from objects.</p>")

    strCSML = removeInlineNS(PrettyPrintHTML(csml))
    # Print using the first encoding that can represent the markup.
    for encoding in ('utf-8', 'utf-16', 'iso-8859-1'):
        try:
            encoded = str(strCSML.encode(encoding))
        except UnicodeError:
            continue
        print(encoded)
        break


def main():
    """CGI entry point for the online CSML test parser.

    Emits the HTTP header and the page head, then shows the upload form or,
    when a file was uploaded in the ``filename`` field, the round-tripped
    CSML rendered as HTML.

    Differences from the previous version:
    * ``</HEAD>`` is printed right after the title.  Before, all visible
      content sat inside the head and the closing tag only appeared at the
      end of a successful parse.
    * A submission without a usable ``filename`` upload shows the form again
      instead of producing a page with nothing under the heading.
    * The upload file handle is closed even if writing fails, and the
      catch-all handlers use ``except Exception``.

    The print calls use the single-argument parenthesised form, which
    produces the same output under the Python 2 print statement.
    """
    logging.debug('starting main')
    # Send tracebacks to the browser rather than the server's error log.
    sys.stderr = sys.stdout
    print("Content-type: text/html")
    print("")  # blank line terminates the CGI headers
    print("<HEAD>")
    print('<STYLE TYPE="text/css">')
    print('DIV.xmlElem {PADDING-LEFT: 20px;}')
    #print('.highlight {BACKGROUND-COLOR:Yellow; }')
    print('.xmlAttrVal {COLOR:Red; }')
    print('.xmlAttrTyp {COLOR:Green; }')
    print('.xmlElemTag {COLOR:Blue; }')
    print("</STYLE>")
    print("<title>CSML Test Parser</title>")
    print("</HEAD>")
    print("<h1>CSML Version 2 Test Parser</h1>")
    print("<p>Please note this is experimental, any questions please contact d.lowe@rl.ac.uk</p>")
    print("<p>Parser version - 18 July 2008 - moved to new server and update parser to latest from svn</p>")
    print("<h2>Load a CSML file to test against the parser:</h2>")
    logging.debug('ready to load form')
    form = cgi.FieldStorage()
    logging.debug('initialised form %s', form)

    uploaded = None
    if not form:
        logging.debug('not form')
    elif "filename" in form:
        # getattr: a repeated field yields a list, which has no ``file``.
        uploaded = getattr(form["filename"], "file", None)

    if uploaded:
        _round_trip_upload(uploaded.read())
    else:
        print("""<form action="http://csml.badc.rl.ac.uk/cgi-bin/parseTest.py" method="POST" enctype="multipart/form-data">  <input type="file" name="filename"> <input type="submit"></form>""")
    logging.debug('end of main')
# Run the CGI handler only when executed as a script, so that importing this
# module (for example to reuse PrettyPrintHTML) does not emit a page.
if __name__ == "__main__":
    main()
174
Note: See TracBrowser for help on using the repository browser.