source: TI02-CSML/branches/CSML2/parser_extra.py @ 1686

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI02-CSML/branches/CSML2/parser_extra.py@1686
Revision 1686, 3.8 KB checked in by domlowe, 15 years ago (diff)

identifying code that needs changing for csml v2 and putting stubs in where possible. More detailed notes in v2notes.txt file

Line 
1#parser_extra.py
2# DL 12 april 2006
3# 'extra' functions to enable parsing.
4#contains:
5#. Namespace fix
6# Function to resolve references in CSML doc.
7
8import csml.parser
9import sys
10import elementtree.ElementTree as etree
11import string
12import cElementTree as ET
13import codecs
14
15
16#some xml/string manipulation functions. may as well go in this file for now:
17
def encodingParser(file, encoding):
    """Parse an XML file that is stored in an arbitrary text encoding.

    The file is decoded using ``encoding``, re-encoded as UTF-8 and fed to
    a parser (created with encoding="utf-8") in chunks of 65536 characters.

    file     -- path of the XML file to read.
    encoding -- name of the codec the file is stored in.

    Returns an ElementTree wrapping the parsed root element.
    """
    p = ET.XMLParser(encoding="utf-8")
    f = codecs.open(file, "r", encoding)
    try:
        while 1:
            s = f.read(65536)
            if not s:
                break
            p.feed(s.encode("utf-8"))
    finally:
        # Release the file handle even if decoding or parsing fails.
        f.close()
    return ET.ElementTree(p.close())
27
28
def PrettyPrint(elem, indent='', html=0, space='   '):
    """Lightweight pretty printing of elementTree elements.

    elem   -- the element to render (its children are rendered recursively).
    indent -- string prepended to this element's opening and closing tags.
    html   -- accepted for interface compatibility; not used by this
              function (it is passed on unchanged to recursive calls).
    space  -- string added to ``indent`` for each level of nesting.

    Returns the rendered markup as a string. Element text is stripped of
    surrounding whitespace. Only the tail of the last child is emitted
    (before the closing tag); tails of other children are dropped.
    """
    def estrip(text):
        """Strip surrounding whitespace, treating None as empty."""
        if text is None:
            return ''
        return text.strip()

    attribs = ''.join([' %s="%s"' % (att, elem.attrib[att])
                       for att in elem.attrib])
    parts = ['%s<%s%s>%s' % (indent, elem.tag, attribs, estrip(elem.text))]
    if len(elem):
        last_child = None
        for item in elem:
            # Pass space (and html) down so that a caller-supplied indent
            # unit applies at every depth, not just to direct children.
            parts.append('\n' + PrettyPrint(item, indent=indent + space,
                                            html=html, space=space))
            last_child = item
        parts.append('\n%s%s</%s>' % (indent, estrip(last_child.tail),
                                      elem.tag))
    else:
        parts.append('</%s>' % (elem.tag))
    return ''.join(parts)
49
50# fixing up namespaces:
def removeInlineNS(csmlstring):
    """Remove "inline" namespaces and declare them on the Dataset element.

    Registers the om/gml/csml/xlink prefixes in the ElementTree namespace
    map, then applies a fixed, ordered series of plain-text substitutions
    to ``csmlstring`` and returns the result.
    """
    # TODO: This whole thing is hardcoded and needs reviewing.

    # First update the ElementTree namespace map.
    known_namespaces = (
        ('http://www.opengis.net/om', 'om'),
        ('http://www.opengis.net/gml', 'gml'),
        ('http://ndg.nerc.ac.uk/csml', 'csml'),
        ('http://www.w3.org/1999/xlink', 'xlink'),
    )
    for uri, prefix in known_namespaces:
        etree._namespace_map[uri] = prefix

    # The substitutions are applied strictly in this order; later entries
    # operate on the output of earlier ones.
    substitutions = (
        # replace any fully qualified namespaces
        ('{http://www.opengis.net/gml}', 'gml:'),
        ('{http://ndg.nerc.ac.uk/csml}', ''),
        ('{http://www.w3.org/1999/xlink}', 'xlink:'),
        ('{http://www.opengis.net/om}', 'om:'),
        ('{http://ndg.nerc.ac.uk/moles}', 'moles:'),
        # remove csml: prefixes
        ('<csml:', '<'),
        ('</csml:', '</'),
        # add namespace declarations at top of document
        ('<Dataset', '<Dataset xmlns="http://ndg.nerc.ac.uk/csml"  xmlns:gml="http://www.opengis.net/gml" xmlns:om="http://www.opengis.net/om" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:moles="http://ndg.nerc.ac.uk/moles"  xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="http://ndg.nerc.ac.uk/csml/XMLSchemas/CSMLAppSchema.xsd"'),
        # this is only used when creating a new csml document
        ('ns0', 'gml'),
        ('xmlns:ns1', 'xmlns:xlink'),
        ('ns1:href', 'xlink:href'),
        ('ns1:moles', 'xlink:moles'),
        # the rest of the ns1s are CSML namespaces... due to the way it
        # has been parsed.
        ('ns1:', ''),
    )
    for old, new in substitutions:
        csmlstring = csmlstring.replace(old, new)
    return csmlstring
79
80
def isURI(uri):
    """A very simple test of whether a string is a URI.

    If "://" appears within the first 12 characters the string is
    treated as a URI.

    uri -- the string to test.

    Returns True or False.
    """
    # TODO - a decent uri check!
    # The "in" operator replaces the deprecated string.find() helper and
    # gives the same answer.
    return '://' in uri[:12]
89
90
class ParserPostProcessor:
    """Holder for a parsed dataset awaiting post-processing.

    Needs rewriting for CSML v2 (see V1original); for now this only keeps
    a reference to the dataset it was given.
    """

    def __init__(self, dataset):
        """Store ``dataset`` on the instance as ``self.dataset``."""
        # __init__ must not return a value, and self.dataset has to be
        # assigned before anything can read it.
        self.dataset = dataset
Note: See TracBrowser for help on using the repository browser.