source: TI02-CSML/trunk/parser/parser_extra.py @ 1438

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI02-CSML/trunk/parser/parser_extra.py@1438
Revision 1438, 8.8 KB checked in by domlowe, 13 years ago (diff)

Fix for XML unicode problems (I think). Tests files to see what format they are in, then passes the correct encoding information to elementtree. Requires further testing.

Line 
1#parser_extra.py
2# DL 12 april 2006
3# 'extra' functions to enable parsing.
4#contains:
5#. Namespace fix
6# Function to resolve references in CSML doc.
7
8import Parser
9import sys
10import elementtree.ElementTree as etree
11import string
12import cElementTree as ET
13import codecs
14
15
16#some xml/string manipulation functions. may as well go in this file for now:
17
def encodingParser(file, encoding):
    """Parse an XML file stored in a known text encoding.

    The file is decoded with ``encoding``, re-encoded chunk by chunk as
    UTF-8 and fed to an ``ET.XMLParser`` that is told to expect UTF-8.

    file     -- path of the XML file to read.
    encoding -- name of the codec the file is stored in (e.g. 'utf-16').

    Returns an ``ET.ElementTree`` wrapping the parsed root element.
    Errors raised while opening, decoding or parsing propagate to the
    caller; the file is closed in every case.
    """
    f = codecs.open(file, "r", encoding)
    try:
        p = ET.XMLParser(encoding="utf-8")
        while 1:
            # Read in bounded chunks so large documents are not held in
            # memory twice (decoded and re-encoded) at the same time.
            s = f.read(65536)
            if not s:
                break
            p.feed(s.encode("utf-8"))
        return ET.ElementTree(p.close())
    finally:
        # Previously the handle was never closed (leaked on every call,
        # and on every parse error).
        f.close()
27
28
def PrettyPrint(elem, indent='', html=0, space='   '):
    '''Lightweight pretty printing of elementTree elements.

    elem   -- the element to render (its children are rendered recursively).
    indent -- string prepended to this element's opening and closing lines.
    html   -- accepted for compatibility; not used by the rendering.
    space  -- indentation unit added to ``indent`` for each nesting level.

    Returns the rendered markup as a string. Leading/trailing whitespace of
    text content is stripped. Attribute values and text are written as-is
    (no XML escaping is applied). Of the children's tails, only the last
    child's tail is kept; it is written just before the closing tag.
    '''
    def estrip(text):
        ''' Just want to get rid of unwanted whitespace '''
        if text is None:
            return ''
        return text.strip()

    attributes = ''.join([' %s="%s"' % (name, elem.attrib[name])
                          for name in elem.attrib])
    opening = '%s<%s%s>%s' % (indent, elem.tag, attributes, estrip(elem.text))

    children = list(elem)
    if not children:
        return opening + '</%s>' % (elem.tag)

    lines = [opening]
    for child in children:
        # Forward ``space`` (and ``html``) so a custom indentation unit is
        # honoured at every depth, not only for the first level of children.
        lines.append(PrettyPrint(child, indent=indent + space,
                                 html=html, space=space))
    lines.append('%s%s</%s>' % (indent, estrip(children[-1].tail), elem.tag))
    return '\n'.join(lines)
49
50# fixing up namespaces:
def removeInlineNS(csmlstring):
    """Rewrite serialised CSML so namespaces are declared once on <Dataset>.

    Registers the om/gml/csml/xlink prefixes in the ElementTree namespace
    map, then applies a fixed, ordered list of plain string substitutions
    to ``csmlstring`` and returns the result.
    """
    #TODO: This whole thing is hardcoded and needs reviewing.

    # First update the ElementTree namespace map.
    prefixes = {
        'http://www.opengis.net/om': 'om',
        'http://www.opengis.net/gml': 'gml',
        'http://ndg.nerc.ac.uk/csml': 'csml',
        'http://www.w3.org/1999/xlink': 'xlink',
    }
    etree._namespace_map.update(prefixes)

    # Applied strictly in this order: later entries rely on earlier ones
    # (e.g. the catch-all 'ns1:' must come after the specific 'ns1:...' ones).
    substitutions = (
        # replace any fully qualified namespaces
        ('{http://www.opengis.net/gml}', 'gml:'),
        ('{http://ndg.nerc.ac.uk/csml}', ''),
        ('{http://www.w3.org/1999/xlink}', 'xlink:'),
        ('{http://www.opengis.net/om}', 'om:'),
        ('{http://ndg.nerc.ac.uk/moles}', 'moles:'),
        # remove csml: prefixes
        ('<csml:', '<'),
        ('</csml:', '</'),
        # add namespace declarations at top of document
        ('<Dataset', '<Dataset xmlns="http://ndg.nerc.ac.uk/csml"  xmlns:gml="http://www.opengis.net/gml" xmlns:om="http://www.opengis.net/om" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:moles="http://ndg.nerc.ac.uk/moles"  xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="http://ndg.nerc.ac.uk/csml/XMLSchemas/CSMLAppSchema.xsd"'),
        # the following are only used when creating a new csml document
        ('ns0', 'gml'),
        ('xmlns:ns1', 'xmlns:xlink'),
        ('ns1:href', 'xlink:href'),
        ('ns1:moles', 'xlink:moles'),
        # the rest of the ns1s are CSML namespaces... due to the way it has been parsed.
        ('ns1:', ''),
    )
    for old, new in substitutions:
        csmlstring = csmlstring.replace(old, new)
    return csmlstring
79
80
def isURI(uri):
    """ a very simple function to test if a string is a uri
    if :// appears in the first 12 characters it is probably a uri

    Returns True or False.
    """
    #TODO - a decent uri check!
    # Substring test instead of the deprecated string.find() module function.
    return '://' in uri[:12]
89
90
class ParserPostProcessor:
    """Replace internal '#id' references in a parsed CSML dataset.

    After parsing, some attributes still hold a reference string (a '#'
    followed by the id of a definition elsewhere in the dataset).
    resolveReferences() swaps those strings for the objects they name.
    """

    def __init__(self, dataset):
        """Store ``dataset``; it must be a ``Parser.Dataset`` instance.

        For any other argument an error message is printed and SystemExit
        is raised with a non-zero status.
        """
        if isinstance(dataset, Parser.Dataset):
            self.dataset = dataset
        else:
            print('Error: Could not post-process CSML document')
            # This is a failure, so do not exit with the default status 0.
            sys.exit(1)

    def resolveReferences(self):
        """Resolve references in place and return the dataset.

        Passes, each run only when the dataset has the relevant attribute:
          * phenomenonDefinitions: feature.parameter (via its href)
          * referenceSystemDefinitions: srsName (not implemented yet)
          * arrayDescriptors: Grid ordinate axisValues and
            domainReference.times (via its href)
          * unitDefinitions: rangeSet.aggregatedArray.uom
        References with no matching definition are left untouched.
        """
        dataset = self.dataset
        if hasattr(dataset, 'phenomenonDefinitions'):
            self._resolveParameters()
        if hasattr(dataset, 'referenceSystemDefinitions'):
            self._resolveReferenceSystems()
        if hasattr(dataset, 'arrayDescriptors'):
            self._resolveArrayDescriptors()
            # NOTE(review): the units pass is only run when the dataset has
            # arrayDescriptors, as in the original code. That looks like a
            # copy-paste slip (units do not use arrayDescriptors) - confirm
            # before relaxing the condition.
            self._resolveUnits()
        #TODO need to also take into account the fact that UnitDefinitions could be externally defined.
        return dataset

    def _findById(self, candidates, reference):
        """Return the candidate whose id equals ``reference`` minus its first character, else None."""
        try:
            target = reference[1:]  # remove '#'
        except (TypeError, AttributeError):
            # Not a reference string (e.g. None, or an object substituted by
            # an earlier call): nothing to resolve.
            return None
        for candidate in candidates:
            if candidate.id == target:
                return candidate
        return None

    def _resolveParameters(self):
        """Replace each feature.parameter carrying an href with the matching phenomenon definition."""
        for feature in self.dataset.featureCollection.members:
            if not hasattr(feature, 'parameter'):
                continue
            if not hasattr(feature.parameter, 'href'):
                continue
            definition = self._findById(
                self.dataset.phenomenonDefinitions.definitionMembers,
                feature.parameter.href)
            if definition is not None:
                # remove href and replace with object
                feature.parameter.href = None
                feature.parameter = definition

    def _resolveReferenceSystems(self):
        """Hook for resolving srsName on domainReference / domainComplement; currently does nothing."""
        # Left for now: the original code searched the reference system
        # definitions for a match but never substituted anything, so the
        # traversal had no effect (other than failing on domains without a
        # domainReference). TODO: replace srsName with the matching object.
        return None

    def _resolveArrayDescriptors(self):
        """Replace axisValues / times references with the matching array descriptor."""
        #NetCDFExtracts etc may be referenced e.g. as <axisvalues> element. Need to fix these too.
        for feature in self.dataset.featureCollection.members:
            if not hasattr(feature, 'domain'):
                continue
            domain = feature.domain
            if hasattr(domain, 'domainComplement'):
                if isinstance(domain.domainComplement, Parser.Grid):
                    for ordinate in domain.domainComplement.ordinates:
                        descriptor = self._findById(
                            self.dataset.arrayDescriptors,
                            getattr(ordinate, 'axisValues', None))
                        if descriptor is not None:
                            ordinate.axisValues = descriptor
            #also times may reference
            if hasattr(domain, 'domainReference'):
                times = getattr(domain.domainReference, 'times', None)
                if hasattr(times, 'href'):
                    descriptor = self._findById(
                        self.dataset.arrayDescriptors, times.href)
                    if descriptor is not None:
                        # the href is kept; the object goes in timePositions
                        times.timePositions = descriptor

    def _resolveUnits(self):
        """Replace rangeSet.aggregatedArray.uom references with the matching unit definition."""
        #uom may reference UnitDefinitions. Replace these too
        if not hasattr(self.dataset, 'unitDefinitions'):
            return
        for feature in self.dataset.featureCollection.members:
            rangeSet = getattr(feature, 'rangeSet', None)
            aggregatedArray = getattr(rangeSet, 'aggregatedArray', None)
            if not hasattr(aggregatedArray, 'uom'):
                continue
            definition = self._findById(
                self.dataset.unitDefinitions.definitionMembers,
                aggregatedArray.uom)
            if definition is not None:
                aggregatedArray.uom = definition
Note: See TracBrowser for help on using the repository browser.