# parser_extra.py
# DL 12 April 2006
# v2 DL 22 November 2006: changed namespaces to reflect v2 schema
# 'Extra' functions to enable parsing.
# Contains:
#  - Namespace fix
#  - Function to resolve references in a CSML document.
8 | |
---|
9 | import csml |
---|
10 | import csml.parser |
---|
11 | import sys |
---|
12 | import elementtree.ElementTree as etree |
---|
13 | import string |
---|
14 | import cElementTree as ET |
---|
15 | import codecs |
---|
16 | |
---|
17 | |
---|
18 | #some xml/string manipulation functions. may as well go in this file for now: |
---|
19 | |
---|
def encodingParser(file, encoding):
    """Parse an XML file stored in an arbitrary text encoding.

    The file is decoded with *encoding*, re-encoded to UTF-8 in 64 KiB
    chunks and fed to a parser that is told to expect UTF-8.

    file     -- path of the XML file to read.
    encoding -- name of the codec the file is stored in (e.g. "latin-1").

    Returns an ElementTree wrapping the parsed root element.
    Errors from opening/decoding the file or from the XML parser propagate
    to the caller; the file handle is closed in every case.
    """
    f = codecs.open(file, "r", encoding)
    try:
        # The parser is created with an explicit encoding so that it matches
        # the UTF-8 bytes produced below.
        p = ET.XMLParser(encoding="utf-8")
        while 1:
            s = f.read(65536)
            if not s:
                break
            p.feed(s.encode("utf-8"))
        return ET.ElementTree(p.close())
    finally:
        # The original never closed the handle; release it even if parsing fails.
        f.close()
29 | |
---|
30 | |
---|
def PrettyPrint(elem, indent='', html=0, space=' '):
    '''Lightweight pretty printing of elementTree elements.

    elem   -- the element to render (children are rendered recursively).
    indent -- string prepended to this element's opening and closing tags.
    html   -- accepted for backward compatibility; it does not affect the
              output.
    space  -- string added to the indent for each nesting level.

    Returns the element as a string, one child per line. Text is stripped of
    surrounding whitespace. Of the children's tail text only the last
    child's tail is emitted (just before the closing tag). Attribute values
    and text are written as-is, without XML escaping.
    '''
    def estrip(text):
        ''' Just want to get rid of unwanted whitespace '''
        if text is None:
            return ''
        return text.strip()

    strAttrib = ''.join([' %s="%s"' % (att, elem.attrib[att])
                         for att in elem.attrib])
    pieces = ['%s<%s%s>%s' % (indent, elem.tag, strAttrib, estrip(elem.text))]
    if len(elem):
        for item in elem:
            # Pass html/space down so that a custom indent unit is used at
            # every depth, not only for the first level of children.
            pieces.append('\n' + PrettyPrint(item, indent=indent + space,
                                             html=html, space=space))
        pieces.append('\n%s%s</%s>' % (indent, estrip(elem[-1].tail), elem.tag))
    else:
        pieces.append('</%s>' % (elem.tag))
    return ''.join(pieces)
51 | |
---|
52 | # fixing up namespaces: |
---|
def removeInlineNS(csmlstring):
    """Rewrite "inline" namespaces in a serialised CSML document.

    Fully qualified '{uri}' names and generated ns0/ns1 prefixes are turned
    into fixed prefixes (or removed for the CSML namespace), and the
    namespace declarations are added to the Dataset start tag.

    csmlstring -- the document as a string.
    Returns the rewritten string. As a side effect the ElementTree
    namespace map is updated with the om, gml, csml and xlink prefixes.
    """
    # TODO: This whole thing is hardcoded and needs reviewing.
    # What about SWE - need to check this ********TODO*****

    # First the ElementTree namespace map needs to know the prefixes.
    etree._namespace_map.update({
        'http://www.opengis.net/om': 'om',
        'http://www.opengis.net/gml': 'gml',
        'http://ndg.nerc.ac.uk/csml': 'csml',
        'http://www.w3.org/1999/xlink': 'xlink',
    })

    dataset_with_declarations = (
        '<Dataset xmlns="http://ndg.nerc.ac.uk/csml" '
        'xmlns:gml="http://www.opengis.net/gml" '
        'xmlns:om="http://www.opengis.net/om" '
        'xmlns:swe="http://www.opengis.net/swe" '
        'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
        'xmlns:moles="http://ndg.nerc.ac.uk/moles" '
        'xmlns:xlink="http://www.w3.org/1999/xlink" '
        'xsi:schemaLocation="http://ndg.nerc.ac.uk/csml/XMLSchemas/CSMLAppSchema.xsd"'
    )

    # Plain textual substitutions, applied strictly in this order: later
    # entries rely on earlier ones (e.g. the catch-all 'ns1:' must come
    # after the specific 'ns1:href' / 'ns1:moles' rules).
    substitutions = (
        # fully qualified namespaces
        ('{http://www.opengis.net/gml}', 'gml:'),
        ('{http://ndg.nerc.ac.uk/csml}', ''),
        ('{http://www.w3.org/1999/xlink}', 'xlink:'),
        ('{http://www.opengis.net/om}', 'om:'),
        ('{http://www.opengis.net/swe}', 'swe:'),
        ('{http://ndg.nerc.ac.uk/moles}', 'moles:'),
        # csml: prefixes on tags
        ('<csml:', '<'),
        ('</csml:', '</'),
        # namespace declarations at the top of the document
        ('<Dataset', dataset_with_declarations),
        # generated prefixes; only seen when creating a new csml document
        ('ns0', 'gml'),
        ('xmlns:ns1', 'xmlns:xlink'),
        ('ns1:href', 'xlink:href'),
        ('ns1:moles', 'xlink:moles'),
        # the rest of the ns1s are CSML namespaces, due to the way the
        # document has been parsed
        ('ns1:', ''),
    )
    for old, new in substitutions:
        csmlstring = csmlstring.replace(old, new)
    return csmlstring
83 | |
---|
84 | |
---|
def isURI(uri):
    """ a very simple function to test if a string is a uri:
    if '://' appears in the first 12 characters it is probably a uri.

    uri -- the string to test.
    Returns True or False.
    """
    #TODO - a decent uri check!
    # The 'in' operator replaces string.find(), a module-level function
    # that no longer exists in Python 3.
    return '://' in uri[:12]
93 | |
---|
94 | |
---|
class ParserPostProcessor:
    """Resolve internal '#id' references in a parsed CSML dataset.

    The dataset object is modified in place: references to file lists and to
    storage descriptors are replaced by / supplemented with the objects they
    point to.
    """

    def __init__(self, dataset):
        """dataset -- the parsed dataset object to post-process."""
        self.dataset = dataset

    def __descriptors(self):
        """Return the dataset's storage descriptors, or [] if it has none."""
        storage = getattr(self.dataset, 'storageDescriptor', None)
        if not hasattr(storage, 'descriptors'):
            return []
        return csml.csmllibs.csmlextra.listify(storage.descriptors)

    def __findFLmatch(self, href):
        """Return the fileList whose id equals href, or None.

        Only the components of AggregatedArray descriptors are searched.
        """
        for sd in self.__descriptors():
            if not isinstance(sd, csml.parser.AggregatedArray):
                continue
            for comp in csml.csmllibs.csmlextra.listify(sd.components):
                fileList = getattr(comp, 'fileList', None)
                if hasattr(fileList, 'id') and fileList.id == href:
                    return fileList
        return None

    def __findSDmatch(self, href):
        """Return the NetCDFExtract/AggregatedArray descriptor whose id
        equals href, or None."""
        wanted = (csml.parser.NetCDFExtract, csml.parser.AggregatedArray)
        for sd in self.__descriptors():
            if isinstance(sd, wanted) and hasattr(sd, 'id') and sd.id == href:
                return sd
        return None

    def __resolveFileLists(self):
        """Swap fileListXLINK references on aggregated arrays for the
        fileList object they point to."""
        for sd in self.__descriptors():
            if not isinstance(sd, csml.parser.AggregatedArray):
                continue
            for comp in csml.csmllibs.csmlextra.listify(sd.components):
                xlink = getattr(comp, 'fileListXLINK', None)
                if not hasattr(xlink, 'href'):
                    continue
                # href has the form '#id'; drop the leading character.
                fList = self.__findFLmatch(xlink.href[1:])
                if fList is not None:
                    comp.fileList = fList
                    # NOTE(review): the xlink is removed only once it has
                    # been resolved - confirm against the original layout.
                    del comp.fileListXLINK

    def __resolveDomain(self, cvg):
        """Attach the referenced descriptor to every coordinate list of the
        coverage's domain whose content is a '#id' reference."""
        for att in ('gridSeriesDomain', 'pointDomain'):
            if not hasattr(cvg, att):
                continue
            domain = getattr(cvg, att)
            for ordinate in domain.coordTransformTable.gridOrdinates:
                axisValues = ordinate.coordAxisValues
                if not hasattr(axisValues, 'coordinateList'):
                    continue
                content = axisValues.coordinateList.CONTENT
                if content[:1] == '#':
                    dataforClist = self.__findSDmatch(content[1:])
                    if dataforClist is not None:
                        # setattr with a string literal is required: writing
                        # obj.__insertedExtract inside a class body would be
                        # name-mangled to a different attribute name.
                        setattr(axisValues, '__insertedExtract', dataforClist)
            # NOTE(review): stop after the first domain attribute found, so
            # every ordinate of that domain is processed - confirm that this
            # is where the original 'break' sat.
            break

    def __resolveRangeSet(self, rangeSet):
        """Attach the referenced descriptor to the quantityList of every
        value component that carries an href."""
        if not hasattr(rangeSet, 'valueArray'):
            return
        # could be an xlink here..
        for vc in csml.csmllibs.csmlextra.listify(rangeSet.valueArray.valueComponent):
            if not hasattr(vc, 'href'):
                continue
            dataforQlist = self.__findSDmatch(vc.href[1:])
            if dataforQlist is None:
                continue
            if not hasattr(vc, 'quantityList'):
                setattr(vc, 'quantityList', csml.parser.MeasureOrNullList())
            # String-literal setattr again, to avoid name mangling.
            setattr(vc.quantityList, '__insertedExtract', dataforQlist)

    def resolveReferences(self):
        """Resolve all supported references and return the dataset.

        Handles, in order: fileList xlinks in the storage descriptors, '#id'
        coordinate lists in each feature's domain, and href value components
        in each feature's rangeSet. A dataset without storage descriptors or
        without a featureCollection is returned unchanged for that part.
        """
        # start with fileLists in the storage descriptors:
        self.__resolveFileLists()

        featureCollection = getattr(self.dataset, 'featureCollection', None)
        if featureCollection is None:
            return self.dataset

        for feature in csml.csmllibs.csmlextra.listify(featureCollection.featureMembers):
            cvg = feature.value
            # now handle any xlinks in the domain:
            self.__resolveDomain(cvg)
            # now the rangeSet:
            self.__resolveRangeSet(cvg.rangeSet)

        return self.dataset