#parser_extra.py
# DL 12 april 2006
#v2 DL 22 November 2006: changed namespaces to reflect v2 schema
# 'extra' functions to enable parsing.
#contains:
#  - Namespace fix
#  - Function to resolve references in CSML doc.

import csml
import csml.parser
import sys
try: #python 2.5
    from xml.etree import ElementTree as etree
except ImportError:
    try:
        # if you've installed it yourself it comes this way
        import ElementTree as etree
    except ImportError:
        # if you've egged it this is the way it comes
        from elementtree import ElementTree as etree
import string
try: #python 2.5
    import xml.etree.cElementTree as ET
except ImportError:
    # standalone cElementTree package for older pythons
    import cElementTree as ET
import codecs


#some xml/string manipulation functions. may as well go in this file for now:

def encodingParser(file, encoding):
    '''Parse an XML file in the given encoding by re-encoding it to utf-8
    chunk by chunk and feeding it to the parser.'''
    f = codecs.open(file, "r", encoding)
    p = ET.XMLParser(encoding="utf-8")
    while 1:
        s = f.read(65536)
        if not s:
            break
        p.feed(s.encode("utf-8"))
    return ET.ElementTree(p.close())


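# Example usage of encodingParser (illustrative sketch only; 'mydata.xml' and
# 'iso-8859-1' are hypothetical placeholders for a real file and its encoding):
#
#   tree = encodingParser('mydata.xml', 'iso-8859-1')
#   root = tree.getroot()

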
def PrettyPrint(elem,indent='',html=0,space=' '):
    '''Lightweight pretty printing of elementTree elements'''
    def estrip(elem):
        ''' Just want to get rid of unwanted whitespace '''
        if elem is None:
            return ''
        else:
            return elem.strip()
    strAttrib=''
    for att in elem.attrib:
        strAttrib+=' %s="%s"'%(att,elem.attrib[att])
    result='%s<%s%s>%s'%(indent,elem.tag,strAttrib,estrip(elem.text))
    children=len(elem)
    if children:
        for item in elem:
            result+='\n'+PrettyPrint(item,indent=indent+space)
        result+='\n%s%s</%s>'%(indent,estrip(item.tail),elem.tag)
    else:
        result+='</%s>'%(elem.tag)
    return result

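# Example usage of PrettyPrint (illustrative sketch only; the XML snippet is
# made up):
#
#   elem = etree.fromstring('<a><b>some text</b></a>')
#   print PrettyPrint(elem)

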
# fixing up namespaces:
def removeInlineNS(csmlstring):
    # removeInlineNS: function removes "inline" namespaces and declares them as part of the Dataset element.

    #replace any fully qualified namespaces
    csmlstring=csmlstring.replace('{http://www.opengis.net/gml}', 'gml:')
    csmlstring=csmlstring.replace('{http://ndg.nerc.ac.uk/csml}','')
    csmlstring=csmlstring.replace('{http://www.w3.org/1999/xlink}','xlink:')
    csmlstring=csmlstring.replace('{http://www.opengis.net/om}','om:')
    csmlstring=csmlstring.replace('{http://www.opengis.net/swe}','swe:')
    csmlstring=csmlstring.replace('{http://ndg.nerc.ac.uk/moles}','moles:')
    #remove csml: prefixes
    csmlstring=csmlstring.replace('<csml:','<')
    csmlstring=csmlstring.replace('</csml:','</')

    #add namespace declarations at top of document
    csmlstring=csmlstring.replace('<Dataset', '<Dataset xmlns="http://ndg.nerc.ac.uk/csml" xmlns:gml="http://www.opengis.net/gml" xmlns:om="http://www.opengis.net/om" xmlns:swe="http://www.opengis.net/swe" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:moles="http://ndg.nerc.ac.uk/moles" xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="http://ndg.nerc.ac.uk/csml csmlDataset.xsd"')

    #this is only used when creating a new csml document
    csmlstring=csmlstring.replace('ns0', 'gml')
    csmlstring=csmlstring.replace('xmlns:ns1', 'xmlns:xlink')
    csmlstring=csmlstring.replace('ns1:href', 'xlink:href')
    csmlstring=csmlstring.replace('ns1:moles', 'xlink:moles')
    csmlstring=csmlstring.replace('ns1:', '') #the rest of the ns1s are CSML namespaces... due to the way it has been parsed.
    #what about SWE - need to check this ********TODO*****
    return csmlstring


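# Example usage of removeInlineNS (illustrative sketch only; 'datasetElem' is a
# hypothetical ElementTree element holding a CSML Dataset):
#
#   csmlstring = removeInlineNS(etree.tostring(datasetElem))

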
def isURI(uri):
    """ a very simple function to test if a string is a uri
    if :// appears in the first 12 characters it is probably a uri """
    #TODO - a decent uri check!
    result = False
    if string.find(uri[:12], '://') != -1:
        result = True
    return result


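# isURI examples (illustrative only):
#
#   isURI('http://ndg.nerc.ac.uk/csml')    # -> True
#   isURI('local/path/to/file.nc')         # -> False

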
class ParserPostProcessor:
    '''Post-processes a parsed CSML Dataset, resolving internal xlink
    references so that linked content is attached to the objects that
    reference it.'''

    def __init__(self,dataset):
        self.dataset=dataset

    def __findFLmatch(self, href):
        #find the fileList in the storage descriptors whose id matches the href
        for sd in csml.csmllibs.csmlextra.listify(self.dataset.storageDescriptor.descriptors):
            if type(sd)==csml.parser.AggregatedArray:
                for comp in csml.csmllibs.csmlextra.listify(sd.components):
                    if hasattr(comp, 'fileList'):
                        if hasattr(comp.fileList, 'id'):
                            if comp.fileList.id==href:
                                return comp.fileList

    def __findSDmatch(self, href):
        #find the storage descriptor (array extract) whose id matches the href
        for sd in csml.csmllibs.csmlextra.listify(self.dataset.storageDescriptor.descriptors):
            if type(sd) in [csml.parser.NetCDFExtract, csml.parser.AggregatedArray]:
                if hasattr(sd, 'id'):
                    if sd.id == href:
                        return sd

    def resolveReferences(self):
        #Need to do this in a simpler manner! for each xlink, call csml.csmllibs.csmlxlink.resolveXlink()

        #start with fileLists in the storage descriptors:
        if hasattr(self.dataset, 'storageDescriptor'):
            if hasattr(self.dataset.storageDescriptor, 'descriptors'):
                for sd in csml.csmllibs.csmlextra.listify(self.dataset.storageDescriptor.descriptors):
                    if type(sd)==csml.parser.AggregatedArray:
                        for comp in csml.csmllibs.csmlextra.listify(sd.components):
                            if hasattr(comp, 'fileListXLINK'):
                                if hasattr(comp.fileListXLINK, 'href'):
                                    #strip the leading '#' from the href before matching ids
                                    fList = self.__findFLmatch(comp.fileListXLINK.href[1:])
                                    if fList is not None:
                                        comp.fileList=fList
                                        del comp.fileListXLINK

        # now handle any xlinks in the domain:
        for feature in csml.csmllibs.csmlextra.listify(self.dataset.featureCollection.featureMembers):
            cvg = feature.value
            for att in ['gridSeriesDomain', 'pointDomain', 'profileSeriesDomain','blah blah']:
                if hasattr(cvg, att):
                    domain=getattr(cvg,att)
                    for ordinate in domain.coordTransformTable.gridOrdinates:
                        if hasattr(ordinate.coordAxisValues,'href'):
                            if ordinate.coordAxisValues.arcrole.split('#')[1] in ['coordinateList', 'timePositionList']:
                                dataforClist= self.__findSDmatch(ordinate.coordAxisValues.href[1:])
                                if dataforClist is not None:
                                    setattr(ordinate.coordAxisValues, 'insertedExtract', dataforClist)
                    break

            #now the rangeSet:
            if hasattr(feature.value.rangeSet, 'valueArray'):
                #could be an xlink here..
                for vc in csml.csmllibs.csmlextra.listify(feature.value.rangeSet.valueArray.valueComponent):
                    if hasattr(vc, 'href'):
                        dataforQlist=self.__findSDmatch(vc.href[1:])
                        if dataforQlist is not None:
                            setattr(vc, 'insertedExtract', dataforQlist)

        return self.dataset

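# Example usage of ParserPostProcessor (illustrative sketch only; how the
# csml.parser.Dataset instance is obtained depends on the wider csml.parser
# API and is assumed here, not defined in this module):
#
#   ds = ...  # a parsed csml.parser.Dataset instance
#   ds = ParserPostProcessor(ds).resolveReferences()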