source: TI02-CSML/trunk/csml/csml2Moles/csml2moles.py @ 1838

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI02-CSML/trunk/csml/csml2Moles/csml2moles.py@2047
Revision 1838, 8.7 KB checked in by domlowe, 13 years ago (diff)

moved xmlSchema() instantiation from init to toXML method

Line 
1#!/usr/bin/env python
2
3import sys
4import csml.parser
5import cElementTree
6import elementtree.ElementTree as etree
7import datetime
8import molesReadWrite as MRW
9
10'''call using: python csml2moles.py badc.nerc.ac.uk molesdocABC example.xml example2.xml example3.xml'''
11
12
# Work around ElementTree's habit of writing namespaces with generated
# prefixes (ns0, ns1, ns2, ...): register a readable prefix for each
# namespace used here.
etree._namespace_map.update({
    'http://www.opengis.net/om': 'om',
    'http://www.opengis.net/gml': 'gml',
    'http://ndg.nerc.ac.uk/csml': 'csml',
    'http://www.w3.org/1999/xlink': 'xlink',
})
16
def _strToDate(ymd):
    '''Turn a dash-separated date string such as '2006-01-01' into a datetime.date.'''
    # every dash-separated field must be an integer; they are handed to
    # datetime.date positionally (year, month, day)
    fields = [int(part) for part in ymd.split('-')]
    return datetime.date(*fields)
20
def _getEnvelopefromDataset(ds):
    '''Return the bounding envelope (boundedBy) of the dataset's feature collection.'''
    return ds.featureCollection.boundedBy
25
def _parsefile(f):
    '''Read the XML document f and load it into a new csml.parser.Dataset.

    f is passed straight to cElementTree.ElementTree(file=...).
    Returns the populated Dataset.
    '''
    root = cElementTree.ElementTree(file=f).getroot()
    parsed = csml.parser.Dataset()
    parsed.fromXML(root)
    return parsed
31   
32
class StandardName(object):
    def __init__(self, somename):
        '''Break a parameter name into a vocabulary part and a term part.

        Kinds of name expected:
            myrandomvar
            http://some.namespace/localparams#somevar   (conventions?)
            http://some.namespace/something#somevar     (standard name)

        A name containing exactly one '#' is split there: the left side
        becomes self.vocab and the right side self.term.  Any other name is
        kept whole as self.term with an empty self.vocab.  self.text (the
        description) always starts out empty.

        This is a first attempt only and cannot be finalised until a naming
        convention has been agreed.
        '''
        self.text = ''
        if somename.count('#') == 1:
            self.vocab, self.term = somename.split('#')
        else:
            self.vocab = ''
            self.term = somename
52
class EnvelopeAggregator(object):
    '''Grows one envelope so that it covers every envelope compared against it.

    The envelope given to the constructor is modified in place and is the
    object handed back by getAggregatedEnvelope().  Corner coordinates are
    kept as the strings read from the corners' ``vals`` and are converted to
    float only for comparison.  Only the first two coordinates of each corner
    are considered.
    '''

    def __init__(self,envelope):
        '''start with aggregated envelope equal to the initialising envelope
        envelope must be of type csml.parser.EnvelopeWithTimePeriod'''
        self.envelope=envelope
        lower=envelope.lowerCorner.vals.split()
        upper=envelope.upperCorner.vals.split()
        self.minX, self.minY = lower[0], lower[1]
        self.maxX, self.maxY = upper[0], upper[1]
        self.t1= _strToDate(envelope.timePosition)
        # The upper time bound is the END of the period (timePosition2), the
        # same attribute compareEnvelope() and getAggregatedEnvelope() use for
        # it.  Fall back to timePosition only when no end position is set.
        endPosition=getattr(envelope, 'timePosition2', None)
        if not endPosition:
            endPosition=envelope.timePosition
        self.t2= _strToDate(endPosition)

    def _compareLowerCorners(self,lowerCorner):
        '''lower the aggregated minimum corner wherever lowerCorner is smaller'''
        coords=lowerCorner.vals.split()
        minX,minY=coords[0],coords[1]
        changed=False
        if float(minX) < float(self.minX):
            self.minX=minX
            changed=True
        if float(minY) < float(self.minY):
            self.minY=minY
            changed=True
        if changed:
            # only rewrite the stored corner when something actually moved
            self.envelope.lowerCorner.vals=str(self.minX +' '+ self.minY)

    def _compareUpperCorners(self,upperCorner):
        '''raise the aggregated maximum corner wherever upperCorner is larger'''
        coords=upperCorner.vals.split()
        maxX,maxY=coords[0],coords[1]
        changed=False
        if float(maxX) > float(self.maxX):
            self.maxX=maxX
            changed=True
        if float(maxY) > float(self.maxY):
            self.maxY=maxY
            changed=True
        if changed:
            # only rewrite the stored corner when something actually moved
            self.envelope.upperCorner.vals=str(self.maxX +' '+ self.maxY)

    def _compareLowerTimes(self,timepos):
        '''move the start of the aggregated period back if timepos is earlier'''
        t=_strToDate(timepos)
        if t<self.t1:
            self.t1=t

    def _compareUpperTimes(self,timepos):
        '''move the end of the aggregated period forward if timepos is later'''
        t=_strToDate(timepos)
        if t>self.t2:
            self.t2=t

    def compareEnvelope(self,envtocheck):
        '''compares new envelope, and if necessary changes the aggregated envelope.

        Exits the program (SystemExit, message on stderr, non-zero status)
        when envtocheck.srsName differs from the aggregated envelope's.'''
        if envtocheck.srsName!=self.envelope.srsName:
            # sys.exit(message) reports the failure through the exit status;
            # a bare sys.exit() would signal success to the calling shell.
            sys.exit('Can currently only perform aggregation within the same spatio-temporal reference system.')
        self._compareLowerCorners(envtocheck.lowerCorner)
        self._compareUpperCorners(envtocheck.upperCorner)
        self._compareLowerTimes(envtocheck.timePosition)
        self._compareUpperTimes(envtocheck.timePosition2)

    def getAggregatedEnvelope(self):
        '''write the aggregated period back into the envelope and return it'''
        self.envelope.timePosition=str(self.t1)  #convert from datetime types
        self.envelope.timePosition2=str(self.t2)
        return self.envelope
108
109
def main(args=None):
    '''Get command line arguments, should be an ID scheme for the moles document and paths to csml files
    i,e, python  csml2moles.py badc.nerc.ac.uk document123 csmlfile1.xml csmlfile2.xml csmlfile3.xml
    main() will aggregate the spatial and temporal bounding box across all the featureCollections.
    Currently only one featureCollection per csml file is supported

    args, when given, replaces sys.argv and must use the same layout:
    [program name, repository, identifier, csmlfile, csmlfile, ...]

    The MOLES document is printed and written to 'molesout.xml' in the
    current directory, then read back and printed a second time as a
    round-trip check.  Exits with a usage message when fewer than one csml
    file is given.

    TODO: This should be extended to handle directories of files
    TODO: CSML files may be stored in exist '''
    # Apply the override BEFORE reading any argument so that repository and
    # identifier also come from args, not from the real command line.
    if args:
        sys.argv =args
    if len(sys.argv) < 4:
        sys.exit('usage: python csml2moles.py <repository> <identifier> <csmlfile> [<csmlfile> ...]')

    mNS='http://ndg.nerc.ac.uk/moles'
    repository = sys.argv[1]
    identifier= sys.argv[2]
    csmlfilelist= sys.argv[3:]

    # MOLES skeleton document:
    M=MRW.MolesDoc()

    #set up lists to hold stuff
    schemeIdentifiers=[]
    repositoryIdentifiers=[]
    localIdentifiers=[]
    parameterSummaries=[]

    #the coverage envelope aggregator is seeded with the first file's envelope
    aggregator=None

    #for each file:
    for f in csmlfilelist:
        #parse file (each file is parsed exactly once):
        dataset=_parsefile(f)
        envelope=_getEnvelopefromDataset(dataset)
        if aggregator is None:
            aggregator=EnvelopeAggregator(envelope)
        #compare envelope; the seeding envelope is compared as well so that
        #its end time (timePosition2) is always folded into the aggregate:
        aggregator.compareEnvelope(envelope)
        #get other things:
        schemeIdentifiers.append('NDG-A0')
        repositoryIdentifiers.append('badc.nerc.ac.uk')
        localIdentifiers.append(f)

        #create parameter summaries:
        for feature in dataset.featureCollection.members:
            href=getattr(getattr(feature, 'parameter', None), 'href', None)
            if href is None:
                #nothing to resolve a term from; skip the feature rather than
                #reuse the name left over from the previous feature
                continue
            sn=StandardName(href)
            if hasattr(feature, 'description'):
                sn.text=feature.description
            VTID=M.dgValidTermID( ParentListID=sn.vocab, TermID=sn.term)
            SPM=M.dgStdParameterMeasured(dgValidTerm= sn.text, dgValidTermID=VTID)
            RDP=M.dgRangeDataParameter(HighValue='', LowValue='')
            PS  = M.dgParameterSummary(dgRangeDataParameter=RDP, dgStdParameterMeasured=SPM)
            parameterSummaries.append(PS)

    #get aggregated envelope:
    finalEnvelope=aggregator.getAggregatedEnvelope()
    upper=finalEnvelope.upperCorner.vals.split()
    lower=finalEnvelope.lowerCorner.vals.split()
    #NOTE(review): the first coordinate is used as the north/south limit and
    #the second as east/west, while EnvelopeAggregator calls them X and Y -
    #confirm the axis order against the SRS in use.
    limitN, limitE = upper[0], upper[1]
    limitS, limitW = lower[0], lower[1]

    # The MOLES document:
    # create your moles doc by setting attributes of "M.className"
    # the classNames used must be declared in molesWriter.MolesDoc.classList

    #MetaData ID
    dgMID=M.dgMetadataID(schemeIdentifier='NDG-B0', repositoryIdentifier=repository, localIdentifier=identifier)

    #create data granules, one per csml file
    datagranules=[]
    for scheme, repo, local in zip(schemeIdentifiers, repositoryIdentifiers, localIdentifiers):
        DMid=M.dataModelID(schemeIdentifier=scheme, repositoryIdentifier=repo, localIdentifier=local)
        datagranules.append(M.dgDataGranule(dataModelID=DMid))

    #create coverage
    dgBB=M.dgBoundingBox(LimitNorth = limitN, LimitSouth=limitS,LimitEast=limitE, LimitWest=limitW)
    dgSc=M.dgSpatialCoverage(dgBoundingBox=dgBB)
    #NOTE(review): only the namespace is passed here; the aggregated time
    #period held by finalEnvelope is not written to the document - confirm.
    dgTc=M.dgTemporalCoverage(mNS)
    dgST=M.dgSpatioTemporalCoverage(dgSpatialCoverage = dgSc, dgTemporalCoverage=dgTc)
    dgCv=M.dgCoverage(dgSpatioTemporalCoverage=dgST)

    #create data summary:
    DS = M.dgDataSummary(dgCoverage=dgCv, dgParameterSummary=parameterSummaries)
    #create data entity:
    dgDE= M.dgDataEntity(dgDataGranule=datagranules, dgDataSummary=DS)

    #create metadata description:
    mdID=M.metadataDescriptionID(schemeIdentifier='1',repositoryIdentifier='2', localIdentifier='3')
    dgMD=M.dgMetadataDescription(metadataDescriptionID=mdID)
    #create metadata record
    dgMR=M.dgMetadataRecord(dgMetadataID=dgMID, dgDataEntity=dgDE, dgMetadataDescription=dgMD)
    dgMeta=MRW.dgMetadata(dgMetadataRecord=dgMR)

    #produce XML  and save to file
    #NOTE(review): csml.parser_extra is not imported by name in this file;
    #presumably it is loaded as a side effect of importing csml.parser - verify.
    molestree=dgMeta.toXML()
    moles=csml.parser_extra.PrettyPrint(molestree)
    #single-argument print(...) gives identical output on Python 2 and also
    #parses on Python 3
    print('\n \n \n BEFORE PARSING')
    print(moles)
    outfile=open('molesout.xml','w')
    try:
        outfile.write(moles)
    finally:
        #close the file even if the write fails
        outfile.close()

    #produce XML again (round trip)
    tree=cElementTree.ElementTree(file='molesout.xml')
    dgMeta=MRW.dgMetadata()
    dgMeta.fromXML(tree.getroot())
    molestree=dgMeta.toXML()
    moles=csml.parser_extra.PrettyPrint(molestree)
    print('\n \n \n AFTER PARSING')
    print(moles)
    print(dir(molestree))
227   
# run the conversion only when this file is executed as a script, not when
# it is imported; main() then reads its arguments from sys.argv
if __name__=='__main__':
    main()
Note: See TracBrowser for help on using the repository browser.