source: TI02-CSML/trunk/csml/csml2Moles/csml2moles.py @ 2168

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI02-CSML/trunk/csml/csml2Moles/csml2moles.py@2168
Revision 2168, 9.1 KB checked in by domlowe, 14 years ago (diff)

updated csml2moles.py for version 2 schema, also some slight changes to molesreadwrite.py

Line 
1#!/usr/bin/env python
2
3import sys
4import csml.parser
5import cElementTree
6import elementtree.ElementTree as etree
7import datetime
8import molesReadWrite as MRW
9
10'''call using: python csml2moles.py badc.nerc.ac.uk molesdocABC example.xml example2.xml example3.xml'''
11
12
# Work around ElementTree writing namespaces with generated prefixes
# (ns0, ns1, ns2, ...): register the prefixes we want for the namespaces
# used here.
etree._namespace_map.update({
    'http://www.opengis.net/om': 'om',
    'http://www.opengis.net/gml': 'gml',
    'http://ndg.nerc.ac.uk/csml': 'csml',
    'http://www.w3.org/1999/xlink': 'xlink',
})
16
def strToDate(ymd):
    '''Convert a 'YYYY-MM-DD' string (e.g. '2006-01-01') into a datetime.date.

    The string is split on '-' and every piece is converted with int(), so a
    non-numeric piece raises ValueError.
    '''
    fields = [int(piece) for piece in ymd.split('-')]
    return datetime.date(*fields)
20
def _getEnvelopefromDataset(ds):
    '''Return the bounding envelope (boundedBy) of the dataset's feature collection.'''
    return ds.featureCollection.boundedBy
25
def _parsefile(f):
    '''Parse the CSML XML file f and return it as a csml.parser.Dataset.'''
    # Read the XML document and hand its root element to a fresh Dataset.
    root = cElementTree.ElementTree(file=f).getroot()
    parsed = csml.parser.Dataset()
    parsed.fromXML(root)
    return parsed
31   
32
class StandardName(object):
    def __init__(self, somename):
        '''Break a parameter name into vocab, term and text attributes.

        Kinds of names anticipated:
            myrandomvar
            http://some.namespace/localparams#somevar   (conventions?)
            http://some.namespace/something#somevar     (standard name)

        The name is split at '#'. When that gives exactly two pieces the
        first becomes self.vocab and the second self.term; otherwise the
        whole name is kept as self.term and self.vocab is empty.
        self.text (the description) always starts out empty.

        This can't be finalised until a naming convention is agreed.
        '''
        pieces = somename.split('#')
        if len(pieces) != 2:
            vocab, term = '', somename
        else:
            vocab, term = pieces
        self.text = ''
        self.vocab = vocab
        self.term = term
class EnvelopeAggregator(object):
    '''Grows one envelope so that it covers every envelope compared against it.

    The envelope passed to the constructor is modified in place: its
    lowerCorner/upperCorner CONTENT strings are rewritten whenever a compared
    envelope extends beyond them, and its timePosition list is replaced when
    getAggregatedEnvelope() is called.
    '''

    def __init__(self,envelope):
        '''start with aggregated envelope equal to the initialising envelope
        envelope must be of type csml.parser.EnvelopeWithTimePeriod'''
        self.envelope=envelope
        # Corner CONTENT is a whitespace separated coordinate pair; the
        # coordinates are kept as strings and only converted for comparison.
        lower=envelope.lowerCorner.CONTENT.split()
        upper=envelope.upperCorner.CONTENT.split()
        self.minX,self.minY=lower[0],lower[1]
        self.maxX,self.maxY=upper[0],upper[1]
        self.t1= strToDate(envelope.timePosition[0].CONTENT)
        self.t2= strToDate(envelope.timePosition[1].CONTENT)

    def _compareLowerCorners(self,lowerCorner):
        '''Lower the aggregated minimum coordinates if lowerCorner lies below them.'''
        coords=lowerCorner.CONTENT.split()
        minX,minY=coords[0],coords[1]
        changed=False
        if float(minX) < float(self.minX):
            self.minX=minX
            changed=True
        if float(minY) < float(self.minY):
            self.minY=minY
            changed=True
        if changed:
            # only rewrite the stored corner when something actually moved
            self.envelope.lowerCorner.CONTENT=str(self.minX +' '+ self.minY)

    def _compareUpperCorners(self,upperCorner):
        '''Raise the aggregated maximum coordinates if upperCorner lies above them.'''
        coords=upperCorner.CONTENT.split()
        maxX,maxY=coords[0],coords[1]
        changed=False
        if float(maxX) > float(self.maxX):
            self.maxX=maxX
            changed=True
        if float(maxY) > float(self.maxY):
            self.maxY=maxY
            changed=True
        if changed:
            self.envelope.upperCorner.CONTENT=str(self.maxX +' '+ self.maxY)

    def _compareLowerTimes(self,timepos):
        '''Move the aggregated start date back if timepos (a date string) is earlier.'''
        t=strToDate(timepos)
        if t<self.t1:
            self.t1=t

    def _compareUpperTimes(self,timepos):
        '''Move the aggregated end date forward if timepos (a date string) is later.'''
        t=strToDate(timepos)
        if t>self.t2:
            self.t2=t

    def compareEnvelope(self,envtocheck):
        '''compares new envelope, and if necessary changes the aggregated envelope.

        Raises SystemExit when envtocheck uses a different srsName from the
        aggregated envelope. The message is passed to sys.exit() so that it
        goes to stderr and the process ends with a non-zero status.'''
        if envtocheck.srsName!=self.envelope.srsName:
            sys.exit('Can currently only perform aggregation within the same spatio-temporal reference system.')
        self._compareLowerCorners(envtocheck.lowerCorner)
        self._compareUpperCorners(envtocheck.upperCorner)
        self._compareLowerTimes(envtocheck.timePosition[0].CONTENT)
        self._compareUpperTimes(envtocheck.timePosition[1].CONTENT)

    def getAggregatedEnvelope(self):
        '''Write the aggregated dates back onto the envelope and return it.'''
        self.envelope.timePosition=[csml.parser.csString(str(self.t1)), csml.parser.csString(str(self.t2))] #convert from datetime types
        return self.envelope
106
107
def _buildParameterSummaries(M, dataset):
    '''Return a list with one dgParameterSummary for each feature of dataset
    whose parameter carries an href.

    Features without a parameter href are skipped, since there is no name to
    build a term from.'''
    summaries=[]
    for feature in dataset.featureCollection.featureMembers:
        href=getattr(getattr(feature, 'parameter', None), 'href', None)
        if href is None:
            continue
        sn=StandardName(href)
        if hasattr(feature, 'description'):
            sn.text=feature.description.CONTENT
        VTID=M.dgValidTermID( ParentListID=sn.vocab, TermID=sn.term)
        SPM=M.dgStdParameterMeasured(dgValidTerm= sn.text, dgValidTermID=VTID)
        RDP=M.dgRangeDataParameter(HighValue='', LowValue='')
        summaries.append(M.dgParameterSummary(dgRangeDataParameter=RDP, dgStdParameterMeasured=SPM))
    return summaries


def _roundTripCheck(path='molesout.xml'):
    '''Re-read the MOLES file at path, serialise it again and print the result.

    Not called by main(); kept as a manual debugging aid.'''
    tree=cElementTree.ElementTree(file=path)
    dgMeta=MRW.dgMetadata()
    dgMeta.fromXML(tree.getroot())
    molestree=dgMeta.toXML()
    moles=csml.parser_extra.PrettyPrint(molestree)
    print(molestree)
    print('\n \n \n AFTER PARSING')
    print(moles)


def main(args=None):
    '''Get command line arguments, should be an ID scheme for the moles document and paths to csml files
    i,e, python  csml2moles.py badc.nerc.ac.uk document123 csmlfile1.xml csmlfile2.xml csmlfile3.xml
    main() will aggregate the spatial and temporal bounding box across all the featureCollections.
    Currently only one featureCollection per csml file is supported

    args, when given, replaces sys.argv and must have the same layout
    (program name, repository, identifier, then one or more csml files).
    The MOLES document is printed and written to 'molesout.xml'; the function
    then ends by calling sys.exit().

    TODO: This should be extended to handle directories of files
    TODO: CSML files may be stored in exist '''
    # Apply args before reading any argument, so that all values come from
    # the same source.
    if args:
        sys.argv =args
    argv=sys.argv
    if len(argv) < 4:
        sys.exit('usage: python csml2moles.py <repository> <identifier> <csmlfile> [<csmlfile> ...]')
    repository = argv[1]
    identifier= argv[2]
    csmlfilelist= argv[3:]

    # MOLES skeleton document:
    M=MRW.MolesDoc()

    #set up lists to hold stuff
    schemeIdentifiers=[]
    repositoryIdentifiers=[]
    localIdentifiers=[]
    parameterSummaries=[]

    #the aggregator is seeded with the envelope of the first file
    aggregator=None

    #for each file:
    for f in csmlfilelist:
        #parse file (once per file):
        dataset=_parsefile(f)
        env=_getEnvelopefromDataset(dataset)
        if aggregator is None:
            aggregator=EnvelopeAggregator(env)
        else:
            aggregator.compareEnvelope(env)
        #get other things:
        schemeIdentifiers.append('NDG-A0')
        # NOTE(review): hard-coded rather than taken from the repository argument - confirm intended
        repositoryIdentifiers.append('badc.nerc.ac.uk')
        localIdentifiers.append(f)
        #create parameter summaries:
        parameterSummaries.extend(_buildParameterSummaries(M, dataset))

    #get aggregated envelope:
    finalEnvelope=aggregator.getAggregatedEnvelope()
    upper=finalEnvelope.upperCorner.CONTENT.split()
    lower=finalEnvelope.lowerCorner.CONTENT.split()
    limitN=upper[0]
    limitS=lower[0]
    limitE=upper[1]
    limitW=lower[1]
    tmin=finalEnvelope.timePosition[0].CONTENT
    tmax=finalEnvelope.timePosition[1].CONTENT

    # The MOLES document:
    # create your moles doc by setting attributes of "M.className"
    # the classNames used must be declared in molesWriter.MolesDoc.classList

    #MetaData ID
    dgMID=M.dgMetadataID(schemeIdentifier='NDG-B0', repositoryIdentifier=repository, localIdentifier=identifier)

    #create data granules
    datagranules=[]
    for scheme, repo, local in zip(schemeIdentifiers, repositoryIdentifiers, localIdentifiers):
        DMid=M.dataModelID(schemeIdentifier=scheme, repositoryIdentifier=repo, localIdentifier=local)
        datagranules.append(M.dgDataGranule(dataModelID=DMid))

    #create coverage
    dRng=M.DateRange(DateRangeStart=tmin, DateRangeEnd=tmax)
    dgBB=M.dgBoundingBox(LimitNorth = limitN, LimitSouth=limitS,LimitEast=limitE, LimitWest=limitW)
    dgSc=M.dgSpatialCoverage(BoundingBox=dgBB)
    dgTc=M.dgTemporalCoverage(DateRange=dRng)
    dgSrng=M.dgSpatioTemporalRange(dgSpatialCoverage = dgSc, dgTemporalCoverage=dgTc)
    dgST=M.dgSpatioTemporalCoverage(dgSpatioTemporalRange=dgSrng)
    dgCv=M.dgCoverage(dgSpatioTemporalCoverage=dgST)

    #create data summary:
    DS = M.dgDataSummary(dgDataCoverage=dgCv, dgParameterSummary=parameterSummaries)
    #create data entity:
    dgDE= M.dgDataEntity(dgDataGranule=datagranules, dgDataSummary=DS)

    #create metadata description:
    mdID=M.metadataDescriptionID(schemeIdentifier='1',repositoryIdentifier='2', localIdentifier='3')
    dgMD=M.dgMetadataDescription(metadataDescriptionID=mdID)
    #create metadata record
    dgMR=M.dgMetadataRecord(dgMetadataID=dgMID, dgDataEntity=dgDE, dgMetadataDescription=dgMD)
    dgMeta=MRW.dgMetadata(dgMetadataRecord=dgMR)

    #produce XML  and save to file
    molestree=dgMeta.toXML()
    moles=csml.parser_extra.PrettyPrint(molestree)
    print('\n \n \n BEFORE PARSING')
    print(moles)
    out=open('molesout.xml','w')
    try:
        out.write(moles)
    finally:
        # close the file even if the write fails
        out.close()

    sys.exit()
231
# Run the conversion when this file is executed as a script.
if __name__ == '__main__':
    main()
Note: See TracBrowser for help on using the repository browser.