source: TI05-delivery/ows_framework/trunk/ows_server/ows_server/models/DIF.py @ 3842

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI05-delivery/ows_framework/trunk/ows_server/ows_server/models/DIF.py@3842
Revision 3842, 10.9 KB checked in by spascoe, 12 years ago (diff)

Various changes needed for compatibility with proxy configurations.

The URL encoding of selectItems/addSelectedItems has been overhauled
because it didn't work behind a proxy and had bugs anyway. Relative paths
in kid files have had $g.server added where appropriate.

Line 
1# python class to support methods on a DIF ... to conform with
2# renderEntity etc ...
3#
4from Utilities import *
5from geoUtilities import *
6from People2 import *
7from AccessControl import AccessControl
8from ndgUtils.ETxmlView import loadET, nsdumb
9from ServiceBinding import DIFService
10from ndgUtils.ndgObject import ndgObject
11from stubB import collapse2
12import urllib
13
14try: #python 2.5
15    from xml.etree import ElementTree as ET
16except ImportError:
17    try:
18        # if you've installed it yourself it comes this way
19        import ElementTree as ET
20    except ImportError:
21        # if you've egged it this is the way it comes
22        from elementtree import ElementTree as ET
# Module-level debug flag. NOTE(review): nothing visible in this file reads it
# (DIF.__init__ takes its own ``debug`` argument, defaulting to 0) - confirm
# whether any importer relies on it before removing.
debug=1
24   
def shortLong(targetList,s,l):
    ''' Format a short-long name combination as a string, and add it to
    targetList if it is non-empty and not already present.

    targetList -- list of previously formatted names; modified in place
    s          -- short name ('' when absent)
    l          -- long name ('' when absent)

    The formatted string is 'long (short)' when both names are given,
    otherwise whichever of the two is non-empty.

    Returns targetList (the same list object that was passed in). '''
    if l!='':
        if s!='':
            r='%s (%s)'%(l,s)
        else:
            r=l
    elif s!='':
        r=s
    else:
        r=''
    # An element with neither name yields '', which must not be recorded:
    # previously it was appended as a bogus empty entry.
    if r!='' and r not in targetList: targetList.append(r)
    return targetList
37
class DIF:
    ''' Supports the NASA GCMD DIF format for python operations,
    note ... not a complete implementation, currently minimum to
    show a reasonable piece of content.

    All of the work is done by the constructor, which parses the document
    and leaves the extracted content on the instance as plain attributes:

      tree, xml                 -- the DIF element and the source document text
      entryID, name, abstract, abbreviation
      constraints, AccessConstraints, UseConstraints
      distribution              -- list of dicts, one per non-empty Distribution
      parameters, sensors, sources, projects, paleoKeywords
      bbox, timeCoverage
      centre, personnel
      authors, date, title, briefCitation, citation, creators
      services, kmlURL, wmcURL, binding
    '''

    def __init__(self,xml,et=0,debug=0,ndgObj=None):
        '''Initialise a python dif instance based on an xml document.

        xml    -- the document: a string if et is false, otherwise an
                  ElementTree instance
        et     -- true if xml is already an ElementTree instance
        debug  -- if true, append the entry ID and document to 'difs.log'
        ndgObj -- optional ndgObject for this record; when it carries a
                  discoveryURL a DISCOVERY service binding is created

        Raises TypeError if the input is of the wrong type, and ValueError if
        a string cannot be parsed or the document holds no DIF element. '''
        if et:
            try:
                self.tree=xml
                self.xml=ET.tostring(xml)
            except Exception:
                raise TypeError('DIF input is not a valid ElementTree instance')
        else:
            try:
                self.tree=loadET(xml)
                self.xml=xml
            except Exception:
                # for some reason we can't parse the document, and it's worth knowing why
                if isinstance(xml,str):
                    # Quote at most the first 300 characters, and only mark the
                    # excerpt as truncated when something really was cut off.
                    limit=300
                    if len(xml)>limit: xml=xml[:limit]+'\n...'
                    raise ValueError('DIF input cannot be parsed into an ElementTree instance:\n%s'%xml)
                else:
                    raise TypeError('DIF input of type [%s] needs to be a string!'%type(xml))

        self.debug=debug

        # if this is an ndgObject that'll have been sorted externally ...
        self.ndgObject=ndgObj

        # now try and interpret it

        helper=nsdumb(self.tree)
        self.metadataType='DIF'
        if helper.strip(self.tree.tag)!=self.metadataType:
            # the DIF may be wrapped in some other root element
            self.tree=helper.find(self.tree,self.metadataType)
            if self.tree is None:
                raise ValueError('DIF input does not include a DIF element:\n%s'%self.xml)

        self.entryID=helper.getText(self.tree,'Entry_ID')
        self.abstract=helper.getText(self.tree,'Summary')
        self.name=helper.getText(self.tree,'Entry_Title')
        # first five characters of the title (or all of it, if shorter)
        self.abbreviation=self.name[:5]

        #Note that entity.constraints.html is about access control on the metadata,
        #and so we don't populate this here ...
        self.constraints=AccessControl(None)
        self.AccessConstraints=helper.getText(self.tree,'Access_Constraints')
        self.UseConstraints=helper.getText(self.tree,'Use_Constraints')
        self.distribution=[]
        for e in helper.findall(self.tree,'Distribution'):
            d=dict([(k,helper.getText(e,k)) for k in
                ('Fees','Distribution_Media','Distribution_Size','Distribution_Format')])
            # only keep a distribution entry with at least one populated field
            if any(d[k]!='' for k in d): self.distribution.append(d)

        #need entity.parameters, entity.bbox, entity.timeCoverage, entity.curator, entity.creators
        # nb, two ways of parameters being stored: either as child elements,
        # one per level, or as a single '>' separated string in the element text
        levels=['Category','Topic','Term','Variable','Detailed_Variable']
        self.parameters=[]
        for parameter in helper.findall(self.tree,'Parameters'):
            name=''
            for level in levels:
                name+=helper.getText(parameter,level)+'>'
            name=name.rstrip('>')
            if name=='':
                name=helper.getText(parameter,'.')
                if name is not None:
                    # pad out with empty levels so there are always five
                    while len(name.split('>'))<5: name+='>'
            self.parameters.append(name)
        self.parameters=collapse2(self.parameters)

        #sensors are pretty important too
        self.sensors=[]
        for sensor in helper.findall(self.tree,'Sensor_Name'):
            s,l=helper.getText(sensor,'Short_Name'),helper.getText(sensor,'Long_Name')
            self.sensors=shortLong(self.sensors,s,l)

        #and sources
        self.sources=[]
        for source in helper.findall(self.tree,'Source_Name'):
            s,l=helper.getText(source,'Short_Name'),helper.getText(source,'Long_Name')
            self.sources=shortLong(self.sources,s,l)

        self.projects=[]
        for project in helper.findall(self.tree,'Project'):
            s,l=helper.getText(project,'Short_Name'),helper.getText(project,'Long_Name')
            self.projects=shortLong(self.projects,s,l)

        #load up information about spatial bounding box
        self.bbox=Bounding(self.tree,helper,entity='DIF')

        #load up information about temporal extent
        # nb, some DIFS use Stop Date, some use End Date; Stop_Date wins if present
        date=helper.find(self.tree,'Temporal_Coverage')
        e1,e2=helper.getText(date,'Stop_Date'),helper.getText(date,'End_Date')
        start=helper.getText(date,'Start_Date')
        status=helper.getText(self.tree,'Data_Set_Progress')
        if e1!='':
            self.timeCoverage=[(start,e1,status)]
        else:
            self.timeCoverage=[(start,e2,status)]

        #load up those silly paleo keywords
        self.paleoKeywords=[]
        for e in helper.findall(self.tree,'Paleo_Temporal_Coverage'):
            self.paleoKeywords.append(helper.getText(e,'Chronostratigraphic_Unit'))

        #Data curator information
        self.centre=DIFcontact(helper,helper.find(self.tree,'Data_Center'))
        #self.curator=DIFcontact(helper,helper.find(self.tree,'Data_Curator'))

        #Data Creators
        self.creators=[]
        # Let's first see if we get an author out of the citation,
        self.authors=helper.getText(self.tree,'Data_Set_Citation/Dataset_Creator')
        self.date=dateParse(helper.getText(self.tree,'Data_Set_Citation/Dataset_Release_Date'),'YYYY')
        #if self.date=='': self.date='XXXX'
        self.title=helper.getText(self.tree,'Data_Set_Citation/Dataset_Title')
        # a brief citation is only built when all three parts are available
        self.briefCitation=''
        if (self.authors!='' and self.date!='' and self.title!=''):
            self.briefCitation='%s (%s): %s'%(self.authors,self.date,self.title)
        #for now
        self.citation=self.briefCitation

        #an empty element sends something back, we don't want that:
        self.personnel=[DIFcontact(helper,f) for f in helper.findall(self.tree,'Personnel')]

        #services
        self.services=[]

        # not all DIFs have KML or WMC resources available, so these default
        # to empty lists
        self.kmlURL = []
        self.wmcURL = []
        for item in helper.findall(self.tree,'Related_URL'):
            contentType=helper.getText(item,'URL_Content_Type')
            serviceURL=helper.cleanup.undo(helper.getText(item,'URL'))
            self.services.append(
                DIFService(
                 contentType,
                 serviceURL,
                 helper.getText(item,'Description') ))
            # if there is a KML URL, encode it and add it as a field for
            # easy access to store in session data for selectedItems
            # NB, it needs to be encoded to avoid problems whilst sending
            # via AJAX
            # (membership test rather than find()>0, which missed a content
            # type that *starts* with the keyword)
            if 'KML' in contentType:
                # NB, passing '%' as the safe characters means forward slashes
                # are quoted too, while existing escape sequences are left alone.
                self.kmlURL.append(urllib.quote(serviceURL,'%'))
            elif 'WEB MAP CONTEXT' in contentType:
                self.wmcURL.append(urllib.quote(serviceURL,'%'))

        # a discovery binding is only available via an ndgObject that knows
        # its discovery URL
        if self.ndgObject is not None and self.ndgObject.discoveryURL is not None:
            self.binding=DIFService('DISCOVERY',self.ndgObject.discoveryURL,'Discovery record')
            #explicitly do late binding on NDG-B URL
            #... and override contents in metadatarecord
            #B='%s__%s__%s'%(self.ndgObject.repository,'NDG-B1',self.ndgObject.localID)
            #Bobject=ndgObject(B,config=self.ndgObject.config)
            #bdone=0
            #if Bobject.gettable!=-1:
            #    for s in self.services:
            #        if s.contentType=='NDG_B_SERVICE':
            #            s.url=Bobject.URL
            #            bdone=1
            #    if not bdone: self.services.append(
            #                DIFService('NDG_B_SERVICE',Bobject.URL,'NDG Browse Metadata Service'))
        else:
            self.binding=None

        if self.debug:
            # append to the log, making sure the file is closed even if the
            # write fails
            f=open('difs.log','a')
            try:
                f.write('%s##\n%s\n##################################\n'%(self.entryID,self.xml))
            finally:
                f.close()
218
219
220import unittest
221
class TestCase(unittest.TestCase):
    """ Smoke tests for the DIF class: load an example DIF from disk, print
    a few of the extracted fields, and load one record retrieved remotely.
    """

    inputFile = 'examples/neodc.eg1.dif'
    # inputFile2 is rebound several times; only the last value is in effect
    inputFile2= 'examples/bodc.eg2.edmed.dif'
    inputFile2= 'examples/bodc.eg3.difChange.dif'
    inputFile2= 'examples/ndg.noc.soton.ac.uk__DIF__NOCSDAT162.xml'
    configFile= 'config/ndgDiscovery.config'

    def setUp(self):
        ''' Load example config and DIF files for testing '''
        from ndgUtils.xmlHandler2 import xmlHandler
        handler=xmlHandler(self.inputFile2)
        self.dif=DIF(handler.tree,et=1)
        self.config=myConfig(self.configFile)

    def testEntries(self):
        ''' Testing the DIF object can be loaded and some key entries extracted '''
        print('Entry ID [%s]'%self.dif.entryID)
        print('Author [%s]'%self.dif.authors)
        print('Abstract [%s]'%self.dif.abstract)
        print('BBox [%s]'%self.dif.bbox.boxes)

    def testParameters(self):
        ''' Testing obtaining parameters from an edmed dif in original format '''
        print('Parameters %s'%self.dif.parameters)

    def testDifficult(self):
        ''' Grab a test example from the internet and load it '''
        from ndgUtils import ndgRetrieve, xmlHandler2
        #testURI='grid.bodc.nerc.ac.uk__DIF__EDMED1048008'
        testURI='neodc.nerc.ac.uk__DIF__NEODC_NEXTMAP'
        target=ndgObject(testURI,config=self.config)
        status,xml=ndgRetrieve(target,self.config)
        self.assertEqual(status,1)
        parsed=xmlHandler2.xmlHandler(xml,string=1)
        record=DIF(parsed.tree,et=1)
        serviceNames=[str(i) for i in record.services]
        print('%s %s'%(record.entryID,serviceNames))
261
262
# Run the unit tests above when this module is executed as a script.
if __name__=="__main__":
    unittest.main()
265
266       
267       
Note: See TracBrowser for help on using the repository browser.