source: TI05-delivery/ows_framework/trunk/ows_server/ows_server/models/DIF.py @ 3159

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI05-delivery/ows_framework/trunk/ows_server/ows_server/models/DIF.py@3159
Revision 3159, 10.0 KB checked in by lawrence, 12 years ago (diff)

Fixes for ticket:291. (These are hacks: I'm not at all confident
I've got the escape sequences sorted out properly, but I intend
to fix this once we have the new ET in ndgUtils and that has
been merged back into the trunk.)

Line 
1# python class to support methods on a DIF ... to conform with
2# renderEntity etc ...
3#
4from Utilities import *
5from geoUtilities import *
6from People2 import *
7from AccessControl import AccessControl
8from ETxmlView import loadET, nsdumb
9from ServiceBinding import DIFService
10from ndgObject import ndgObject
11from stubB import collapse2
12try: #python 2.5
13    from xml.etree import ElementTree as ET
14except ImportError:
15    try:
16        # if you've installed it yourself it comes this way
17        import ElementTree as ET
18    except ImportError:
19        # if you've egged it this is the way it comes
20        from elementtree import ElementTree as ET
21debug=1
22   
def shortLong(targetList,s,l):
    ''' Format a short-long name combination as a string, and add it to a
    list if it is non-empty and not a duplicate.

    targetList -- list of already formatted names (modified in place)
    s          -- short name, may be empty or None
    l          -- long name, may be empty or None

    The formatted value is "long (short)" when both names are present,
    otherwise whichever of the two is present. The (same) list object is
    returned so that the call can be used on the right of an assignment. '''
    if l and s:
        r='%s (%s)'%(l,s)
    else:
        # at most one of the names is present; '' if neither is
        r=l or s or ''
    # a pair with neither name carries no information, so it is skipped
    # rather than putting an empty string into the list
    if r and r not in targetList:
        targetList.append(r)
    return targetList
35
class DIF:
    ''' Supports the NASA GCMD DIF format for python operations.

    Note ... not a complete implementation, currently the minimum needed to
    show a reasonable piece of content. All of the work is done by the
    constructor, which copies the interesting parts of the document onto
    instance attributes (entryID, abstract, name, parameters, sensors,
    sources, projects, bbox, timeCoverage, centre, personnel, services ...) '''

    def __init__(self,xml,et=0,debug=0,ndgObj=None):
        '''Initialise a python dif instance based on an xml document.

        xml    -- the document: an input string if et is false, otherwise
                  an ElementTree instance
        et     -- true if xml is already an ElementTree instance
        debug  -- if true, the entry id and the xml are appended to the file
                  difs.log in the current working directory
        ndgObj -- optional ndgObject for this record (sorted out externally);
                  only used here to build the discovery service binding

        Raises TypeError if the input is not of the expected type, and
        ValueError if it cannot be parsed or does not include a DIF element. '''
        if et:
            try:
                self.tree=xml
                self.xml=ET.tostring(xml)
            except Exception:
                raise TypeError('DIF input is not a valid ElementTree instance')
        else:
            try:
                self.tree=loadET(xml)
                self.xml=xml
            except Exception:
                # for some reason we can't parse the document, and it's worth knowing why
                if isinstance(xml,str):
                    # quote only the start of a long document in the message
                    if len(xml)>300: xml=xml[:300]+'\n...'
                    raise ValueError('DIF input cannot be parsed into an ElementTree instance:\n%s'%xml)
                else:
                    raise TypeError('DIF input of type [%s] needs to be a string!'%type(xml))

        self.debug=debug

        # if this is an ndgObject that'll have been sorted externally ...
        self.ndgObject=ndgObj

        # now try and interpret it
        helper=nsdumb(self.tree)
        self.metadataType='DIF'
        if helper.strip(self.tree.tag)!=self.metadataType:
            # the DIF may be wrapped in some other element, so look for it
            self.tree=helper.find(self.tree,self.metadataType)
            if self.tree is None:
                raise ValueError('DIF input does not include a DIF element:\n%s'%self.xml)

        self.entryID=helper.getText(self.tree,'Entry_ID')
        self.abstract=helper.getText(self.tree,'Summary')
        self.name=helper.getText(self.tree,'Entry_Title')
        # (up to) the first five characters of the title
        self.abbreviation=self.name[:5]

        #Note that entity.constraints.html is about access control on the metadata,
        #and so we don't populate this here ...
        self.constraints=AccessControl(None)
        self.AccessConstraints=helper.getText(self.tree,'Access_Constraints')
        self.UseConstraints=helper.getText(self.tree,'Use_Constraints')

        # only keep distribution entries which have at least one non-empty value
        self.distribution=[]
        for e in helper.findall(self.tree,'Distribution'):
            d=dict([(k,helper.getText(e,k)) for k in
                ('Fees','Distribution_Media','Distribution_Size','Distribution_Format')])
            if [v for v in d.values() if v!='']: self.distribution.append(d)

        #need entity.parameters, entity.bbox, entity.timeCoverage, entity.curator, entity.creators
        # nb, two ways of parameters being stored: either as one child element
        # per keyword level, or as a single '>' delimited string
        levels=('Category','Topic','Term','Variable','Detailed_Variable')
        self.parameters=[]
        for parameter in helper.findall(self.tree,'Parameters'):
            name='>'.join([helper.getText(parameter,level) for level in levels]).rstrip('>')
            if name=='':
                # no child elements, so use the text of the element itself,
                # padded so that it has (at least) five '>' delimited levels
                name=helper.getText(parameter,'.')
                if name is not None:
                    name+='>'*max(0,4-name.count('>'))
            self.parameters.append(name)
        self.parameters=collapse2(self.parameters)

        #sensors are pretty important too, and sources, and projects
        self.sensors=self._names(helper,'Sensor_Name')
        self.sources=self._names(helper,'Source_Name')
        self.projects=self._names(helper,'Project')

        #load up information about spatial bounding box
        self.bbox=Bounding(self.tree,helper,entity='DIF')

        #load up information about temporal extent
        # nb, some DIFS use Stop Date, some use End Date (Stop Date wins if
        # both are present)
        date=helper.find(self.tree,'Temporal_Coverage')
        e1,e2=helper.getText(date,'Stop_Date'),helper.getText(date,'End_Date')
        start=helper.getText(date,'Start_Date')
        status=helper.getText(self.tree,'Data_Set_Progress')
        end=e1
        if end=='': end=e2
        self.timeCoverage=[(start,end,status)]

        #load up those silly paleo keywords
        self.paleoKeywords=[helper.getText(e,'Chronostratigraphic_Unit')
                            for e in helper.findall(self.tree,'Paleo_Temporal_Coverage')]

        #Data curator information
        self.centre=DIFcontact(helper,helper.find(self.tree,'Data_Center'))
        #self.curator=DIFcontact(helper,helper.find(self.tree,'Data_Curator'))

        #Data Creators
        self.creators=[]
        # Let's first see if we get an author out of the citation,
        self.authors=helper.getText(self.tree,'Data_Set_Citation/Dataset_Creator')
        self.date=dateParse(helper.getText(self.tree,'Data_Set_Citation/Dataset_Release_Date'),'YYYY')
        self.title=helper.getText(self.tree,'Data_Set_Citation/Dataset_Title')
        # a brief citation is only built if all three parts are available
        self.briefCitation=''
        if self.authors!='' and self.date!='' and self.title!='':
            self.briefCitation='%s (%s): %s'%(self.authors,self.date,self.title)
        #for now
        self.citation=self.briefCitation

        # NOTE: no filtering is done here, so (according to the original
        # comment) an empty Personnel element still sends something back
        self.personnel=[DIFcontact(helper,f) for f in helper.findall(self.tree,'Personnel')]

        #services
        self.services=[]
        for item in helper.findall(self.tree,'Related_URL'):
            self.services.append(
                DIFService(
                 helper.getText(item,'URL_Content_Type'),
                 helper.cleanup.undo(helper.getText(item,'URL')),
                 helper.getText(item,'Description') ))

        # a discovery binding is only available if we were given an ndgObject
        # which knows its discovery URL
        # (late binding of an NDG_B_SERVICE entry, overriding the contents of
        # the metadata record, used to be done here as well but is disabled)
        self.binding=None
        if self.ndgObject is not None and self.ndgObject.discoveryURL is not None:
            self.binding=DIFService('DISCOVERY',self.ndgObject.discoveryURL,'Discovery record')

        if self.debug:
            f=open('difs.log','a')
            try:
                f.write('%s##\n%s\n##################################\n'%(self.entryID,self.xml))
            finally:
                # make sure the log is closed even if the write fails
                f.close()

    def _names(self,helper,tag):
        ''' Return the list of formatted short/long names (see shortLong) for
        all the elements called tag in the document. '''
        names=[]
        for e in helper.findall(self.tree,tag):
            s,l=helper.getText(e,'Short_Name'),helper.getText(e,'Long_Name')
            names=shortLong(names,s,l)
        return names
201
202
203import unittest
204
class TestCase(unittest.TestCase):
    """ Exercises the DIF class against example documents. Most of the
    results are printed for inspection rather than asserted. """

    inputFile = 'examples/neodc.eg1.dif'
    # other example documents which have been used as inputFile2:
    #   'examples/bodc.eg2.edmed.dif'
    #   'examples/bodc.eg3.difChange.dif'
    inputFile2 = 'examples/ndg.noc.soton.ac.uk__DIF__NOCSDAT162.xml'
    configFile = 'config/ndgDiscovery.config'

    def setUp(self):
        ''' Load example config and DIF files for testing '''
        from xmlHandler2 import xmlHandler
        handler = xmlHandler(self.inputFile2)
        self.dif = DIF(handler.tree, et=1)
        self.config = myConfig(self.configFile)

    def testEntries(self):
        ''' Testing the DIF object can be loaded and some key entries extracted '''
        dif = self.dif
        print('Entry ID [%s]'%dif.entryID)
        print('Author [%s]'%dif.authors)
        print('Abstract [%s]'%dif.abstract)
        print('BBox [%s]'%dif.bbox.boxes)

    def testParameters(self):
        ''' Testing obtaining parameters from an edmed dif in original format '''
        print('Parameters %s'%self.dif.parameters)

    def testDifficult(self):
        ''' Grab a test example from the internet and load it '''
        from ndgRetrieve import ndgRetrieve
        # an alternative record to try: 'grid.bodc.nerc.ac.uk__DIF__EDMED1048008'
        testURI = 'neodc.nerc.ac.uk__DIF__NEODC_NEXTMAP'
        uri = ndgObject(testURI, config=self.config)
        status, xml = ndgRetrieve(uri, self.config)
        self.assertEqual(status, 1)
        d = DIF(xml.tree, et=1)
        serviceNames = [str(i) for i in d.services]
        print('%s %s'%(d.entryID, serviceNames))
243
244
if __name__ == '__main__':
    # run the test cases in this module when it is executed as a script
    unittest.main()
247
248       
249       
Note: See TracBrowser for help on using the repository browser.