source: ndgCommon/trunk/ndg/common/src/models/DIF.py @ 5883

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/models/DIF.py@5883
Revision 5883, 10.2 KB checked in by sdonegan, 10 years ago (diff)

Added basic discovery view URL as an attribute

Line 
1'''
2 python class to support methods on a DIF ... to conform with
3 renderEntity etc ...
4 @author: D Lowe?
5'''
6from ndg.common.src.lib.utilities import formatDateYYYY
7from molesbounding import Bounding as Bounding
8from People2 import *
9from AccessControl import AccessControl
10from ndg.common.src.lib.ETxmlView import loadET, nsdumb
11from ServiceBinding import DIFService
12from ndg.common.src.models import ndgObject
13from stubB import collapse2
14import urllib, logging
15from xml.etree import ElementTree as ET
16   
def shortLong(targetList,s,l):
    ''' Format a short-long name combination as a string, and add it to a
    list if it is non-empty and not a duplicate.

    The label is "<long> (<short>)" when both names are present, otherwise
    whichever of the two is present.

    targetList -- list of labels collected so far (modified in place)
    s          -- short name, '' when absent
    l          -- long name, '' when absent

    Returns targetList, so callers can write x=shortLong(x,s,l).
    '''
    if l and s:
        label='%s (%s)'%(l,s)
    else:
        # at most one of the two is present; take whichever it is
        label=l or s
    # an element with neither name must not leave an empty entry behind
    if label and label not in targetList:
        targetList.append(label)
    return targetList
29
class DIF:
    ''' Supports the NASA GCMD DIF format for python operations,
    note ... not a complete implementation, currently minimum to
    show a reasonable piece of content.

    All the work is done by the constructor, which reads the document and
    copies the pieces of interest onto instance attributes (entryID, name,
    abstract, distribution, parameters, sensors, sources, projects, bbox,
    timeCoverage, paleoKeywords, centre, personnel, services, kmlURL,
    wmcURL, binding, ...).
    '''

    # number of characters of an unparseable document that are echoed back
    # in the error message
    _MAX_ERROR_CHARS=300

    def __init__(self,xml,et=0,debug=0,ndgObj=None):
        '''Initialise a python dif instance based on an xml document.

        xml    -- the DIF document; expected to be an input string if et=0,
                  otherwise an ElementTree instance
        et     -- true when xml is already an ElementTree instance
        debug  -- when true, the entry id and the raw xml are appended to
                  the file 'difs.log' in the current working directory
        ndgObj -- optional ndgObject for this record; if it has a
                  discoveryURL, a DISCOVERY service binding is created

        Raises TypeError if et is set and xml cannot be serialised as an
        ElementTree, or if et is not set and the input can neither be
        parsed nor is a string.
        Raises ValueError if a string input cannot be parsed, or if the
        document does not contain a DIF element.
        '''
        logging.info("Setting up DIF data model")
        if et:
            try:
                self.tree=xml
                self.xml=ET.tostring(xml)
            except Exception:
                raise TypeError('DIF input is not a valid ElementTree instance')
        else:
            try:
                self.tree=loadET(xml)
                self.xml=xml
            except Exception:
                # for some reason we can't parse the document, and it's worth knowing why
                if isinstance(xml,str):
                    # only long documents are cut short (and marked as such)
                    if len(xml)>self._MAX_ERROR_CHARS:
                        xml=xml[:self._MAX_ERROR_CHARS]+'\n...'
                    raise ValueError('DIF input cannot be parsed into an ElementTree instance:\n%s'%xml)
                else:
                    raise TypeError('DIF input of type [%s] needs to be a string!'%type(xml))

        self.debug=debug

        # if this is an ndgObject that'll have been sorted externally ...
        self.ndgObject=ndgObj

        # now try and interpret it
        helper=nsdumb(self.tree)
        self.metadataType='DIF'
        if helper.strip(self.tree.tag)!=self.metadataType:
            # the root is not the DIF element itself, so go looking for one
            self.tree=helper.find(self.tree,self.metadataType)
            if self.tree is None:
                raise ValueError('DIF input does not include a DIF element:\n%s'%self.xml)

        self.entryID=helper.getText(self.tree,'Entry_ID')
        self.abstract=helper.getText(self.tree,'Summary')
        self.name=helper.getText(self.tree,'Entry_Title')
        # abbreviation is simply the first five characters of the title
        self.abbreviation=self.name[:5]

        # extra parameters, extracted to aid in ranking & ordering
        creationDate=helper.getText(self.tree,'DIF_Creation_Date')
        if len(creationDate)<1:
            # set to null as bad timestamp will screw up the sql commands (will scan for it later)
            self.metadataCreationDate='null'
        else:
            self.metadataCreationDate=creationDate

        self.datacentreName=helper.getText(self.tree,'Data_Center/Data_Center_Name/Short_Name')

        # fall back on the entry title when the citation carries no title
        citationTitle=helper.getText(self.tree,'Data_Set_Citation/Dataset_Title')
        if len(citationTitle)<1:
            self.datasetTitle=self.name
        else:
            self.datasetTitle=citationTitle

        # Note that entity.constraints.html is about access control on the metadata,
        # and so we don't populate this here ...
        self.constraints=AccessControl(None)
        self.AccessConstraints=helper.getText(self.tree,'Access_Constraints')
        self.UseConstraints=helper.getText(self.tree,'Use_Constraints')

        # one dictionary per Distribution element, skipping those in which
        # every field is empty
        distributionKeys=('Fees','Distribution_Media','Distribution_Size','Distribution_Format')
        self.distribution=[]
        for element in helper.findall(self.tree,'Distribution'):
            d=dict([(k,helper.getText(element,k)) for k in distributionKeys])
            if any(d[k]!='' for k in d):
                self.distribution.append(d)

        # need entity.parameters, entity.bbox, entity.timeCoverage, entity.curator, entity.creators
        # nb, two ways of parameters being stored: either as one child element
        # per keyword level, or as a single '>' separated string in the
        # Parameters element itself
        levels=('Category','Topic','Term','Variable','Detailed_Variable')
        self.parameters=[]
        for parameter in helper.findall(self.tree,'Parameters'):
            name='>'.join([helper.getText(parameter,level) for level in levels])
            name=name.rstrip('>')
            if name=='':
                name=helper.getText(parameter,'.')
                if name is not None:
                    # pad with separators so that there are always five levels
                    while len(name.split('>'))<5:
                        name+='>'
            self.parameters.append(name)
        self.parameters=collapse2(self.parameters)

        # sensors are pretty important too
        self.sensors=[]
        for sensor in helper.findall(self.tree,'Sensor_Name'):
            s,l=helper.getText(sensor,'Short_Name'),helper.getText(sensor,'Long_Name')
            self.sensors=shortLong(self.sensors,s,l)

        # and sources
        self.sources=[]
        for source in helper.findall(self.tree,'Source_Name'):
            s,l=helper.getText(source,'Short_Name'),helper.getText(source,'Long_Name')
            self.sources=shortLong(self.sources,s,l)

        # and projects
        self.projects=[]
        for project in helper.findall(self.tree,'Project'):
            s,l=helper.getText(project,'Short_Name'),helper.getText(project,'Long_Name')
            self.projects=shortLong(self.projects,s,l)

        # load up information about spatial bounding box
        self.bbox=Bounding(self.tree,helper,entity='DIF')

        # load up information about temporal extent
        # nb, some DIFS use Stop Date, some use End Date; Stop_Date wins if both are given
        # NOTE(review): date is None when there is no Temporal_Coverage element;
        # this relies on helper.getText coping with that - confirm
        date=helper.find(self.tree,'Temporal_Coverage')
        e1,e2=helper.getText(date,'Stop_Date'),helper.getText(date,'End_Date')
        start=helper.getText(date,'Start_Date')
        status=helper.getText(self.tree,'Data_Set_Progress')
        if e1!='':
            end=e1
        else:
            end=e2
        self.timeCoverage=[(start,end,status)]

        # load up those silly paleo keywords
        self.paleoKeywords=[helper.getText(e,'Chronostratigraphic_Unit')
                            for e in helper.findall(self.tree,'Paleo_Temporal_Coverage')]

        # Data curator information
        self.centre=DIFcontact(helper,helper.find(self.tree,'Data_Center'))

        # Data Creators
        self.creators=[]
        # Let's first see if we get an author out of the citation,
        self.authors=helper.getText(self.tree,'Data_Set_Citation/Dataset_Creator')
        self.date=formatDateYYYY(helper.getText(self.tree,'Data_Set_Citation/Dataset_Release_Date'))
        self.title=helper.getText(self.tree,'Data_Set_Citation/Dataset_Title')
        # a brief citation is only built when all three pieces are available
        self.briefCitation=''
        if (self.authors!='' and self.date!='' and self.title!=''):
            self.briefCitation='%s (%s): %s'%(self.authors,self.date,self.title)
        # for now
        self.citation=self.briefCitation

        # an empty element sends something back, we don't want that:
        self.personnel=[DIFcontact(helper,f) for f in helper.findall(self.tree,'Personnel')]

        # services
        self.services=[]
        # not all DIFs have KML or WMC resources available, so these default
        # to empty lists
        self.kmlURL=[]
        self.wmcURL=[]
        for item in helper.findall(self.tree,'Related_URL'):
            contentType=helper.getText(item,'URL_Content_Type')
            serviceURL=helper.cleanup.undo(helper.getText(item,'URL'))
            self.services.append(
                DIFService(
                 contentType,
                 serviceURL,
                 helper.getText(item,'Description') ))
            # if there is a KML URL, encode it and add it as a field for
            # easy access to store in session data for selectedItems
            # NB, it needs to be encoded to avoid problems whilst sending
            # via AJAX.  Passing '%' as the safe characters means that '/'
            # is no longer treated as safe (so it gets encoded too), while
            # '%' signs already in the URL are left alone.
            # 'in' rather than find()>0, so that a content type which
            # starts with the keyword is recognised as well
            if 'KML' in contentType:
                self.kmlURL.append(urllib.quote(serviceURL,'%'))
            elif 'WEB MAP CONTEXT' in contentType:
                self.wmcURL.append(urllib.quote(serviceURL,'%'))

        # NOTE: discoveryViewURL is only set when the ndgObject provides a
        # discovery URL; it is not defined on the instance otherwise.
        # (Late binding on the NDG-B URL used to be sketched here, but was
        # disabled.)
        self.binding=None
        if self.ndgObject is not None and self.ndgObject.discoveryURL is not None:
            self.binding=DIFService('DISCOVERY',self.ndgObject.discoveryURL,'Discovery record')
            self.discoveryViewURL=self.ndgObject.discoveryURL

        if self.debug:
            f=open('difs.log','a')
            try:
                f.write('%s##\n%s\n##################################\n'%(self.entryID,self.xml))
            finally:
                # make sure the handle is released even if the write fails
                f.close()

        logging.info("DIF data model set up")
229
230       
Note: See TracBrowser for help on using the repository browser.