source: ndgCommon/trunk/ndg/common/src/models/DIF.py @ 5917

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/models/DIF.py@5917
Revision 5917, 10.5 KB checked in by sdonegan, 10 years ago (diff)

Added attribute method detailing how many services available for any given DIF

Line 
1'''
2 python class to support methods on a DIF ... to conform with
3 renderEntity etc ...
4 @author: D Lowe?
5'''
6from ndg.common.src.lib.utilities import formatDateYYYY
7from molesbounding import Bounding as Bounding
8from People2 import *
9from AccessControl import AccessControl
10from ndg.common.src.lib.ETxmlView import loadET, nsdumb
11from ServiceBinding import DIFService
12from ndg.common.src.models import ndgObject
13from stubB import collapse2
14import urllib, logging
15from xml.etree import ElementTree as ET
16   
def shortLong(targetList,s,l):
    ''' Format a short-long name combination as a string, and add it to
    targetList if it is non-empty and not already present.

    targetList -- list of formatted names; it is modified in place
    s          -- the short name ('' if there is none)
    l          -- the long name ('' if there is none)

    The formatted entry is 'long (short)' when both names are given,
    otherwise whichever of the two is not blank.

    Returns targetList (the same list object that was passed in). '''
    if l!='' and s!='':
        r='%s (%s)'%(l,s)
    elif l!='':
        r=l
    else:
        r=s
    # if both names are blank there is nothing worth listing, so don't
    # pollute the list with an empty entry
    if r!='' and r not in targetList: targetList.append(r)
    return targetList
29
class DIF:
    ''' Supports the NASA GCMD DIF format for python operations,
    note ... not a complete implementation, currently minimum to
    show a reasonable piece of content '''
    def __init__(self,xml,et=0,debug=0,ndgObj=None):
        '''Initialise a python dif instance based on an xml document.

        xml    -- the DIF document: an input string if et is false,
                  otherwise an ElementTree instance
        et     -- true if xml is already an ElementTree instance
        debug  -- if true, the entry id and the document are appended to
                  the file 'difs.log' in the current working directory
        ndgObj -- optional ndgObject for this record; if it has a
                  discoveryURL, a 'DISCOVERY' service binding is created

        Raises TypeError if xml is not of the kind announced by et, and
        ValueError if a string cannot be parsed or if no DIF element can
        be found in the document. '''
        logging.info("Setting up DIF data model")
        if et:
            try:
                self.tree=xml
                self.xml=ET.tostring(xml)
            except Exception:
                raise TypeError('DIF input is not a valid ElementTree instance')
        else:
            try:
                self.tree=loadET(xml)
                self.xml=xml
            except Exception:
                # for some reason we can't parse the document, and it's worth knowing why
                if isinstance(xml,str):
                    # only quote the start of a long document, and only
                    # mark it as cut when something really was cut
                    limit=300
                    if len(xml)>limit: xml=xml[:limit]+'\n...'
                    raise ValueError('DIF input cannot be parsed into an ElementTree instance:\n%s'%xml)
                else:
                    raise TypeError('DIF input of type [%s] needs to be a string!'%type(xml))

        self.debug=debug

        # if this is an ndgObject that'll have been sorted externally ...
        self.ndgObject=ndgObj

        # now try and interpret it: if the root element is not the DIF
        # element itself, look for one below it
        helper=nsdumb(self.tree)
        self.metadataType='DIF'
        if helper.strip(self.tree.tag)!=self.metadataType:
            self.tree=helper.find(self.tree,self.metadataType)
            if self.tree is None:
                raise ValueError('DIF input does not include a DIF element:\n%s'%self.xml)

        self.entryID=helper.getText(self.tree,'Entry_ID')
        self.abstract=helper.getText(self.tree,'Summary')
        self.name=helper.getText(self.tree,'Entry_Title')
        # first five characters of the title (or all of it, if shorter)
        self.abbreviation=self.name[0:5]

        # some extra parameters to aid in ranking & ordering
        creationDate=helper.getText(self.tree,'DIF_Creation_Date')
        if len(creationDate)<1:
            # set to null as bad timestamp will screw up the sql commands (will scan for it later)
            self.metadataCreationDate='null'
        else:
            self.metadataCreationDate=creationDate

        self.datacentreName=helper.getText(self.tree,'Data_Center/Data_Center_Name/Short_Name')

        # fall back on the entry title when the citation carries no title
        citationTitle=helper.getText(self.tree,'Data_Set_Citation/Dataset_Title')
        if len(citationTitle)<1:
            self.datasetTitle=self.name
        else:
            self.datasetTitle=citationTitle

        # Note that entity.constraints.html is about access control on the metadata,
        # and so we don't populate this here ...
        self.constraints=AccessControl(None)
        self.AccessConstraints=helper.getText(self.tree,'Access_Constraints')
        self.UseConstraints=helper.getText(self.tree,'Use_Constraints')

        # keep only those Distribution elements with at least one field filled in
        self.distribution=[]
        for e in helper.findall(self.tree,'Distribution'):
            d=dict((k,helper.getText(e,k)) for k in
                ('Fees','Distribution_Media','Distribution_Size','Distribution_Format'))
            if any(d[k]!='' for k in d): self.distribution.append(d)

        # need entity.parameters, entity.bbox, entity.timeCoverage, entity.curator, entity.creators
        # nb, two ways of parameters being stored: either as one child
        # element per level, or as a single '>' separated string held
        # directly in the Parameters element
        levels=('Category','Topic','Term','Variable','Detailed_Variable')
        self.parameters=[]
        for parameter in helper.findall(self.tree,'Parameters'):
            name='>'.join([helper.getText(parameter,level) for level in levels]).rstrip('>')
            if name=='':
                name=helper.getText(parameter,'.')
                if name is not None:
                    # pad with separators so there is one slot per level
                    while len(name.split('>'))<len(levels): name+='>'
            self.parameters.append(name)
        self.parameters=collapse2(self.parameters)

        # sensors are pretty important too
        self.sensors=[]
        for sensor in helper.findall(self.tree,'Sensor_Name'):
            s,l=helper.getText(sensor,'Short_Name'),helper.getText(sensor,'Long_Name')
            self.sensors=shortLong(self.sensors,s,l)

        # and sources
        self.sources=[]
        for source in helper.findall(self.tree,'Source_Name'):
            s,l=helper.getText(source,'Short_Name'),helper.getText(source,'Long_Name')
            self.sources=shortLong(self.sources,s,l)

        self.projects=[]
        for project in helper.findall(self.tree,'Project'):
            s,l=helper.getText(project,'Short_Name'),helper.getText(project,'Long_Name')
            self.projects=shortLong(self.projects,s,l)

        # load up information about spatial bounding box
        self.bbox=Bounding(self.tree,helper,entity='DIF')

        # load up information about temporal extent
        # nb, some DIFS use Stop Date, some use End Date; Stop_Date wins
        # when both are present
        date=helper.find(self.tree,'Temporal_Coverage')
        e1,e2=helper.getText(date,'Stop_Date'),helper.getText(date,'End_Date')
        start=helper.getText(date,'Start_Date')
        status=helper.getText(self.tree,'Data_Set_Progress')
        if e1!='':
            self.timeCoverage=[(start,e1,status)]
        else:
            self.timeCoverage=[(start,e2,status)]

        # load up those silly paleo keywords
        self.paleoKeywords=[]
        for e in helper.findall(self.tree,'Paleo_Temporal_Coverage'):
            self.paleoKeywords.append(helper.getText(e,'Chronostratigraphic_Unit'))

        # Data curator information
        self.centre=DIFcontact(helper,helper.find(self.tree,'Data_Center'))
        #self.curator=DIFcontact(helper,helper.find(self.tree,'Data_Curator'))

        # Data Creators
        self.creators=[]
        # Let's first see if we get an author out of the citation,
        self.authors=helper.getText(self.tree,'Data_Set_Citation/Dataset_Creator')
        self.date=formatDateYYYY(helper.getText(self.tree,'Data_Set_Citation/Dataset_Release_Date'))
        self.title=citationTitle
        # a brief citation is only built when all three parts are known
        self.briefCitation=''
        if (self.authors!='' and self.date!='' and self.title!=''):
            self.briefCitation='%s (%s): %s'%(self.authors,self.date,self.title)
        # for now
        self.citation=self.briefCitation

        # an empty element sends something back, we don't want that:
        self.personnel=[DIFcontact(helper,f) for f in helper.findall(self.tree,'Personnel')]

        # services
        self.services=[]
        # not all DIFs have KML or WMC resources available - these lists
        # simply stay empty in that case
        self.kmlURL=[]
        self.wmcURL=[]
        self.serviceCounter=0
        for item in helper.findall(self.tree,'Related_URL'):
            contentType=helper.getText(item,'URL_Content_Type')
            serviceURL=helper.cleanup.undo(helper.getText(item,'URL'))
            self.services.append(
                DIFService(
                 contentType,
                 serviceURL,
                 helper.getText(item,'Description') ))
            # if there is a KML URL, encode it and add it as a field for
            # easy access to store in session data for selectedItems
            # NB, it needs to be encoded to avoid problems whilst sending
            # via AJAX.  A plain substring test is used so that a content
            # type which *starts* with the keyword is recognised as well.
            if 'KML' in contentType:
                # NB, passing '%' as the safe characters means forward
                # slashes get quoted too, while existing escape sequences
                # are not escaped a second time.
                self.kmlURL.append(urllib.quote(serviceURL,'%'))
            elif 'WEB MAP CONTEXT' in contentType:
                self.wmcURL.append(urllib.quote(serviceURL,'%'))

            # count number of services available - will be used to determine whether to show "services" bar in DDS
            if 'GET SERVICE' in contentType:
                self.serviceCounter+=1

        # a discovery binding only exists when we were handed an ndgObject
        # that knows its discovery URL (the late binding of the NDG-B
        # service URL that used to be done here is currently disabled)
        self.binding=None
        if self.ndgObject is not None and self.ndgObject.discoveryURL is not None:
            self.binding=DIFService('DISCOVERY',self.ndgObject.discoveryURL,'Discovery record')
            self.discoveryViewURL=self.ndgObject.discoveryURL

        if self.debug:
            f=open('difs.log','a')
            try:
                f.write('%s##\n%s\n##################################\n'%(self.entryID,self.xml))
            finally:
                # make sure the log is closed even if the write fails
                f.close()

        logging.info("DIF data model set up")
235
236       
Note: See TracBrowser for help on using the repository browser.