source: exist/trunk/python/ndgUtils/models/DIF.py @ 4487

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/models/DIF.py@4487
Revision 4487, 9.5 KB checked in by cbyrom, 12 years ago (diff)

Further restructure of codebase - moving datamodels into ndgUtils to allow reuse + create visualise and browse sub structures to better
organise related code. Separate out some inner classes into their own
modules to make more visible.

Line 
1# python class to support methods on a DIF ... to conform with
2# renderEntity etc ...
3#
4from ndgUtils.models.utilities import formatDateYYYY
5from ndgUtils.models.molesbounding import Bounding as Bounding
6from People2 import *
7from AccessControl import AccessControl
8from ndgUtils.ETxmlView import loadET, nsdumb
9from ServiceBinding import DIFService
10from ndgUtils.ndgObject import ndgObject
11from stubB import collapse2
12import urllib, logging
13
14try: #python 2.5
15    from xml.etree import ElementTree as ET
16except ImportError:
17    try:
18        # if you've installed it yourself it comes this way
19        import ElementTree as ET
20    except ImportError:
21        # if you've egged it this is the way it comes
22        from elementtree import ElementTree as ET
23debug=1
24   
def shortLong(targetList,s,l):
    ''' Format a short-long name combination as a string, and add it to
    targetList if it is non-empty and not already present.

    targetList -- list of formatted names collected so far (modified in place)
    s          -- short name ('' if there is none)
    l          -- long name ('' if there is none)

    The formatted name is 'long (short)' when both names are given,
    otherwise whichever of the two is non-empty.

    Returns targetList (the same list object that was passed in). '''
    if l!='' and s!='':
        r='%s (%s)'%(l,s)
    elif l!='':
        r=l
    else:
        r=s
    # an element with neither a short nor a long name yields '', which must
    # not end up in the list; duplicates are skipped as well
    if r!='' and r not in targetList: targetList.append(r)
    return targetList
37
class DIF:
    ''' Supports the NASA GCMD DIF format for python operations,
    note ... not a complete implementation, currently the minimum needed
    to show a reasonable piece of content '''
    def __init__(self,xml,et=0,debug=0,ndgObj=None):
        '''Initialise a python dif instance based on an xml document.

        xml    -- the DIF document: expected to be a string if et is false,
                  otherwise an ElementTree instance
        et     -- true if xml is already an ElementTree instance
        debug  -- if true, the entry ID and xml are appended to 'difs.log'
        ndgObj -- optional ndgObject for this record (sorted externally);
                  its discoveryURL, if set, provides the service binding

        Raises TypeError if the input is of the wrong type and ValueError if
        a string cannot be parsed or the document holds no DIF element. '''
        logging.info("Setting up DIF data model")
        if et:
            try:
                self.tree=xml
                self.xml=ET.tostring(xml)
            except Exception:
                raise TypeError('DIF input is not a valid ElementTree instance')
        else:
            try:
                self.tree=loadET(xml)
                self.xml=xml
            except Exception: # for some reason we can't parse the document, and it's worth knowing why
                if isinstance(xml,str):
                    # only quote the start of a long document in the message
                    if len(xml)>300: xml=xml[0:300]+'\n...'
                    raise ValueError('DIF input cannot be parsed into an ElementTree instance:\n%s'%xml)
                else:
                    raise TypeError('DIF input of type [%s] needs to be a string!'%type(xml))

        self.debug=debug

        # if this is an ndgObject that'll have been sorted externally ...
        self.ndgObject=ndgObj

        # now try and interpret it

        helper=nsdumb(self.tree)
        self.metadataType='DIF'
        # the DIF element may be the root itself or sit inside a wrapper
        if helper.strip(self.tree.tag)!=self.metadataType:
            self.tree=helper.find(self.tree,self.metadataType)
            if self.tree is None:
                raise ValueError('DIF input does not include a DIF element:\n%s'%self.xml)

        self.entryID=helper.getText(self.tree,'Entry_ID')
        self.abstract=helper.getText(self.tree,'Summary')
        self.name=helper.getText(self.tree,'Entry_Title')
        # abbreviation is simply the first five characters of the title
        self.abbreviation=self.name[0:5]

        #Note that entity.constraints.html is about access control on the metadata,
        #and so we don't populate this here ...
        self.constraints=AccessControl(None)
        self.AccessConstraints=helper.getText(self.tree,'Access_Constraints')
        self.UseConstraints=helper.getText(self.tree,'Use_Constraints')

        # keep only those Distribution elements with at least one non-empty field
        self.distribution=[]
        for e in helper.findall(self.tree,'Distribution'):
            d=dict([(k,helper.getText(e,k)) for k in
                ('Fees','Distribution_Media','Distribution_Size','Distribution_Format')])
            for k in d:
                if d[k]!='':
                    self.distribution.append(d)
                    break

        #need entity.parameters, entity.bbox, entity.timeCoverage, entity.curator, entity.creators
        # nb, two ways of parameters being stored: either as one child element
        # per keyword level, or as a single '>' separated string in the
        # Parameters element itself
        self.parameters=[]
        for parameter in helper.findall(self.tree,'Parameters'):
            levels=[helper.getText(parameter,level) for level in
                ['Category','Topic','Term','Variable','Detailed_Variable']]
            name='>'.join(levels).rstrip('>')
            if name=='':
                name=helper.getText(parameter,'.')
                if name is not None:
                    # pad with '>' so that the string always has five levels
                    name+='>'*(4-name.count('>'))
            self.parameters.append(name)
        self.parameters=collapse2(self.parameters)

        #sensors are pretty important too
        self.sensors=self._shortLongNames(helper,'Sensor_Name')

        #and sources
        self.sources=self._shortLongNames(helper,'Source_Name')

        self.projects=self._shortLongNames(helper,'Project')

        #load up information about spatial bounding box
        self.bbox=Bounding(self.tree,helper,entity='DIF')

        #load up information about temporal extent
        # nb, some DIFS use Stop Date, some use End Date; Stop_Date wins
        # if both are present
        date=helper.find(self.tree,'Temporal_Coverage')
        e1,e2=helper.getText(date,'Stop_Date'),helper.getText(date,'End_Date')
        start=helper.getText(date,'Start_Date')
        status=helper.getText(self.tree,'Data_Set_Progress')
        if e1!='':
            self.timeCoverage=[(start,e1,status)]
        else:
            self.timeCoverage=[(start,e2,status)]

        #load up those silly paleo keywords
        self.paleoKeywords=[helper.getText(e,'Chronostratigraphic_Unit')
            for e in helper.findall(self.tree,'Paleo_Temporal_Coverage')]

        #Data curator information
        self.centre=DIFcontact(helper,helper.find(self.tree,'Data_Center'))
        #self.curator=DIFcontact(helper,helper.find(self.tree,'Data_Curator'))

        #Data Creators
        self.creators=[]
        # Let's first see if we get an author out of the citation,
        self.authors=helper.getText(self.tree,'Data_Set_Citation/Dataset_Creator')
        self.date=formatDateYYYY(helper.getText(self.tree,'Data_Set_Citation/Dataset_Release_Date'))
        self.title=helper.getText(self.tree,'Data_Set_Citation/Dataset_Title')
        # a brief citation is only built when all three parts are available
        self.briefCitation=''
        if (self.authors!='' and self.date!='' and self.title!=''):
            self.briefCitation='%s (%s): %s'%(self.authors,self.date,self.title)
        #for now
        self.citation=self.briefCitation

        #an empty element sends something back, we don't want that:
        self.personnel=[DIFcontact(helper,f) for f in helper.findall(self.tree,'Personnel')]
        #services
        self.services=[]

        # not all DIFs have KML or WMC resources available, so these
        # default to empty lists
        self.kmlURL = []
        self.wmcURL = []
        for item in helper.findall(self.tree,'Related_URL'):
            contentType=helper.getText(item,'URL_Content_Type')
            # NOTE(review): cleanup.undo presumably reverses an escaping
            # applied when the document was loaded - confirm in ETxmlView
            serviceURL=helper.cleanup.undo(helper.getText(item,'URL'))
            self.services.append(
                DIFService(
                 contentType,
                 serviceURL,
                 helper.getText(item,'Description') ))
            # if there is a KML URL, encode it and add it as a field for
            # easy access to store in session data for selectedItems
            # NB, it needs to be encoded to avoid problems whilst sending
            # via AJAX
            # ('in' rather than find()>0, so that a content type which
            # starts with the keyword is matched as well)
            if 'KML' in contentType:
                # NB, passing '%' as the safe characters means forward slashes
                # are no longer safe and so get percent-encoded, whilst any
                # existing '%' escape sequences are left alone.
                self.kmlURL.append(urllib.quote(serviceURL,'%'))
            elif 'WEB MAP CONTEXT' in contentType:
                self.wmcURL.append(urllib.quote(serviceURL,'%'))

        # the binding is only available via an ndgObject with a discovery URL
        self.binding=None
        if self.ndgObject is not None and self.ndgObject.discoveryURL is not None:
            self.binding=DIFService('DISCOVERY',self.ndgObject.discoveryURL,'Discovery record')
            #explicitly do late binding on NDG-B URL
            #... and override contents in metadatarecord
            #B='%s__%s__%s'%(self.ndgObject.repository,'NDG-B1',self.ndgObject.localID)
            #Bobject=ndgObject(B,config=self.ndgObject.config)
            #bdone=0
            #if Bobject.gettable!=-1:
            #    for s in self.services:
            #        if s.contentType=='NDG_B_SERVICE':
            #            s.url=Bobject.URL
            #            bdone=1
            #    if not bdone: self.services.append(
            #                DIFService('NDG_B_SERVICE',Bobject.URL,'NDG Browse Metadata Service'))

        if self.debug:
            f=open('difs.log','a')
            try:
                f.write('%s##\n%s\n##################################\n'%(self.entryID,self.xml))
            finally:
                # make sure the log file is closed even if the write fails
                f.close()

        logging.info("DIF data model set up")

    def _shortLongNames(self,helper,tag):
        ''' Collect the formatted short/long names (see shortLong) of every
        element called tag found under self.tree '''
        names=[]
        for item in helper.findall(self.tree,tag):
            s,l=helper.getText(item,'Short_Name'),helper.getText(item,'Long_Name')
            names=shortLong(names,s,l)
        return names
220
221       
Note: See TracBrowser for help on using the repository browser.