1 | ''' |
---|
2 | python class to support methods on a DIF ... to conform with |
---|
3 | renderEntity etc ... |
---|
4 | @author: D Lowe? |
---|
5 | ''' |
---|
6 | from ndg.common.src.lib.utilities import formatDateYYYY |
---|
7 | from molesbounding import Bounding as Bounding |
---|
8 | from People2 import * |
---|
9 | from AccessControl import AccessControl |
---|
10 | from ndg.common.src.lib.ETxmlView import loadET, nsdumb |
---|
11 | from ServiceBinding import DIFService |
---|
12 | from ndg.common.src.models import ndgObject |
---|
13 | from stubB import collapse2 |
---|
14 | import urllib, logging |
---|
15 | from xml.etree import ElementTree as ET |
---|
16 | |
---|
def shortLong(targetList, s, l):
    ''' Format a short-long name combination as a string, and add it to a
    list if it is non-empty and not already present.

    targetList -- list to extend; it is modified in place
    s          -- short name ('' when absent)
    l          -- long name ('' when absent)

    The formatted entry is 'long (short)' when both names are given,
    otherwise whichever of the two is non-empty.  Returns targetList
    (the same list object that was passed in). '''
    if l != '':
        if s != '':
            r = '%s (%s)' % (l, s)
        else:
            r = l
    else:
        r = s
    # Skip the entry when both names were empty: the contract above is to
    # add only non-empty names, and a '' entry is of no use to callers.
    if r != '' and r not in targetList:
        targetList.append(r)
    return targetList
---|
29 | |
---|
class DIF:
    ''' Supports the NASA GCMD DIF format for python operations,
    note ... not a complete implementation, currently minimum to
    show a reasonable piece of content '''

    # Longest prefix of an unparseable document echoed back in the error
    _MAX_ERROR_SNIPPET = 300

    def __init__(self, xml, et=0, debug=0, ndgObj=None):
        '''Initialise a python dif instance based on an xml document.

        xml    -- the DIF document: a string if et is false, otherwise an
                  ElementTree instance
        et     -- true if xml is already an ElementTree instance
        debug  -- if true, append the entry id and raw xml to 'difs.log'
        ndgObj -- optional ndgObject for this record; when it carries a
                  discoveryURL that URL is used to build self.binding

        Raises TypeError if the input is of the wrong type, and ValueError
        if a string input cannot be parsed or no DIF element is found. '''
        logging.info("Setting up DIF data model")
        if et:
            try:
                self.tree = xml
                self.xml = ET.tostring(xml)
            except Exception:
                raise TypeError('DIF input is not a valid ElementTree instance')
        else:
            try:
                self.tree = loadET(xml)
                self.xml = xml
            except Exception:
                # for some reason we can't parse the document, and it's
                # worth knowing why
                if isinstance(xml, str):
                    # only abbreviate documents that really are too long
                    if len(xml) > self._MAX_ERROR_SNIPPET:
                        xml = xml[:self._MAX_ERROR_SNIPPET] + '\n...'
                    raise ValueError('DIF input cannot be parsed into an ElementTree instance:\n%s'%xml)
                else:
                    raise TypeError('DIF input of type [%s] needs to be a string!'%type(xml))

        self.debug = debug

        # if this is an ndgObject that'll have been sorted externally ...
        self.ndgObject = ndgObj

        # now try and interpret it
        helper = nsdumb(self.tree)
        self.metadataType = 'DIF'
        if helper.strip(self.tree.tag) != self.metadataType:
            # the DIF may be wrapped in another element; look for it below
            self.tree = helper.find(self.tree, self.metadataType)
            if self.tree is None:
                raise ValueError('DIF input does not include a DIF element:\n%s'%self.xml)

        self.entryID = helper.getText(self.tree, 'Entry_ID')
        self.abstract = helper.getText(self.tree, 'Summary')
        self.name = helper.getText(self.tree, 'Entry_Title')
        self.abbreviation = self.name[:5]

        # extra columns extracted to aid in ranking & ordering
        creationDate = helper.getText(self.tree, 'DIF_Creation_Date')
        if len(creationDate) < 1:
            # set to null as bad timestamp will screw up the sql commands
            # (will scan for it later)
            self.metadataCreationDate = 'null'
        else:
            self.metadataCreationDate = creationDate

        self.datacentreName = helper.getText(self.tree, 'Data_Center/Data_Center_Name/Short_Name')

        # fall back on the entry title when the citation carries no title
        citedTitle = helper.getText(self.tree, 'Data_Set_Citation/Dataset_Title')
        if len(citedTitle) < 1:
            self.datasetTitle = self.name
        else:
            self.datasetTitle = citedTitle

        # Note that entity.constraints.html is about access control on the
        # metadata, and so we don't populate this here ...
        self.constraints = AccessControl(None)
        self.AccessConstraints = helper.getText(self.tree, 'Access_Constraints')
        self.UseConstraints = helper.getText(self.tree, 'Use_Constraints')

        # keep only the distribution entries that have at least one field set
        self.distribution = []
        distributionKeys = ('Fees', 'Distribution_Media', 'Distribution_Size', 'Distribution_Format')
        for e in helper.findall(self.tree, 'Distribution'):
            d = dict((k, helper.getText(e, k)) for k in distributionKeys)
            if any(v != '' for v in d.values()):
                self.distribution.append(d)

        # need entity.parameters, entity.bbox, entity.timeCoverage,
        # entity.curator, entity.creators
        # nb, two ways of parameters being stored: as a hierarchy of child
        # elements, or as plain text in the Parameters element itself
        self.parameters = []
        levels = ('Category', 'Topic', 'Term', 'Variable', 'Detailed_Variable')
        for parameter in helper.findall(self.tree, 'Parameters'):
            name = '>'.join([helper.getText(parameter, level) for level in levels])
            name = name.rstrip('>')
            if name == '':
                name = helper.getText(parameter, '.')
            if name is not None:
                # pad to five '>'-separated levels
                while len(name.split('>')) < 5:
                    name += '>'
                self.parameters.append(name)
        self.parameters = collapse2(self.parameters)

        # sensors are pretty important too, and sources, and projects
        self.sensors = self._shortLongNames(helper, 'Sensor_Name')
        self.sources = self._shortLongNames(helper, 'Source_Name')
        self.projects = self._shortLongNames(helper, 'Project')

        # load up information about spatial bounding box
        self.bbox = Bounding(self.tree, helper, entity='DIF')

        # load up information about temporal extent
        # nb, some DIFS use Stop Date, some use End Date
        date = helper.find(self.tree, 'Temporal_Coverage')
        stopDate = helper.getText(date, 'Stop_Date')
        endDate = helper.getText(date, 'End_Date')
        start = helper.getText(date, 'Start_Date')
        status = helper.getText(self.tree, 'Data_Set_Progress')
        if stopDate != '':
            self.timeCoverage = [(start, stopDate, status)]
        else:
            self.timeCoverage = [(start, endDate, status)]

        # load up those silly paleo keywords
        self.paleoKeywords = [helper.getText(e, 'Chronostratigraphic_Unit')
                              for e in helper.findall(self.tree, 'Paleo_Temporal_Coverage')]

        # Data curator information
        self.centre = DIFcontact(helper, helper.find(self.tree, 'Data_Center'))

        # Data Creators
        self.creators = []
        # Let's first see if we get an author out of the citation,
        self.authors = helper.getText(self.tree, 'Data_Set_Citation/Dataset_Creator')
        self.date = formatDateYYYY(helper.getText(self.tree, 'Data_Set_Citation/Dataset_Release_Date'))
        self.title = citedTitle
        self.briefCitation = ''
        if self.authors != '' and self.date != '' and self.title != '':
            self.briefCitation = '%s (%s): %s'%(self.authors, self.date, self.title)
        # for now
        self.citation = self.briefCitation

        self.personnel = [DIFcontact(helper, f) for f in helper.findall(self.tree, 'Personnel')]

        # services
        self.services = []
        # not all DIFs have KML / WMC resources available, so these
        # default to empty lists
        self.kmlURL = []
        self.wmcURL = []
        for item in helper.findall(self.tree, 'Related_URL'):
            contentType = helper.getText(item, 'URL_Content_Type')
            serviceURL = helper.cleanup.undo(helper.getText(item, 'URL'))
            self.services.append(
                DIFService(contentType, serviceURL, helper.getText(item, 'Description')))
            # if there is a KML URL, encode it and add it as a field for
            # easy access to store in session data for selectedItems
            # NB, it needs to be encoded to avoid problems whilst sending
            # via AJAX.  Passing '%' as the safe characters means forward
            # slashes get quoted too, while existing escape sequences are
            # left alone.
            # A substring test is used so that a content type which starts
            # with the keyword (match at index 0) is recognised as well.
            if 'KML' in contentType:
                self.kmlURL.append(urllib.quote(serviceURL, '%'))
            elif 'WEB MAP CONTEXT' in contentType:
                self.wmcURL.append(urllib.quote(serviceURL, '%'))

        # NOTE(review): discoveryViewURL is only set when a discovery URL
        # is available; callers should not assume the attribute exists.
        # (Late binding of the NDG-B service URL used to happen here and
        # is currently disabled.)
        self.binding = None
        if self.ndgObject is not None and self.ndgObject.discoveryURL is not None:
            self.binding = DIFService('DISCOVERY', self.ndgObject.discoveryURL, 'Discovery record')
            self.discoveryViewURL = self.ndgObject.discoveryURL

        if self.debug:
            f = open('difs.log', 'a')
            try:
                f.write('%s##\n%s\n##################################\n'%(self.entryID, self.xml))
            finally:
                # make sure the handle is released even if the write fails
                f.close()

        logging.info("DIF data model set up")

    def _shortLongNames(self, helper, tag):
        ''' Collect the formatted short/long names (see shortLong) of every
        element called tag found in the DIF tree, without duplicates. '''
        names = []
        for element in helper.findall(self.tree, tag):
            s = helper.getText(element, 'Short_Name')
            l = helper.getText(element, 'Long_Name')
            names = shortLong(names, s, l)
        return names
---|
229 | |
---|
230 | |
---|