1 | ''' |
---|
2 | Controller for the discovery search functionality |
---|
3 | ''' |
---|
4 | import socket, logging |
---|
5 | from paste.request import parse_querystring |
---|
6 | from ndg.common.src.clients.ws.discovery.discoveryserviceclient import DiscoveryServiceClient |
---|
7 | from ndg.common.src.clients.xmldb.eXist.searchclient import SearchClient |
---|
8 | from ndg.common.src.clients.http.vocabserverclient import VocabServerClient as VS |
---|
9 | from ndg.common.src.models.ndgObject import ndgObject |
---|
10 | from ndg.common.src.models.DIF import DIF |
---|
11 | from milk_server.lib.base import * |
---|
12 | from milk_server.lib.Date import * |
---|
13 | from milk_server.models.DiscoveryState import DiscoveryState,constraints |
---|
14 | from milk_server.lib.mailer import mailHandler |
---|
15 | |
---|
class DiscoveryController(BaseController):
    ''' Provides the pylons controller for NDG discovery.

    index validates the query string and hands off to doText, which runs the
    search and, for non-Discovery targets, hands off to moreSearch.  semantic
    builds links to searches on related vocabulary terms.  Problems are
    reported by putting a message into c.xml and rendering the 'content' or
    'error' template.
    '''

    # Recipients of the mail sent when returned documents cannot be parsed as DIF
    difErrorRecipients=['b.n.lawrence@rl.ac.uk']
    # Paging values used when the query string does not supply them
    pagingDefaults={'start':1,'howmany':10}

    def __escape(text):
        ''' Return <text> with &, < and > replaced by XML entities so that
        user supplied values cannot inject markup into c.xml. '''
        # function-scope import keeps the dependency local to this helper
        from xml.sax.saxutils import escape
        return escape(text)
    __escape=staticmethod(__escape)

    def __setup(self):
        ''' Common setup for controller methods: caches the NDG configuration,
        the eXist connection details and the parsed query string on self and
        clears the validation message. '''
        self.cf=request.environ['ndgConfig']
        # (host, password file) pair, handed to SearchClient in doText
        self.exist=(self.cf.get('NDG_EXIST','local'), g.pwFile)
        self.inputs=dict(parse_querystring(request.environ))
        # the __check* helpers report problems by filling this in
        self.message=''

    def index(self):
        ''' Parse the query string and hand off to a discovery engine.

        Returns the advanced search form when there is nothing to search for
        (no inputs, 'ClearForm' or 'constrained' present), a 'content' page
        carrying a message when validation fails, and otherwise whatever
        doText returns.
        '''
        self.__setup()

        if self.inputs=={} or 'ClearForm' in self.inputs:
            return self.__advancedPrompt()

        # see if this is a discovery search or a more complicated search
        if 'searchTarget' not in self.inputs:
            self.inputs['searchTarget']='Discovery'

        # the following need to be defined
        for key,default in self.pagingDefaults.items():
            self.inputs.setdefault(key,default)

        # the simplest query we might get is a text search, in which case
        # the inputs should be start, howmany and searchString (although
        # maybe not in that order).  The next simplest is one with
        # a specified textTarget, after that we need all the inputs.
        if 'searchString' in self.inputs and 'textTarget' not in self.inputs:
            # it's a simple text search
            self.inputs['textTarget']='All'

        self.__checkform(['searchString','textTarget','start','howmany','searchTarget'])
        if self.message!='':
            c.xml='Simple %s:'%self.__escape(self.message)
            return render('content')

        if 'geoSearchType' not in self.inputs:
            self.inputs['geoSearchType']='overlaps'

        if len(self.inputs)==6:
            # only the five basic inputs plus geoSearchType are present, so
            # there are no constraints to read (this is kind of historical ...)
            bbox=None
            dateRange=None
            scope=None
        else:
            scope=self.__readScope()

            bbox=self.__readBBox()
            if self.message!='':
                c.xml=self.__escape(self.message)
                return render('content')

            dateRange=self.__readDateRange()
            if self.message!='':
                c.xml=self.__escape(self.message)
                return render('content')

        if 'constrained' in self.inputs:
            # redisplay the form pre-filled with the constraints just parsed
            con=self.__buildconstraints(dateRange,bbox,scope,
                                        self.inputs['searchString'],
                                        self.inputs['geoSearchType'])
            return self.__advancedPrompt(searchConstraints=con)

        # ------------- ok, now go do the search -----------
        return self.doText(self.inputs['searchString'],self.inputs['textTarget'],
                           self.inputs['start'],self.inputs['howmany'],
                           scope=scope,dateRange=dateRange,bbox=bbox,
                           geoSearch=self.inputs['geoSearchType'])

    def __readScope(self):
        ''' Return the scope from the radio button on the form as a one
        element list, or None when it is absent or 'All'. '''
        if 'source' not in self.inputs:
            return None
        # the WSDL expects a list, we're just providing one ... via a radio ...
        scope=[self.inputs['source']]
        if scope==['All']:
            return None
        return scope

    def __readBBox(self):
        ''' Return the bounding box from the form or None when the form does
        not carry a complete one.  Sets self.message if the limits are bad.
        NB, internal to this controller we use bbox=[N,W,E,S], not [W,S,E,N]! '''
        self.__checkform(['bboxN','bboxE','bboxS','bboxW','geoSearchType'])
        if self.message!='':
            # an incomplete box is not an error, it just means no constraint
            self.message=''
            return None
        bbox=[self.inputs['bboxN'],self.inputs['bboxW'],
              self.inputs['bboxE'],self.inputs['bboxS']]
        self.__checkbox(bbox)
        return bbox

    def __readDateRange(self):
        ''' Return [(day,mon,year),(day,mon,year)] from the form, or None when
        the date fields are missing or all blank.  Sets self.message if the
        dates do not pass __checkdates. '''
        self.__checkform(['startDateDay','startDateMon','startDateYear',
                          'endDateDay','endDateMon','endDateYear'])
        if self.message!='':
            # missing date fields are not an error, it just means no constraint
            self.message=''
            return None
        first=(self.inputs['startDateDay'],self.inputs['startDateMon'],
               self.inputs['startDateYear'])
        last=(self.inputs['endDateDay'],self.inputs['endDateMon'],
              self.inputs['endDateYear'])
        dateRange=[first,last]
        # default form has blanks, in which case we don't want to check for date range
        if dateRange==[("","",""),("","","")]:
            return None
        try:
            self.__checkdates(dateRange)
        except Exception:
            # presumably raised by ValidDate/JulDay on malformed values - confirm
            self.message='Invalid date provided'
        return dateRange

    def doText(self,searchString,textTarget,start,
               howmany,scope=None,dateRange=None,bbox=None,geoSearch='overlaps'):
        ''' Carry out a text search for <searchString>
        in the <textTarget> where the accepted text target values are controlled
        by the DiscoveryTemplate GUI, and are: All, Authors, Parameters.

        <start> and <howmany> are the paging values (converted with int()),
        <scope>, <dateRange>, <bbox> and <geoSearch> are passed to the search
        client unchanged.  Reads self.inputs['searchTarget'] to choose between
        the discovery service and the eXist search client.  Returns the
        rendered results page, or a rendered 'content'/'error' page carrying a
        message in c.xml when something goes wrong.
        '''
        logging.info("'doText' invoke with string, '%s'",searchString)
        try:
            # url arguments need conversion ...
            start,howmany=int(start),int(howmany)
        except (TypeError,ValueError):
            c.xml='Invalid start or howmany value provided'
            return render('content')

        searchTarget=self.inputs['searchTarget']
        if searchTarget=='Discovery':
            logging.info(" - use Discovery service to complete search")
            url=getattr(g,'discoveryServiceURL',None)
            ws=DiscoveryServiceClient(HostAndPort=url)
        elif searchTarget in ['Browse','NumSim']:
            logging.info(" - use Browse service to complete search")
            ws=SearchClient(dbHostName=self.exist[0],
                            configFileName=self.exist[1])
            # overriding text target which is ignored currently ... yuck ...
            textTarget=searchTarget
            if textTarget=='Browse':
                textTarget='ndg_B_metadata'
        else:
            logging.error("Unrecognised search type, '%s'",searchTarget)
            # NOTE(review): searchTarget is user input; it is left unescaped
            # here because it is not visible whether the 'error' template
            # escapes c.xml itself - confirm
            c.xml='Unknown searchTarget %s'%searchTarget
            return render('error')

        # PJK 04/09/08 Handle errors more gracefully
        #
        # http://proj.badc.rl.ac.uk/ndg/ticket/984
        try:
            # the return value is not needed, results are read from ws below
            ws.search(searchString,
                      start=start,
                      howmany=howmany,
                      target=textTarget,
                      scope=scope,
                      dateRange=dateRange,
                      bbox=bbox,
                      geoSearchType=geoSearch)
        except socket.error as e:
            logging.error("Socket error for discovery service search: %s",e)
            c.xml='The Discovery Service is unavailable. Please check with '+\
                  'your system administrator'
            return render('error')
        except Exception as e:
            logging.error("Calling discovery service search: %s",e)
            c.xml='An internal error occured. Please check with ' + \
                  'your system administrator'
            return render('error')

        logging.info("'doText()' returned - now processing results")
        if ws.error is not None:
            logging.error("Error encountered whilst running search: %s",ws.error)
            c.xml=''.join(['<p>%s</p>'%i for i in ws.error])
            return render('content')

        # build constraints info for report
        searchConstraints=self.__buildconstraints(dateRange,bbox,scope,
                                                  searchString,geoSearch)
        hits=ws.hits
        if hits==0:
            outMessage='No records found [contraints: %s]' %searchConstraints
            logging.info(outMessage)
            # NOTE(review): the constraints text is left unescaped because its
            # format is defined outside this file - confirm it carries no raw
            # user input
            c.xml='<p>' + outMessage + '</p>'
            return render('content')

        sessionID=ws.serverSessionID

        if hits < howmany:
            howmany=hits

        # DiscoveryState object is a wrapper to the various search config
        # variables
        c.state=DiscoveryState(sessionID,searchString,request.environ,
                               hits,searchConstraints,start,howmany)
        c.querystring=request.environ['QUERY_STRING']

        try:
            if searchTarget!='Discovery':
                return self.moreSearch(ws)

            results=ws.getLabelledDocs(format='DIF')
            if results==[]:
                c.xml='<p> No results for "%s"!</p>'%self.__escape(searchString)
                return render('content')

            # each result is indexed as (identifier, document); documents that
            # DIF rejects with ValueError are collected with the reason
            difs=[]
            errors=[]
            for result in results:
                obj=ndgObject(result[0], config=self.cf)
                try:
                    difs.append(DIF(result[1],ndgObj=obj))
                except ValueError as e:
                    errors.append((result[0],str(e)))

            if difs==[]:
                c.xml='<p>No usable results for "%s"!</p>'%self.__escape(searchString)
                return render('content')
            if errors:
                c.xml=self.__reportDIFErrors(searchString,errors)
                return render('content')

            c.difs=difs
            session['results']=h.current_url()
            session.save()
            self.__setPageTabs()
            return render('browse/results')

        except ValueError as e:
            if g.debugModeOn == 'True':
                # re-raise unchanged so the traceback points at the real cause
                raise
            c.xml='<p> Error retrieving documents for %s hits is [%s]</p>'%(
                hits,self.__escape(str(e)))
            return render('content')
        except Exception as e:
            # report the class of the exception actually raised
            c.xml='Unknown error %s,%s'%(e.__class__.__name__,e)
            return render('error')

    def __reportDIFErrors(self,searchString,errors):
        ''' Return the markup shown when some hits could not be parsed as DIF,
        and mail the failure details to difErrorRecipients.  <errors> is a
        list of (identifier, reason) tuples. '''
        providers=[]
        for error in errors:
            repository=ndgObject(error[0]).repository
            if repository not in providers:
                providers.append(repository)
        if len(providers)!=1:
            dp='[Various Data Providers]'
        else:
            dp='[%s]'%providers[0]

        xml='<p>Search results for "%s"'%self.__escape(searchString)
        xml+=' (unfortunately %s hits matched unformattable documents from %s, an internal error has been logged):</p>'%(len(errors),dp)
        status,message=mailHandler(self.difErrorRecipients,'DIF errors',str(errors),
                                   server=self.cf.get('DEFAULT','mailserver'))
        if not status:
            xml+='<p> Actually, not even an internal error has been logged. <br/>'
            xml+='Internal sending of mail failed with error [%s]</p>'%message
        return xml

    def __setPageTabs(self):
        ''' Set up the displayed tabs: make sure 'Results' is the second tab
        and that a 'Selections' tab is present exactly once. '''
        def selectionsTab():
            return ('Selections',
                    h.url_for(controller='browse/selectedItems',
                              action='index'))

        if len(c.pageTabs)==1:
            c.pageTabs.append(('Results',session['results']))
            c.pageTabs.append(selectionsTab())
        elif c.pageTabs[1][0]!='Results':
            c.pageTabs.insert(1,('Results',session['results']))
            # compare against each tab's name (first field of the tuple)
            tabNames=[tab[0] for tab in c.pageTabs]
            if 'Selections' not in tabNames:
                c.pageTabs.append(selectionsTab())

    def __advancedPrompt(self,searchConstraints=None):
        ''' This provides the advanced search input page.  The form is filled
        with defaults which are overridden by any non-None entries of
        <searchConstraints>.  Returns an error string instead of the page if
        the SEARCH configuration cannot be read. '''
        try:
            # the values are not used here, the lookups only check that the
            # search interface is configured
            self.cf.get('SEARCH','discoveryURL')
            self.cf.get('SEARCH','advancedURL')
        except Exception:
            return 'Error, invalid configuration for search interface'

        # defaults
        c.bbox='90.0','-180.0','180.0','-90.0'
        c.startDateDay,c.startDateMon,c.startDateYear='','',''
        c.endDateDay,c.endDateMon,c.endDateYear='','',''
        c.textTarget='All'
        c.searchString=''
        c.source=['All']
        c.geoSearchType='overlaps'

        # constraints
        if searchConstraints is not None:
            if searchConstraints['dateRange'] is not None:
                c.startDateDay,c.startDateMon,c.startDateYear=searchConstraints['dateRange'][0]
                c.endDateDay,c.endDateMon,c.endDateYear=searchConstraints['dateRange'][1]
            if searchConstraints['bbox'] is not None:
                c.bbox=searchConstraints['bbox']
            if searchConstraints['textTarget'] is not None:
                c.textTarget=searchConstraints['textTarget']
            if searchConstraints['searchString'] is not None:
                c.searchString=searchConstraints['searchString']
            if searchConstraints['scope'] is not None:
                c.source=searchConstraints['scope']
            c.geoSearchType=(searchConstraints['geoSearchType'] or 'overlaps')
        return render('browse/advanced')

    def __checkbox(self,bbox):
        ''' Check that <bbox> ([N,W,E,S]) holds numbers within +/-90 latitude
        and +/-180 longitude; sets self.message if it does not. '''
        m='Invalid bounding box dimensions entered - limits are '
        try:
            north,west,east,south=[float(bbox[i]) for i in range(4)]
        except (TypeError,ValueError,IndexError):
            # values come straight from the query string
            self.message='Invalid bounding box dimensions entered - values must be numeric'
            return
        if north>90.0 or south<-90.:
            self.message=m+'+90 (N), -90 (S)!'
        if west<-180. or east>180.:
            if self.message=='':
                self.message=m
            # [:-1] drops the trailing '!' (or space) before appending
            self.message=self.message[:-1]+' -180 (W), 180 (E)!'

    def __checkform(self,expected):
        ''' Simply checks the inputs to make sure the elements in expected are
        present; if any are missing self.message names them and is followed by
        a dump of the inputs received. '''
        message="An incomplete NDG search form was received: "
        missing=[i for i in expected if i not in self.inputs]
        if missing:
            self.message=message+', '.join(missing)
        if self.message!='':
            self.message+='[%s]'%self.inputs

    def __checkdates(self,dateRange):
        ''' Check input dates for sanity: both must satisfy ValidDate and the
        second must be later than the first.  Sets self.message otherwise. '''
        if not (ValidDate(dateRange[0]) and ValidDate(dateRange[1])):
            self.message='Input dates are not valid [%s]'%dateRange
        elif JulDay(dateRange[0])>=JulDay(dateRange[1]):
            self.message='Second date must be after first date'

    def __buildconstraints(self,dateRange,bbox,scope,searchString,geoSearch):
        ''' Just build a constraints object from the search parameters '''
        return constraints(dateRange=dateRange,bbox=bbox,scope=scope,
                           searchString=searchString,geoSearchType=geoSearch)

    def semantic(self):
        ''' Ask the vocabulary server for terms related to the search string
        and put (term, search url) pairs into c.narrower, c.broader and
        c.synonyms.  c.semAvailable is set to 1 when any were found and to 0,
        with an explanation in c.semError, when there is no search string or
        the vocabulary server fails with IOError.  Returns the rendered
        'browse/semantic' fragment. '''
        self.__setup()
        vs=VS(proxyServer=self.cf.get('DEFAULT','proxyServer'))

        if 'searchString' not in self.inputs:
            c.semAvailable=0
            c.semError='.'
            return render('browse/semantic',fragment=True)

        searchString=self.inputs['searchString']
        try:
            broader,narrower,synonyms=vs.getRelated(searchString)
        except IOError as e:
            c.semAvailable=0
            c.semError=' (No valid reply from vocabulary service)'
            logging.error('Vocabulary Service: %s (for search [%s])',e,searchString)
            return render('browse/semantic',fragment=True)

        # get a base string for the links to new searches: the current query
        # without paging and with a placeholder for the search string
        placeholder='###SEARCHSSTRING###'
        params=dict(self.inputs)
        params.pop('start',None)
        params.pop('howmany',None)
        params['searchString']=placeholder
        # NOTE(review): keys, values and the substituted terms are not
        # URL-encoded, so values containing '&' or spaces give broken links
        url='%s/discovery?'%g.server
        url+='&'.join(['%s=%s'%(key,params[key]) for key in params])

        # and now build the links
        c.narrower=[(term,url.replace(placeholder,term)) for term in narrower]
        c.broader=[(term,url.replace(placeholder,term)) for term in broader]
        c.synonyms=[(term,url.replace(placeholder,term)) for term in synonyms]
        if c.narrower!=[] or c.broader!=[] or c.synonyms!=[]:
            c.semAvailable=1

        return render('browse/semantic',fragment=True)

    def moreSearch(self,ws):
        ''' Provides the search on Browse and NumSim content: gives every
        result of <ws> a link for the current search target and returns the
        rendered short results page. '''
        c.results=ws.results
        c.searchTarget=self.inputs['searchTarget']

        for r in c.results:
            docId=r.id
            # cope with atom docs
            if docId.startswith('tag'):
                docId=docId.split('/')[-1]
            n=ndgObject(docId,config=self.cf)
            r.link={'Browse':n.BURL,'NumSim':n.URL}[c.searchTarget]

        return render('browse/short_results')

    def clearSession(self):
        ''' Clear out all session variables - to help when these change in development '''
        session.clear()
        session.save()
397 | |
---|