source: MILK/trunk/milk_server/milk_server/controllers/browse/discovery.py @ 5261

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/MILK/trunk/milk_server/milk_server/controllers/browse/discovery.py@5261
Revision 5261, 23.9 KB checked in by cbyrom, 11 years ago (diff)

Lots of tidy ups to MILK codebase:

Implement new input search filters - with JavaScript datepickers to
pick the date ranges + add the vocab search ahead text input and
combine this with the text input.

Refactor discovery controller to tidy it up significantly - making it more
structured and improving error handling and logging. Improve
templates for the search filter, splitting them into multiple files to
organise them better.

Various tidying up and tweaks of other codebase - e.g. standardising
use of global variables across app.

Line 
1'''
2Controller for the discovery search functionality
3'''
4import socket, logging
5from paste.request import parse_querystring
6from ndg.common.src.clients.ws.discovery.discoveryserviceclient import DiscoveryServiceClient
7from ndg.common.src.clients.xmldb.eXist.searchclient import SearchClient
8from ndg.common.src.clients.http.vocabserverclient import VocabServerClient as VS
9from ndg.common.src.models.vocabtermdata import VocabTermData as VTD
10from ndg.common.src.models.ndgObject import ndgObject
11from ndg.common.src.models.DIF import DIF
12from ndg.common.src.lib.mailer import mailHandler
13from milk_server.lib.base import *
14from milk_server.lib.Date import *
15from milk_server.models.DiscoveryState import DiscoveryState,constraints
16from milk_server.controllers.home import HomeController
17import browserconstants as bc
18
19class DiscoveryController(HomeController):
20    '''
21    Provides the pylons controller for NDG discovery
22    '''
23
24    def __setup(self):
25        '''
26        Common setup for controller methods
27        '''
28        self.cf=request.environ['ndgConfig']
29        self.inputs=dict(parse_querystring(request.environ))
30        self.message=''
31        c.errors = {}    # dict to store error messages
32               
33   
34    def index(self):
35        '''
36        Main entry point for doing discovery searches
37        '''
38        self.__setup()
39       
40        # if inputs are not set: if the discovery mode is enabled, display
41        # the search screen, otherwise redirect to the default home page according
42        # to what milk mode is set (i.e. editor/browse)
43        if not self.inputs or 'ClearForm' in self.inputs:
44            if g.discoveryEnabled:
45                return self.__advancedPrompt()
46            else:
47                logging.info("Discovery mode not enabled - redirect to default")
48                return h.redirect_to(h.url_for('default'))
49       
50        self.__getInputs()
51
52        # if any errors are found, return user to search page
53        if c.errors:
54            return self.__advancedPrompt()
55
56        searchString = self.inputs['searchString']
57        if 'vocabTerm' in self.inputs:
58            searchString += " %s" %self.inputs['vocabTerm']
59           
60        # users can return to search page to refine the search inputs; in this case
61        # they will have a 'constrained' input
62        self.constraints = self.__buildconstraints(self.dateRange, self.bbox, self.scope,
63                                                   searchString, self.inputs['geoSearchType'])
64        if 'constrained' in self.inputs: 
65            return self.__advancedPrompt(searchConstraints = self.constraints)
66
67        # ok, now go do the search
68        try:
69            return self.__runSearch(searchString, self.inputs['textTarget'],
70                                 self.inputs['start'], self.inputs['howmany'], 
71                                 scope = self.scope, dateRange = self.dateRange, 
72                                 bbox = self.bbox, geoSearch=self.inputs['geoSearchType'])
73        except Exception, e:
74            if g.debugModeOn == 'True':
75                raise e
76            else:
77                c.xml='Unexpected error: %s'%(str(e))
78                return render('error')
79
80
81    def __getInputs(self):
82        '''
83        Retrieve the user inputs and set defaults.  Values are stored in the
84        self.inputs dict
85        '''
86        logging.debug("Getting user inputs")
87        # see if this is a discovery search or a more complicated search
88        if 'searchTarget' not in self.inputs: 
89            self.inputs['searchTarget']='Discovery'
90       
91        # set default for table paging, if not already set
92        # NB, url arguments need converting back to ints
93        if 'start' not in self.inputs:
94            self.inputs['start'] = 1
95        else:
96            self.inputs['start'] = int(self.inputs['start'])
97           
98        if 'howmany' not in self.inputs:
99            self.inputs['howmany'] = 10
100        else:
101            self.inputs['howmany'] = int(self.inputs['howmany'])
102           
103        # the simplest query we might get is a text search, in which case
104        # the inputs should be start, howmany and searchString (although
105        # maybe not in that order. The next simplest is one with
106        # a specified textTarget, after that we need all the inputs.
107        if 'searchString' in self.inputs and 'textTarget' not in self.inputs:
108            # it's a simple text search
109            self.inputs['textTarget']='All'
110
111        # the next simplest is one that includes texttarget as well ...
112        expected=['searchString','textTarget']
113        missingInputs = self.__checkform(expected)
114        if missingInputs:
115            if bc.INCOMPLETE_SEARCH_INPUT_MESSAGE not in c.errors:
116                c.errors[bc.INCOMPLETE_SEARCH_INPUT_MESSAGE] = []
117            c.errors[bc.INCOMPLETE_SEARCH_INPUT_MESSAGE].extend(missingInputs)
118
119        self.__getSpatioTemporalInputs()
120        logging.debug("User inputs retrieved")
121
122
123    def __getSpatioTemporalInputs(self):
124        '''
125        Get spatiotemporal input data - and set up any defaults, if required
126        '''
127        logging.debug("Getting spatiotemporal inputs")
128        if 'geoSearchType' not in self.inputs:
129            self.inputs['geoSearchType']='overlaps'
130
131        # now we add the defaults... this is kind of historical - NOT SURE THIS IS STILL NEEDED
132        if len(self.inputs)==6:
133            self.bbox=None
134            self.dateRange=None
135            self.scope=None
136            return
137       
138        if 'source' in self.inputs and self.inputs['source'] != 'All':
139            # NB, the WSDL expects a list
140            self.scope = [self.inputs['source']]
141        else:
142            self.scope = None
143           
144        missingInputs = self.__checkform(['bboxN','bboxE','bboxS','bboxW','geoSearchType'])
145        if missingInputs: 
146            self.bbox = None
147        else:
148            # default form has a global bounding box, NB, internal to this routine we use bbox=[N,W,E,S], not [W,S,E,N]!
149            self.bbox = [self.inputs['bboxN'], self.inputs['bboxW'],
150                         self.inputs['bboxE'], self.inputs['bboxS']]
151           
152            errors = self.__checkBBoxValidity(self.bbox)
153            if errors:
154                if bc.INVALID_BBOX_MESSAGE not in c.errors:
155                    c.errors[bc.INVALID_BBOX_MESSAGE] = []
156                c.errors[bc.INVALID_BBOX_MESSAGE].extend(missingInputs)
157
158        missingInputs = self.__checkform(['startDate', 'endDate'])
159        if missingInputs or not(self.inputs['startDate'] and self.inputs['endDate']):
160            self.dateRange = None
161        else:
162            dateError = None
163            try:
164                year, month, day = self.inputs['startDate'].split('/')
165                self.dateRange = [(day, month, year)]
166                year, month, day = self.inputs['endDate'].split('/')
167                self.dateRange.append((day, month, year))
168
169                if self.dateRange <> [("","",""),("","","")]:
170                    dateError = self.__checkdates(self.dateRange)
171                   
172                else: 
173                    self.dateRange = None
174                               
175            except:
176                dateError = 'Invalid date provided'
177
178            if dateError:
179                if bc.INVALID_DATERANGE_MESSAGE not in c.errors:
180                    c.errors[bc.INVALID_DATERANGE_MESSAGE] = []
181                c.errors[bc.INVALID_DATERANGE_MESSAGE].append(dateError)
182       
183        logging.debug("Spatiotemporal inputs retrieved")
184
185
186    def __getSearchClient(self, clientType):
187        '''
188        Retrieve the appropriate client to complete the search
189        - currently supported are browse and discovery clients
190        @param clientType: type of search client to use.  Currently accepts,
191        'Discovery', 'Browse' and 'NumSim'
192        @raise ValueError if unrecognised search client entered
193        @return search client adhering to the ndg.common.clients.interfacesearchclient
194        interface
195        '''
196        logging.debug("Getting %s type search client" %clientType)
197        searchClient = None
198        if clientType =='Discovery':
199            logging.info(" - use Discovery service to complete search")
200            if g.discoveryServiceURL:
201                searchClient = DiscoveryServiceClient(HostAndPort = g.discoveryServiceURL)
202            else:
203                searchClient = DiscoveryServiceClient()
204               
205        elif clientType in ['Browse','NumSim']:
206            logging.info(" - use Browse service to complete search")
207            searchClient = SearchClient(dbHostName = g.localEXist,
208                                        configFileName = g.pwFile)
209        else:
210            raise ValueError("Unrecognised search type, '%s'" %clientType)
211       
212        logging.debug("- returning search client")
213        return searchClient
214       
215
216    def __runSearch(self, searchString, textTarget, start,
217                 howmany, scope = None, dateRange = None, bbox = None,
218                 geoSearch = 'overlaps'):
219        '''
220        Carry out a text search for <searchString>
221        in the <textTarget> where the accepted text target values are controlled
222        by the DiscoveryTemplate GUI, and are: All, Authors, Parameters
223        @param searchString: string to search for
224        @param textTarget: target of the search - either, 'All', 'Authors' or 'Parameters'
225        @param start: starting record to return
226        @param howmany: number of records to return
227        @keyword scope: scope of search - either NERC, NERC_DDC, MDIP or DPPP. Default = None
228        @keyword dateRange: date range in format [startDate, endDate] where the
229        date objects are tuples with format (day, month, year). Default = None
230        @keyword bbox: Bounding box in format [N,W,E,S]. Default = None
231        @keyword geoSearch: type of spatial search. Defaults to 'overlaps'.
232        '''
233        logging.debug("Running text search with string, '%s'" %searchString)
234       
235        searchClient = self.__getSearchClient(self.inputs['searchTarget'])
236       
237        if self.inputs['searchTarget'] in ['Browse','NumSim']:
238            textTarget = self.inputs['searchTarget']
239            if textTarget == 'Browse':
240                # NB, this switches the searching to be done on atom format
241                # rather than moles1.3 format docs
242                textTarget = SearchClient.ATOM_TARGET#'ndg_B_metadata'
243           
244        # PJK 04/09/08 Handle errors more gracefully
245        #
246        # http://proj.badc.rl.ac.uk/ndg/ticket/984
247        try:
248            searchClient.search(searchString,
249                                start = start,
250                                howmany = howmany,
251                                target = textTarget,
252                                scope = scope,
253                                dateRange = dateRange,
254                                bbox = bbox,
255                                geoSearchType = geoSearch)
256        except socket.error, e:
257            logging.error("Socket error for discovery service search: %s" % e)
258            c.xml='The Discovery Service is unavailable.  Please check with '+\
259                    'your system administrator'
260            return render('error')
261        except Exception, e:
262            logging.error("Calling discovery service search: %s" % e)
263            c.xml='An internal error occured.  Please check with ' + \
264                    'your system administrator'
265            return render('error')
266           
267        logging.debug("Search returned - now processing results")
268        # DiscoveryState object is a wrapper to the various search config
269        # variables
270        c.state = DiscoveryState(searchClient.serverSessionID, searchString,
271                                 request.environ, searchClient.hits, self.constraints,
272                                 start, howmany)
273
274        return self.__processSearchResults(searchClient, c.state)
275
276
277    def __processSearchResults(self, searchClient, ds):
278        '''
279        Process the results from a search - as ran by the input search client object
280        @param searchClient: search client adhering to the ndg.common.clients.interfacesearchclient
281        interface - which has just ran a search
282        @param ds: DiscoveryState object with info on the search
283        '''
284        if searchClient.error:
285            logging.error("Error encountered whilst running search: %s" %searchClient.error)
286            m=''
287            for i in searchClient.error:
288                m+='<p>%s</p>'%i
289            c.xml = m
290            return render('error')
291       
292        hits = searchClient.hits
293        if hits == 0 and ds.constraintsInstance['textTarget'] != SearchClient.ATOM_TARGET:
294            outMessage = 'No records found [constraints: %s]' %ds.constraints
295            logging.info(outMessage) 
296            c.xml='<p>' + outMessage + '</p>'
297            return render('content')
298       
299        # NB, this is used in the semantic search function of results.kid and short_results.kid
300        c.querystring = request.environ['QUERY_STRING']
301     
302        try:
303            # display browse search results differently
304            if self.inputs['searchTarget'] != 'Discovery':
305                return self.__displayBrowseSearchResults(searchClient)
306
307            # now actually retrieve the search records
308            results = searchClient.getLabelledDocs(format='DIF')
309
310            if not results:
311                c.xml='<p> No results for "%s"!</p>'%ds.searchString
312                return render('content')
313
314            difs = []
315            errors = []
316            for result in results: 
317                obj=ndgObject(result[0], config = self.cf)
318                try:
319                    difs.append(DIF(result[1],ndgObj=obj))
320                except ValueError,e:
321                    errors.append((result[0], str(e)))
322
323            if not difs:
324                c.xml='<p>No usable results for "%s"!</p>'%ds.searchString
325                return render('content')
326           
327            elif errors:
328                c.xml='<p>Search results for "%s"'%ds.searchString
329                dp=[]
330                for e in errors:
331                    n=ndgObject(e[0])
332                    if n.repository not in dp: 
333                        dp.append(n.repository)
334                if len(dp)<>1: 
335                    dp='[Various Data Providers]'
336                else:
337                    dp='[%s]'%dp[0] 
338                   
339                c.xml+=' (unfortunately %s hits matched unformattable documents from %s, an internal error has been logged):</p>'%(len(errors),dp)
340                status, message=mailHandler([g.metadataMaintainer],'DIF errors',
341                                            str(errors), server = g.mailServer)
342                if not status:
343                    c.xml+='<p> Actually, not even an internal error has been logged. <br/>'
344                    c.xml+='Internal sending of mail failed with error [%s]</p>'%message
345                return render('content')
346               
347            # if we're here, we're ready to display the dif records
348            c.difs = difs
349            session['results'] = h.current_url()
350            session.save()
351           
352            # set up the displayed tabs
353            if len(c.pageTabs)==1: 
354                c.pageTabs.append(('Results', session['results']))
355                c.pageTabs.append(('Selections',
356                                   h.url_for(controller='browse/selectedItems',
357                                             action='index')))
358            elif c.pageTabs[1][0]!='Results':
359                    c.pageTabs.insert(1,('Results',session['results']))
360                    selectionsNeeded=1
361                    for tab in c.pageTabs[0]:
362                        if tab == 'Selections':
363                            selectionsNeeded=0
364                    if selectionsNeeded:
365                        c.pageTabs.append(('Selections',
366                                   h.url_for(controller='browse/selectedItems',
367                                             action='index')))
368                       
369            return render('browse/results')
370               
371        except ValueError,e:
372            if g.debugModeOn == 'True':
373                raise ValueError,str(e)
374            else:
375                c.xml='<p> Error retrieving documents for %s hits is [%s]</p>'%(hits,e)
376                return render('content')
377
378       
379    def __advancedPrompt(self, searchConstraints = None):
380        '''
381        This provides the advanced search input page
382        @keyword searchConstraints: a DiscoveryState.constraints object with the
383        search filter details in
384        '''
385        #defaults
386        c.title = bc.DISCOVERY_HOME_TITLE
387        c.bbox='90.0','-180.0','180.0','-90.0'
388        c.startDate = ''
389        c.endDate = ''
390        c.textTarget='All'
391        c.searchString=''
392        c.source=['All']
393        c.geoSearchType='overlaps'
394
395        # apply any available constraints
396        if searchConstraints:
397            if searchConstraints['dateRange']:
398                c.startDate = '%s/%s/%s' %searchConstraints['dateRange'][0]
399                c.endDate = '%s/%s/%s' %searchConstraints['dateRange'][1]
400            if searchConstraints['bbox']:
401                c.bbox=searchConstraints['bbox']
402            if searchConstraints['textTarget']:
403                c.textTarget=searchConstraints['textTarget']
404            if searchConstraints['searchString']:
405                c.searchString=searchConstraints['searchString']
406            if searchConstraints['scope']:
407                c.source=searchConstraints['scope']
408            if searchConstraints['geoSearchType']:
409                c.geoSearchType = searchConstraints['geoSearchType']
410       
411        return self.savePageAndRender("browse/discovery_search", **self.inputs)
412
413       
414    def __checkBBoxValidity(self, bbox):
415        '''
416        Check the integrity of the bounding box; return any errors found as list
417        @return: list of errors
418        '''
419        errors = []
420       
421        for name, val in [('North', float(bbox[0])), ('South', float(bbox[3]))]:
422            if val > 90.0 or val < -90.:
423                errors.append("%s latitude exceeds valid range - -90 <= x <= 90" %name)
424               
425        for name, val in [('West', float(bbox[1])), ('East', float(bbox[2]))]:
426            if val > 180.0 or val < -180.:
427                errors.append("%s longitude exceeds valid range - -180 <= x <= 180" %name)
428        return errors
429
430           
431    def __checkform(self,expected):
432        '''
433        Simply checks the inputs to make sure the elements in expected are present
434        - NB, this isn't actually checking that a value for these inputs are set, it
435        is just checking the fields are there
436        @return array of missing inputs
437        '''
438        logging.debug("Checking for missing inputs")
439        missingInputs = []
440        for i in expected:
441            if i not in self.inputs:
442                logging.debug(" - found missing input: %s" %i)
443                missingInputs.append(i)
444        logging.debug("Finished checking for missing inputs")
445        return missingInputs
446       
447               
448    def __checkdates(self,dateRange):
449        '''
450        Check input dates for sanity
451        @return: error message, if invalid, None otherwise
452        '''
453        if not ValidDate(dateRange[0])*ValidDate(dateRange[1]):
454            return str(dateRange)
455        elif JulDay(dateRange[0]) >= JulDay(dateRange[1]):
456            return 'second date must be after first date'
457     
458        return None
459
460       
461    def __buildconstraints(self, dateRange, bbox, scope, searchString, geoSearch):
462        '''
463        Build and return a DiscoveryState.constraints object
464        '''
465        return constraints(dateRange=dateRange, bbox=bbox,
466                           scope=scope, searchString=searchString, 
467                           geoSearchType=geoSearch)
468       
469
470    def semantic(self):
471        self.__setup()
472        vs = VS(proxyServer = g.proxyServer)
473        if 'searchString' in self.inputs:
474            try:
475                [broader,narrower,synonyms] = vs.getRelated(self.inputs['searchString'])
476                #get a base string for the links to new searches
477                if 'start' in self.inputs: del self.inputs['start']
478                if 'howmany' in self.inputs: del self.inputs['howmany']
479                self.inputs['searchString']='###SEARCHSSTRING###'
480                q='%s/discovery?'%g.server
481                for i in self.inputs: q+='%s=%s&'%(i,self.inputs[i])
482                url=q[0:-1]
483                # and now build the links
484                c.narrower=[]
485                c.broader=[]
486                c.synonyms=[]
487                for i in narrower:
488                    c.narrower.append((i,url.replace('###SEARCHSSTRING###',i)))
489                for i in broader:
490                    c.broader.append((i,url.replace('###SEARCHSSTRING###',i)))
491                for i in synonyms:
492                    c.synonyms.append((i,url.replace('###SEARCHSSTRING###',i)))
493                if c.narrower!=[] or c.broader!=[] or c.synonyms!=[]: c.semAvailable=1
494            except IOError,e:
495                c.semAvailable=0
496                c.semError=' (No valid reply from vocabulary service)'
497                #This should go in a log file ...
498                print 'ERROR: Vocabulary Service: %s (for search [%s])'%(str(e),self.inputs['searchString'])
499        else:
500            broader,narrower,synonyms=[],[],[]
501            c.semAvailable=0
502            c.semError='.'
503       
504        return render('browse/semantic',fragment=True)
505
506   
507    def __displayBrowseSearchResults(self, searchClient):
508        '''
509        Provides the search results for Browse and NumSim content
510        @param searchClient: search client adhering to the ndg.common.clients.interfacesearchclient
511        interface - which has just ran a search
512        '''
513        c.results = searchClient.results
514        c.searchTarget = self.inputs['searchTarget']
515        textTarget = self.inputs['textTarget']
516
517        # check if we've done a search against atoms - NB, this should be the
518        # default eventually - so we can remove all the alternative options
519        isAtom = False
520        if textTarget == SearchClient.ATOM_TARGET:
521            isAtom = True
522       
523        for r in c.results:
524            id = r.id
525                # cope with atom docs
526            if isAtom:
527                r.link = r.href
528            else:
529                n=ndgObject(id,config=self.cf)
530                r.link={'Browse':n.BURL,'NumSim':n.URL}[c.searchTarget]
531
532        # filter atom docs according to publication state
533        if isAtom:
534            c.searchTerm = " - for search term, '%s'" %self.inputs['searchString']
535            if not g.atomEditorEnabled:
536                c.results = self.__filterAtomResults(c.results)
537
538            if c.results:
539                c.searchTerm += ' [%s results found]' %len(c.results)
540               
541               
542            html = render('genshi', 'browse/short_atom_results')
543            # make sure the edit links point to the editor, not the browse service
544            html = html.replace(VTD.BROWSE_SERVER_URL + '/editAtom', g.server + '/editAtom')
545            return html
546        else:
547            return render('browse/short_results')
548
549
550    def __filterAtomResults(self, results):
551        '''
552        Given a set of atom docs search results, filter these to only return docs in the
553        'published' or 'Published' state
554        @param results: list of results as returned by SearchClient
555        @return filteredResults: list of results with only published data included
556        '''
557        logging.debug("Filtering results to remove non-published data")
558        filteredResults = []
559        for result in results:
560            if result.collection.find('ublished') == -1:
561                logging.debug("- found non-published doc - ignoring")
562                continue
563            filteredResults.append(result)
564        logging.debug("- returning filtered results")
565        return filteredResults
566
567           
568    def clearSession(self):
569        '''
570        Clear out all session variables - to help when these change in development
571        '''
572        session.clear()
573        session.save()           
574     
Note: See TracBrowser for help on using the repository browser.