source: TI07-MOLES/trunk/PythonCode/wsgi/ndgSearch.py @ 1989

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI07-MOLES/trunk/PythonCode/wsgi/ndgSearch.py@1989
Revision 1989, 7.5 KB checked in by lawrence, 13 years ago (diff)

Modifications to support the new eXist database layout for discovery
(don't yet work on glue!)

Line 
1# these imports are autogenerated by wsdl2py ...
2from DiscoveryService_services_types import *
3from DiscoveryService_services import *
4
5#normal imports
6import sys, time
7try: #python 2.5
8    import xml.etree.ElementTree as ET
9except:
10    import ElementTree as ET
11   
12from ETxmlView import loadET,et2text
13
14from xml.dom import expatbuilder
15
class ExpatReaderClass:
    ''' Reader adapter passed as ``readerclass`` to the generated service port.

    Both entry points simply delegate to xml.dom.expatbuilder, so the
    SOAP responses are parsed with the expat-based minidom builder. '''

    @staticmethod
    def fromString(*args,**kwargs):
        ''' Parse an XML document held in a string (expatbuilder.parseString) '''
        return expatbuilder.parseString(*args,**kwargs)

    @staticmethod
    def fromStream(*args,**kwargs):
        ''' Parse an XML document from a file name or file object (expatbuilder.parse) '''
        return expatbuilder.parse(*args,**kwargs)
19 
20
class ndgSearch:
    ''' Provides a client to the NDG discovery services methods exposed by the Discovery.wsdl

    State kept on the instance after a call to search():
      documents       -- the document holder returned by the last successful search (None otherwise)
      hits            -- the hit count reported by the last search
      serverSessionID -- the result id reported by the last successful search
      status          -- list holding the status message(s) of the last operation
      response        -- the raw response object of the last search
      docs            -- the documents loaded by the last getAllDocs() call
    '''

    # maps the public ``target`` argument of search() onto the service TermType
    _TERM_TYPES={'Authors':'author','Params':'parameter','All':'fullText'}

    def __init__(self,logger=None,tracefile=None):
        ''' Get an instance of the service.

        logger    -- optional logger; when given, request timings (info) and
                     document parse failures (error) are reported to it
        tracefile -- handed unchanged to the generated port type factory
        '''
        #how do we get a different backend provider?
        loc=DiscoveryServiceLocator()
        self.server=loc.getDiscoveryServicePortType(readerclass=ExpatReaderClass,tracefile=tracefile)
        self.documents=None
        self.hits=None
        self.serverSessionID=None
        self.logger=logger
        self.error=None
        # defined up front so they can be read before the first search()/getAllDocs()
        self.status=[]
        self.response=None
        self.docs=[]

    @staticmethod
    def _stripXmlSuffix(name):
        ''' Return name without a trailing '.xml' (unchanged if it has none).

        str.strip/rstrip('.xml') must not be used for this: they remove any run of
        the characters '.', 'x', 'm', 'l', so 'index.xml' would become 'inde'. '''
        suffix='.xml'
        if name.endswith(suffix):
            return name[:-len(suffix)]
        return name

    @staticmethod
    def _dateTuple(parts):
        ''' Build the 7-tuple used for a date range bound from a 3 item sequence.

        The first three items are taken in reverse order and the remaining four
        fields are zero.
        NOTE(review): presumably parts is (day, month, year) giving
        (year, month, day, 0, 0, 0, 0) -- confirm against the callers. '''
        return int(parts[2]),int(parts[1]),int(parts[0]),0,0,0,0

    def search(self,term,start=1,howmany=20,target='All',scope=None,bbox=None,dateRange=None):
        ''' Get a list of documents for a specific term using a free text search

        term      -- the search term
        start     -- value sent as the request Start
        howmany   -- value sent as the request HowMany
        target    -- one of 'All', 'Authors' or 'Params' (anything else raises KeyError)
        scope     -- optional value sent as the request Scope
        bbox      -- optional 4 item sequence, converted to floats and assigned in
                     the order LimitNorth, LimitSouth, LimitWest, LimitEast
        dateRange -- optional pair of 3 item sequences (see _dateTuple)

        Returns the document holder of the response, or None if the service
        reported a failure (self.status then carries the service message).
        '''
        # a single doSearch operation is used for all targets; the target only
        # selects the TermType
        request=doSearchRequest()
        request.Term=term
        request.Start=start
        request.HowMany=howmany
        request.TermType=self._TERM_TYPES[target]

        if bbox is not None:
            box=request.new_BoundingBox()
            box.LimitNorth,box.LimitSouth,box.LimitWest,box.LimitEast=[float(v) for v in bbox]
            request.BoundingBox=box

        if scope is not None:
            request.Scope=scope

        if dateRange is not None:
            # separate local names: the original code rebound the ``start`` argument here
            dRange=request.new_DateRange()
            dRange.DateRangeStart=self._dateTuple(dateRange[0])
            dRange.DateRangeEnd=self._dateTuple(dateRange[1])
            request.DateRange=dRange

        if self.logger: itime=time.time()
        response=self.server.doSearch(request)
        if self.logger:
            etime=time.time()-itime
            self.logger.info('Search Request [%s] took [%ss]'%(term,etime))

        if response._status:
            self.serverSessionID=response._resultId
            self.documents=response._documents
        else:
            # do not hand back (or later present) the documents of an earlier search
            self.documents=None
        self.hits=response.Hits
        self.status=[response._statusMessage,]
        self.response=response
        return self.documents

    def getDoc(self,document):
        ''' Return a single document from the backend database

        Returns the first document of the response, or the service status
        message if the service reported a failure. Exceptions raised by the
        service call propagate unchanged. '''
        #create a request object
        request=doPresentRequest()
        # NOTE(review): getAllDocs() fills request.Documents/request.Format with a
        # document holder object, whereas this method sets the underscore
        # attributes to a plain list -- confirm this still matches the service types.
        request._documents=[document,]
        request._format='original'
        searchResult=self.server.doPresent(request)
        if searchResult._status:
            return searchResult._documents[0]
        return searchResult._statusMessage

    def getAllDocs(self,format='original'):
        ''' Parse the list of documents, and retrieve them directly

        Presents every document found by the last search in the requested
        format. Returns (and stores in self.docs) the list of documents, which
        is empty if there was no successful search or the service reported a
        failure. '''
        docs=[]
        self.status=[]

        if self.documents is None:
            self.docs=docs
            return docs

        #create a request object
        request=doPresentRequest()
        if self.logger: itime=time.time()

        request.Documents=self.documents
        request.Format=format
        result=self.server.doPresent(request)
        if result._status:
            docs=result._documents._document

        if self.logger:
            etime=time.time()-itime
            self.logger.info('Document Load [n=%s] took [%ss]'%(len(self.documents._document),etime))
        self.docs=docs
        return self.docs

    def __xmlerror(self,docmessage):
        ''' Build an <Error> element from a (document, message) pair.

        The element is assembled with ElementTree rather than by parsing a
        formatted string: the message is normally a document that has just
        failed to parse, so embedding it in XML source would fail again. '''
        document,message=docmessage
        if self.logger:
            self.logger.error('Unable to load document [%s]: %s'%(document,message))
        else:
            print(docmessage)
        error=ET.Element('Error')
        ET.SubElement(error,'Document').text='%s'%(document,)
        ET.SubElement(error,'Message').text='%s'%(message,)
        return error

    def getDocElement(self,document):
        ''' Takes a document path (maybe from a previous call to ndgSearch) and extracts that document
        into an ElementTree instance

        If the document cannot be loaded an <Error> element is returned instead,
        carrying the document path (without '.xml') and the text received. '''
        #we stick it straight into element tree because we need to use et to get the actual document
        #we want, not the envelope xml elements
        doc=self.getDoc(document)
        try:
            return loadET(doc)
        except Exception:
            return self.__xmlerror((self._stripXmlSuffix(document),doc))

    def getAllDocsAsElements(self):
        ''' Get all the documents and load them into a list of ET instances

        A document that cannot be loaded is represented by an <Error> element
        labelled with its position in the result list. '''
        result=[]
        for index,doc in enumerate(self.getAllDocs()):
            try:
                result.append(loadET(doc))
            except Exception:
                # __xmlerror needs a (document, message) pair; only the position is known here
                result.append(self.__xmlerror(('document %s'%index,doc)))
        return result

    def getLabelledDocs(self,format='original'):
        ''' Returns all the documents in sequence in a labelled list of strings

        Each entry is a (name, document) tuple where name is the document file
        name without its '.xml' suffix. Returns an empty list when there are no
        hits or no successful search has been made. Raises ValueError if the
        number of documents presented differs from the number found. '''
        if not self.hits or self.documents is None: return []
        filenames=self.documents.Document
        responses=self.getAllDocs(format)
        i=len(filenames)
        j=len(responses)
        if i!=j:
            raise ValueError('Internal inconsistency in search return [%s!=%s]'%(i,j))
        return [(self._stripXmlSuffix(name),doc) for name,doc in zip(filenames,responses)]
187       
188       
189import unittest
190
class TestCase(unittest.TestCase):
    ''' Tests for ndgSearch; every test builds a client and sends a real search request '''

    def testSearch(self):
        ''' Test fundamental search capability '''
        wanted=10
        client=ndgSearch()
        client.search('temperature',start=1,howmany=wanted)
        # items are joined with single spaces, exactly as a comma separated print would emit them
        print(' '.join(['Asked for ',str(wanted),' documents (there were %s hits):'%client.hits]))
        print(client.documents.Document)

    def testgetLabelledDocs(self):
        ''' Test returning of labelled documents '''
        client=ndgSearch()
        client.search('neodc')
        print(client.status)
        client.getLabelledDocs()

    def testNoReturn(self):
        ''' Tests a search return with (hopefully nothing to be found)'''
        client=ndgSearch()
        client.search('xpabnl')
        print(' '.join(['Hopefully this is zero: if not, expect the NoReturn test to fail:',str(client.hits)]))
        labelled=client.getLabelledDocs()
        self.assertEqual(len(labelled),0)
218
# run the unit tests when this module is executed as a script
if __name__ == "__main__":
    unittest.main()
Note: See TracBrowser for help on using the repository browser.