source: TI07-MOLES/trunk/PythonCode/wsgi/ndgSearch.py @ 2089

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI07-MOLES/trunk/PythonCode/wsgi/ndgSearch.py@2089
Revision 2089, 8.4 KB checked in by lawrence, 13 years ago (diff)

This version of ndgSearch.py includes a test case for ticket:641 (which
is still not fixed).

Line 
1# these imports are autogenerated by wsdl2py ...
2from DiscoveryService_services_types import *
3from DiscoveryService_services import *
4
5#normal imports
6import sys, time
7try: #python 2.5
8    from xml.etree import ElementTree as ET
9except ImportError:
10    try:
11        # if you've installed it yourself it comes this way
12        import ElementTree as ET
13    except ImportError:
14        # if you've egged it this is the way it comes
15        from elementtree import ElementTree as ET
16   
17from ETxmlView import loadET,et2text
18
19from xml.dom import expatbuilder
20
class ExpatReaderClass:
    ''' Reader adapter built on xml.dom.expatbuilder.

    Exposes the expatbuilder parse functions under the names fromStream and
    fromString; ndgSearch passes this class as the readerclass argument when
    it obtains the service port. '''
    # parse from a stream
    fromStream = staticmethod(expatbuilder.parse)
    # parse from an in-memory string
    fromString = staticmethod(expatbuilder.parseString)
24 
25
class ndgSearch:
    ''' Provides a client to the NDG discovery services methods exposed by the Discovery.wsdl.

    Call search() first, then fetch the matching documents with getAllDocs(),
    getAllDocsAsElements() or getLabelledDocs(). The outcome of the most recent
    search is kept on the instance:

      documents       -- document list object of the last successful search, else None
      hits            -- hit count reported by the service
      status          -- list holding the service status message
      response        -- the raw response object of the last search
      serverSessionID -- result id of the last successful search
    '''

    def __init__(self,logger=None,tracefile=None):
        ''' Get an instance of the service.

        logger    -- optional; when set, its info() method records request timings
        tracefile -- optional; handed straight through to the generated service port
        '''
        #how do we get a different backend provider?
        loc=DiscoveryServiceLocator()
        self.server=loc.getDiscoveryServicePortType(readerclass=ExpatReaderClass,tracefile=tracefile)
        self.serverSessionID=None
        self.logger=logger
        self.__reset()

    def __reset(self):
        ''' Initialise and/or making sure we have no hangovers from a previous call '''
        self.documents=None
        self.hits=None
        self.error=None
        self.response=None
        self.status=None

    @staticmethod
    def _stripXmlSuffix(name):
        ''' Return name without one trailing '.xml' (unchanged if it has none).

        str.strip/rstrip('.xml') remove any run of the characters '.', 'x', 'm'
        and 'l' rather than the suffix, which mangles names such as 'model.xml'. '''
        suffix='.xml'
        if name.endswith(suffix):
            return name[:-len(suffix)]
        return name

    def search(self,term,start=1,howmany=20,target='All',scope=None,bbox=None,dateRange=None):
        ''' Get a list of documents for a specific term using a free text search.

        term      -- the search term
        start     -- value sent as the request Start field
        howmany   -- value sent as the request HowMany field
        target    -- one of 'Authors', 'Params' or 'All'; anything else raises KeyError
        scope     -- optional value sent as the request Scope field
        bbox      -- optional sequence of four values convertible to float, in the
                     order north, south, west, east
        dateRange -- optional pair (first, last); items [2], [1], [0] of each are
                     converted to int and sent in that order
                     (presumably each is (day, month, year) -- confirm against callers)

        Returns the document list object of the response, or None when the
        service reports a false status. '''

        self.__reset()
        request=doSearchRequest()
        request.Term=term
        request.Start=start
        request.HowMany=howmany
        #one request type serves all targets, selected through TermType
        request.TermType={'Authors':'author','Params':'parameter','All':'fullText'}[target]
        if bbox is not None:
            box=request.new_BoundingBox()
            box.LimitNorth,box.LimitSouth,box.LimitWest,box.LimitEast=[float(v) for v in bbox]
            request.BoundingBox=box

        if scope is not None:
            request.Scope=scope

        if dateRange is not None:
            #local names chosen so that the start parameter is not rebound
            first,last=dateRange[0],dateRange[1]
            dRange=request.new_DateRange()
            dRange.DateRangeStart=(int(first[2]),int(first[1]),int(first[0]),0,0,0,0)
            dRange.DateRangeEnd=(int(last[2]),int(last[1]),int(last[0]),0,0,0,0)
            request.DateRange=dRange

        if self.logger: itime=time.time()
        response=self.server.doSearch(request)

        if self.logger:
            etime=time.time()-itime
            self.logger.info('Search Request [%s] took [%ss]'%(term,etime))
        #the hit count is recorded whatever the status
        self.hits=response.Hits
        if response._status:
            self.serverSessionID=response._resultId
            self.documents=response._documents
        self.status=[response._statusMessage,]
        self.response=response
        return self.documents

    def getDoc(self,document):
        ''' Return a single document from the backend database.

        Returns the first document of the response when the service reports
        success, otherwise the service status message. Exceptions from the
        service call propagate unchanged. '''

        #create a request object
        request=doPresentRequest()
        request._documents=[document,]
        request._format='original'
        searchResult=self.server.doPresent(request)
        if searchResult._status:
            return searchResult._documents[0]
        return searchResult._statusMessage

    def getAllDocs(self,format='original'):
        ''' Retrieve every document found by the last search, in the given format.

        Resets self.status to an empty list. Returns [] when there is no
        document list or the service reports a false status. '''
        docs=[]
        self.status=[]

        if self.documents is None: return []

        #create a request object
        request=doPresentRequest()
        if self.logger: itime=time.time()

        request.Documents=self.documents
        request.Format=format
        result=self.server.doPresent(request)
        if result._status:
            docs=result._documents._document

        if self.logger:
            etime=time.time()-itime
            self.logger.info('Document Load [n=%s] took [%ss]'%(len(self.documents._document),etime))
        #removed self.docs reference Jan 31st, can't see why we had it ...
        #nb returning a complete copy of the list didn't fix the problems with repeated calls
        return docs

    def __xmlerror(self,docmessage):
        ''' Build an <Error> element describing a document that could not be loaded.

        docmessage is either a (document, message) tuple or just the message.
        The element is assembled directly rather than parsed from a formatted
        string, so markup characters in the message cannot break the result. '''
        print(docmessage)
        if isinstance(docmessage,tuple):
            document,message=docmessage
        else:
            document,message='',docmessage
        error=ET.Element('Error')
        #'%s' formatting gives the same text coercion the original template used
        ET.SubElement(error,'Document').text='%s'%(document,)
        ET.SubElement(error,'Message').text='%s'%(message,)
        return error

    def getDocElement(self,document):
        ''' Takes a document path (maybe from a previous call to ndgSearch) and extracts that document
        into an ElementTree instance; an <Error> element is returned if it cannot be loaded. '''
        #we stick it straight into element tree because we need to use et to get the actual document
        #we want, not the envelope xml elements

        doc=self.getDoc(document)
        path=self._stripXmlSuffix(document)
        try:
            return loadET(doc)
        except Exception:
            return self.__xmlerror((path,doc))

    def getAllDocsAsElements(self):
        ''' Get all the documents and load them into a list of ET instances.

        A document that fails to load contributes an <Error> element instead. '''
        result=[]
        for doc in self.getAllDocs():
            try:
                result.append(loadET(doc))
            except Exception:
                #no document name is available here, so only the content is reported
                result.append(self.__xmlerror(doc))
        return result

    def getLabelledDocs(self,format='original'):
        ''' Returns all the documents in sequence in a labelled list of strings.

        Each entry is (name, document), where name is the document file name
        without its '.xml' suffix. Returns [] when the last search had no hits
        or left no document list. Raises ValueError if the number of documents
        returned differs from the number of names. '''
        if self.hits==0 or self.documents is None: return []
        #nb the getAllDocs call is the one implicated in the repeated-call problem
        responses=self.getAllDocs(format)
        filenames=self.documents.Document
        nNames,nDocs=len(filenames),len(responses)
        if nNames!=nDocs:
            raise ValueError('Internal inconsistency in search return [%s!=%s]'%(nNames,nDocs))
        return [(self._stripXmlSuffix(name),doc) for name,doc in zip(filenames,responses)]
187       
188       
189import unittest
190
class TestCase(unittest.TestCase):
    ''' Tests that drive ndgSearch; each one builds its own client and calls the discovery service. '''

    def testSearch(self):
        ''' Test fundamental search capability '''
        wanted=10
        client=ndgSearch()
        found=client.search('temperature',start=1,howmany=wanted)
        print('%s %s %s'%('Asked for ',wanted,' documents (there were %s hits):'%client.hits))
        print(client.documents.Document)

    def testgetLabelledDocs(self):
        ''' Test returning of labelled documents '''
        client=ndgSearch()
        found=client.search('neodc')
        print(client.status)
        labelled=client.getLabelledDocs()

    def testNoReturn(self):
        ''' Tests a search return with (hopefully nothing to be found)'''
        client=ndgSearch()
        found=client.search('xpabnl')
        print('%s %s'%('Hopefully this is zero: if not, expect the NoReturn test to fail:',client.hits))
        labelled=client.getLabelledDocs()
        self.assertEqual(len(labelled),0)

    def testSequence(self):
        ''' Tests that repeated searches work and can support zero responses in the middle '''
        # this was a bug we found and needed a test case for
        client=ndgSearch(tracefile=sys.stdout)
        found=client.search('neodc')
        print(client.hits)
        #calling getLabelledDocs() at this point breaks the second search; the
        #dependency has been isolated to the getAllDocs() call it makes, so that
        #call is made directly here (del of the result doesn't help ...)
        fetched=client.getAllDocs()
        found=client.search('xpabnl')
        print(client.hits)
        print('%s %s'%('Hopefully this is also zero: if not, expect the Sequence test to fail:',client.hits))
        #labelled=client.getLabelledDocs()
        #self.assertEqual(len(labelled),0)
239
# run the test cases above when this module is executed as a script
if __name__ == "__main__":
    unittest.main()
Note: See TracBrowser for help on using the repository browser.