source: TI07-MOLES/trunk/PythonCode/wsgi/ndgSearch.py @ 2116

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI07-MOLES/trunk/PythonCode/wsgi/ndgSearch.py@2116
Revision 2116, 8.4 KB checked in by lawrence, 13 years ago (diff)

Uggh. Previous fix ignored a problem in logging. fixed

Line 
1# these imports are autogenerated by wsdl2py ...
2from DiscoveryService_services_types import *
3from DiscoveryService_services import *
4
5#normal imports
6import sys, time
7try: #python 2.5
8    from xml.etree import ElementTree as ET
9except ImportError:
10    try:
11        # if you've installed it yourself it comes this way
12        import ElementTree as ET
13    except ImportError:
14        # if you've egged it this is the way it comes
15        from elementtree import ElementTree as ET
16   
17from ETxmlView import loadET,et2text
18
19from xml.dom import expatbuilder
20
class ExpatReaderClass:
    ''' Reader adaptor exposing the two expatbuilder parse entry points under the
    names fromString / fromStream (it is handed to the service port as readerclass) '''
    # parse from an open file object or a file name
    fromStream = staticmethod(expatbuilder.parse)
    # parse from an in-memory XML string
    fromString = staticmethod(expatbuilder.parseString)
24 
25
class ndgSearch:
    ''' Provides a client to the NDG discovery services methods exposed by the Discovery.wsdl

    After a call to search() the instance carries the outcome in these attributes:
        documents -- the document names returned by the last search (None if none)
        hits      -- the hit count reported by the service
        status    -- one-element list holding the service status message
        response  -- the raw response object
    '''

    def __init__(self,logger=None,tracefile=None):
        ''' Get an instance of the service.

        logger    -- optional logger; when given, request timings are sent to logger.info
        tracefile -- passed straight through to the service port constructor
        '''
        #how do we get a different backend provider?
        loc=DiscoveryServiceLocator()
        self.server=loc.getDiscoveryServicePortType(readerclass=ExpatReaderClass,tracefile=tracefile)
        self.serverSessionID=None
        self.logger=logger
        self.__reset()

    def __reset(self):
        ''' Initialise and/or making sure we have no hangovers from a previous call '''
        self.documents=None
        self.hits=None
        self.error=None
        self.response=None
        self.status=None

    @staticmethod
    def _stripXmlSuffix(name):
        ''' Return name without one trailing ".xml"; other names come back unchanged.

        str.strip/rstrip take a *set of characters*, so strip('.xml') would also eat
        leading and trailing "x", "m", "l" and "." characters of the real name.
        '''
        suffix='.xml'
        if name.endswith(suffix):
            return name[:-len(suffix)]
        return name

    @staticmethod
    def _dateTuple(date):
        ''' Build the 7-item time tuple used for a date range limit from one dateRange entry.

        The entry is indexed [2],[1],[0] to fill the first three slots, i.e. the entry
        is presumably (day, month, year) -- confirm against the caller.
        '''
        return int(date[2]),int(date[1]),int(date[0]),0,0,0,0

    def search(self,term,start=1,howmany=20,target='All',scope=None,bbox=None,dateRange=None,):
        ''' Get a list of documents for a specific term using a free text search.

        term      -- the search term
        start     -- index of the first result wanted
        howmany   -- number of results wanted
        target    -- one of 'Authors', 'Params' or 'All' (anything else raises KeyError)
        scope     -- optional scope, copied onto the request when not None
        bbox      -- optional sequence of four values convertible to float, taken in
                     the order north, south, west, east
        dateRange -- optional pair of entries (start, end), see _dateTuple

        Returns the list of document names (None when the service reports failure);
        hits, status and response are updated as a side effect.
        '''
        self.__reset()
        request=doSearchRequest()
        request.Term=term
        request.Start=start
        request.HowMany=howmany
        request.TermType={'Authors':'author','Params':'parameter','All':'fullText'}[target]

        if bbox is not None:
            box=request.new_BoundingBox()
            box.LimitNorth,box.LimitSouth,box.LimitWest,box.LimitEast=map(float,bbox)
            request.BoundingBox=box

        if scope is not None:
            request.Scope=scope

        if dateRange is not None:
            # kept in their own names so the 'start' argument is not clobbered
            dRange=request.new_DateRange()
            dRange.DateRangeStart=self._dateTuple(dateRange[0])
            dRange.DateRangeEnd=self._dateTuple(dateRange[1])
            request.DateRange=dRange

        itime=time.time()
        response=self.server.doSearch(request)
        if self.logger:
            etime=time.time()-itime
            self.logger.info('Search Request [%s] took [%ss]'%(term,etime))

        self.hits=response.Hits
        if response._status:
            # only a successful search yields a session id and a document list
            self.serverSessionID=response._resultId
            self.documents=response.Documents.Document
        self.status=[response._statusMessage,]
        self.response=response
        return self.documents

    def getDoc(self,document):
        ''' Return a single document from the backend database.

        Returns the document content on success, otherwise the service status message.
        '''
        #create a request object
        request=doPresentRequest()
        #get an instance of the Documents holder class
        DocList=request.new_documents()
        request.Documents=DocList
        DocList.Document=[document,]
        request.Format='original'
        searchResult=self.server.doPresent(request)
        if searchResult._status:
            return searchResult.Documents.Document[0]
        return searchResult._statusMessage

    def getAllDocs(self,format='original'):
        ''' Retrieve every document named by the last search in one request.

        Returns [] when there is no document list or the service reports failure.
        '''
        if self.documents is None: return []
        itime=time.time()

        #create a request object
        request=doPresentRequest()
        #get an instance of the Documents holder class
        DocList=request.new_documents()
        request.Documents=DocList
        DocList.Document=self.documents
        request.Format=format

        result=self.server.doPresent(request)
        if result._status:
            docs=result.Documents.Document
        else:
            docs=[]

        if self.logger:
            etime=time.time()-itime
            self.logger.info('Document Load [n=%s] took [%ss]'%(len(self.documents),etime))

        return docs

    def __xmlerror(self,docmessage):
        ''' Build an <Error> element describing a document that could not be loaded.

        docmessage -- either a (document, message) tuple or a bare message; a bare
                      message gets an empty <Document>.
        The text is XML-escaped so that the (typically malformed) content that caused
        the failure cannot break the error element itself.
        '''
        from xml.sax.saxutils import escape
        if isinstance(docmessage,tuple):
            document,message=docmessage
        else:
            document,message='',docmessage
        if self.logger:
            self.logger.error('Unable to load document [%s]: %s'%(document,message))
        else:
            print(docmessage)
        # '%s'%(x,) first, so that non-string values are rendered as before
        parts=tuple([escape('%s'%(part,)) for part in (document,message)])
        return ET.fromstring('<Error><Document>%s</Document><Message>%s</Message></Error>'%parts)

    def getDocElement(self,document):
        ''' Takes a document path (maybe from a previous call to ndgSearch) and extracts that document
        into an ElementTree instance; an <Error> element is returned if it will not load '''
        #we stick it straight into element tree because we need to use et to get the actual document
        #we want, not the envelope xml elements
        doc=self.getDoc(document)
        try:
            return loadET(doc)
        except Exception:
            return self.__xmlerror((self._stripXmlSuffix(document),doc))

    def getAllDocsAsElements(self):
        ''' Get all the documents and load them into a list of ET instances.

        A document that fails to load is represented by an <Error> element, labelled
        with the matching name from the last search when one is available.
        '''
        names=self.documents or []
        result=[]
        for index,doc in enumerate(self.getAllDocs()):
            try:
                result.append(loadET(doc))
            except Exception:
                if index<len(names):
                    label=self._stripXmlSuffix(names[index])
                else:
                    label=''
                result.append(self.__xmlerror((label,doc)))
        return result

    def getLabelledDocs(self,format='original'):
        ''' Returns all the documents in sequence as a list of (label, content) pairs,
        the label being the document name without its ".xml" suffix.

        Returns [] when the last search had no hits or left no document list.
        Raises ValueError when the service returns a different number of documents
        from the number asked for.
        '''
        if self.hits==0 or not self.documents: return []
        responses=self.getAllDocs(format)
        filenames=self.documents
        i=len(filenames)
        j=len(responses)
        if i!=j:
            raise ValueError('Internal inconsistency in search return [%s!=%s]'%(i,j))
        return [(self._stripXmlSuffix(name),response) for name,response in zip(filenames,responses)]
190       
191       
192import unittest
193
class TestCase(unittest.TestCase):
    # NOTE: every test below builds an ndgSearch client and calls the discovery
    # service through it; most of them only print what came back.

    def testSearch(self):
        ''' Test fundamental search capability '''
        wanted=10
        client=ndgSearch()
        client.search('temperature',start=1,howmany=wanted)
        # joined by hand so the line reads the same as a comma separated print
        print(' '.join(['Asked for ',str(wanted),' documents (there were %s hits):'%client.hits]))
        print(client.documents)

    def testgetLabelledDocs(self):
        ''' Test returning of labelled documents '''
        client=ndgSearch()
        client.search('CD97')
        print(client.status)
        print(client.documents)
        client.getLabelledDocs()

    def testNoReturn(self):
        ''' Tests a search return with (hopefully nothing to be found)'''
        client=ndgSearch()
        client.search('xpabnl')
        print(' '.join(['Hopefully this is zero: if not, expect the NoReturn test to fail:',str(client.hits)]))
        labelled=client.getLabelledDocs()
        self.assertEqual(len(labelled),0)

    def testGetDoc(self):
        ''' Test obtaining a specific document which had better exist '''
        client=ndgSearch()
        client.getDoc('noc.soton.ac.uk__DIF__NOCSDAT100.xml')

    def testSequence(self):
        ''' Tests that repeated searches work and can support zero responses in the middle '''
        # this was a bug we found and needed a test case for
        client=ndgSearch()
        # hopefully just get a couple of hits for now.
        client.search('CD97')
        print(client.hits)
        # fetching the labelled documents between the two searches is what used to break
        client.getLabelledDocs()
        client.search('xpabnl')
        print(client.hits)
        print(' '.join(['Hopefully this is also zero: if not, expect the Sequence test to fail:',str(client.hits)]))
244
245       
246
# run the test cases above when this file is executed as a script
if __name__ == '__main__':
    unittest.main()
Note: See TracBrowser for help on using the repository browser.