source: TI07-MOLES/trunk/PythonCode/wsgi/ndgSearch.py @ 2045

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI07-MOLES/trunk/PythonCode/wsgi/ndgSearch.py@2045
Revision 2045, 7.7 KB checked in by lawrence, 13 years ago (diff)

Sundry modes to wsgi browse/discovery on the road to deploying on
glue (mostly about the many possible locations of elementtree)

RevLine 
[1905]1# these imports are autogenerated by wsdl2py ...
2from DiscoveryService_services_types import *
3from DiscoveryService_services import *
4
5#normal imports
6import sys, time
7try: #python 2.5
[2045]8    from xml.etree import ElementTree as ET
9except ImportError:
10    try:
11        # if you've installed it yourself it comes this way
12        import ElementTree as ET
13    except ImportError:
14        # if you've egged it this is the way it comes
15        from elementtree import ElementTree as ET
[1905]16   
17from ETxmlView import loadET,et2text
18
19from xml.dom import expatbuilder
20
class ExpatReaderClass:
    '''XML reader backed by xml.dom.expatbuilder.

    Exposes the two entry points, fromString and fromStream, as static
    methods that forward every argument to the matching expatbuilder
    function and return whatever that function returns.
    '''

    @staticmethod
    def fromString(*args, **kwargs):
        '''Parse XML held in a string (forwards to expatbuilder.parseString).'''
        return expatbuilder.parseString(*args, **kwargs)

    @staticmethod
    def fromStream(*args, **kwargs):
        '''Parse XML from a file (forwards to expatbuilder.parse).'''
        return expatbuilder.parse(*args, **kwargs)
24 
25
class ndgSearch:
    ''' Provides a client to the NDG discovery services methods exposed by the Discovery.wsdl

    Results of the most recent calls are kept on the instance:

    documents       -- ``_documents`` of the last successful search; None
                       before any search and after a failed one
    hits            -- ``Hits`` of the last search response
    serverSessionID -- ``_resultId`` of the last successful search
    status          -- list holding the last search's status message
    response        -- the raw response object of the last search
    docs            -- what the last getAllDocs call retrieved
    error           -- initialised to None; not written by any method here
    '''

    def __init__(self, logger=None, tracefile=None):
        '''Get an instance of the service.

        logger    -- optional object with an ``info`` method; when given,
                     the duration of each request is reported through it
        tracefile -- handed unchanged to the generated port-type factory
        '''
        #how do we get a different backend provider?
        loc = DiscoveryServiceLocator()
        self.server = loc.getDiscoveryServicePortType(
            readerclass=ExpatReaderClass, tracefile=tracefile)
        self.documents = None
        self.hits = None
        self.serverSessionID = None
        self.logger = logger
        self.error = None
        # Give the attributes that search/getAllDocs fill in a defined value
        # so that reading them before the first call is not an AttributeError.
        self.status = []
        self.response = None
        self.docs = []

    @staticmethod
    def _stripXmlSuffix(name):
        '''Return name without a single trailing '.xml', if it has one.

        str.strip/rstrip cannot be used for this: their argument is a set of
        characters, so 'album'.rstrip('.xml') is 'albu'.
        '''
        suffix = '.xml'
        if name.endswith(suffix):
            return name[:-len(suffix)]
        return name

    def search(self, term, start=1, howmany=20, target='All', scope=None, bbox=None, dateRange=None):
        ''' Get a list of documents for a specific term using a free text search

        term      -- the search term (request.Term)
        start     -- request.Start
        howmany   -- request.HowMany
        target    -- 'All', 'Authors' or 'Params'; selects the request's
                     TermType ('fullText', 'author', 'parameter').  Any
                     other value raises KeyError.
        scope     -- if not None, sent as request.Scope
        bbox      -- if not None, four values converted to float and sent as
                     the north, south, west and east limits, in that order
        dateRange -- if not None, a (first, last) pair; items 2, 1 and 0 of
                     each are converted to int and become the leading three
                     fields of a seven-field tuple padded with zeros
                     (presumably the input is day, month, year -- confirm
                     against the caller)

        Returns self.documents, which is None when the service reported
        failure.
        '''
        termTypes = {'Authors': 'author', 'Params': 'parameter', 'All': 'fullText'}

        request = doSearchRequest()
        request.Term = term
        request.Start = start
        request.HowMany = howmany
        request.TermType = termTypes[target]

        if bbox is not None:
            box = request.new_BoundingBox()
            (box.LimitNorth, box.LimitSouth,
             box.LimitWest, box.LimitEast) = [float(limit) for limit in bbox]
            request.BoundingBox = box

        if scope is not None:
            request.Scope = scope

        if dateRange is not None:
            # Named so as not to hide the ``start`` paging parameter.
            first, last = dateRange[0], dateRange[1]
            dRange = request.new_DateRange()
            dRange.DateRangeStart = (int(first[2]), int(first[1]), int(first[0]), 0, 0, 0, 0)
            dRange.DateRangeEnd = (int(last[2]), int(last[1]), int(last[0]), 0, 0, 0, 0)
            request.DateRange = dRange

        if self.logger: itime = time.time()
        response = self.server.doSearch(request)
        if self.logger:
            etime = time.time() - itime
            self.logger.info('Search Request [%s] took [%ss]' % (term, etime))

        if response._status:
            self.serverSessionID = response._resultId
            self.documents = response._documents
        else:
            # Do not leave the previous search's results in place, otherwise
            # a failed search would return (and later present) stale data.
            self.serverSessionID = None
            self.documents = None
        self.hits = response.Hits
        self.status = [response._statusMessage,]
        self.response = response
        return self.documents

    def getDoc(self, document):
        ''' Return a single document from the backend database

        Returns the first presented document when the service reports
        success, otherwise the service's status message.  Exceptions raised
        by the service call propagate unchanged.
        '''
        # NOTE(review): this method addresses the request and response
        # through ``_documents``/``_format`` and indexes ``_documents``
        # directly, whereas getAllDocs uses ``Documents``/``Format`` and
        # ``_documents._document`` -- confirm both match the generated types.
        request = doPresentRequest()
        request._documents = [document,]
        request._format = 'original'
        searchResult = self.server.doPresent(request)
        if searchResult._status:
            return searchResult._documents[0]
        return searchResult._statusMessage

    def getAllDocs(self, format='original'):
        ''' Parse the list of documents, and retrieve them directly

        Presents every document of the last search in the given format.
        Returns (and stores in self.docs) the retrieved documents; the list
        is empty when there is nothing to present or the service reports
        failure.  Resets self.status to an empty list.
        '''
        docs = []
        self.status = []
        self.docs = docs

        if self.documents is None: return []

        #create a request object
        request = doPresentRequest()
        if self.logger: itime = time.time()

        request.Documents = self.documents
        request.Format = format
        result = self.server.doPresent(request)
        if result._status:
            docs = result._documents._document

        if self.logger:
            etime = time.time() - itime
            self.logger.info('Document Load [n=%s] took [%ss]' % (len(self.documents._document), etime))
        self.docs = docs
        return self.docs

    def __xmlerror(self, docmessage):
        ''' Build an <Error> element for a document that could not be loaded

        docmessage -- (document, message) pair

        The element is assembled node by node rather than parsed from a
        formatted string: the message is typically the very text that
        failed to parse, so embedding it in markup would fail again.
        '''
        document, message = docmessage
        # Diagnostics go to stderr, not stdout.
        sys.stderr.write('%s\n' % (docmessage,))
        error = ET.Element('Error')
        ET.SubElement(error, 'Document').text = '%s' % (document,)
        ET.SubElement(error, 'Message').text = '%s' % (message,)
        return error

    def getDocElement(self, document):
        ''' Takes a document path (maybe from a previous call to ndgSearch) and extracts that document
        into an ElementTree instance

        If the document cannot be loaded an <Error> element naming the
        document (without its '.xml' suffix) is returned instead.
        '''
        #we stick it straight into element tree because we need to use et to get the actual document
        #we want, not the envelope xml elements
        doc = self.getDoc(document)
        path = self._stripXmlSuffix(document)
        try:
            return loadET(doc)
        except Exception:
            return self.__xmlerror((path, doc))

    def getAllDocsAsElements(self):
        ''' Get all the documents and load them into a list of ET instances

        A document that cannot be loaded is represented by an <Error>
        element labelled with its zero-based position in the result list.
        '''
        result = []
        for index, doc in enumerate(self.getAllDocs()):
            try:
                element = loadET(doc)
            except Exception:
                element = self.__xmlerror(('document %d' % index, doc))
            result.append(element)
        return result

    def getLabelledDocs(self, format='original'):
        ''' Returns all the documents in sequence in a labelled list of strings

        Each entry is a (label, document) tuple where the label is the
        document's name without its '.xml' suffix.  Returns an empty list
        when the last search had no hits or no documents are held.  Raises
        ValueError if the number of retrieved documents differs from the
        number of names.
        '''
        if self.hits == 0 or self.documents is None: return []
        responses = self.getAllDocs(format)
        filenames = self.documents.Document
        expected, received = len(filenames), len(responses)
        if expected != received:
            raise ValueError('Internal inconsistency in search return [%s!=%s]' % (expected, received))
        return [(self._stripXmlSuffix(name), response)
                for name, response in zip(filenames, responses)]
192       
193       
194import unittest
195
class TestCase(unittest.TestCase):
    ''' Smoke tests for ndgSearch.

    Every test builds a real ndgSearch and issues requests through it, so
    they need the discovery service behind DiscoveryServiceLocator to be
    reachable.
    '''

    def testSearch(self):
        ''' Test fundamental search capability '''
        term = 'temperature'
        s = ndgSearch()
        howmany = 10
        docs = s.search(term, start=1, howmany=howmany)
        print('Asked for  %s  documents (there were %s hits):' % (howmany, s.hits))
        print(s.documents.Document)
        # search() returns the same object it stores on the instance
        self.assertTrue(docs is s.documents)

    def testgetLabelledDocs(self):
        ''' Test returning of labelled documents '''
        term = 'neodc'
        s = ndgSearch()
        s.search(term)
        print(s.status)
        output = s.getLabelledDocs()
        # every entry is a (label, document) pair
        for entry in output:
            self.assertEqual(len(entry), 2)

    def testNoReturn(self):
        ''' Tests a search return with (hopefully nothing to be found)'''
        term = 'xpabnl'
        s = ndgSearch()
        s.search(term)
        print('Hopefully this is zero: if not, expect the NoReturn test to fail: %s' % (s.hits,))
        output = s.getLabelledDocs()
        self.assertEqual(len(output), 0)
223
# Script entry point: hand control to unittest, which runs the tests
# defined in this module.
if __name__=="__main__":
    unittest.main()
Note: See TracBrowser for help on using the repository browser.