source: TI05-delivery/ows_framework/trunk/ows_server/ows_server/models/ndgSearch.py @ 2615

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI05-delivery/ows_framework/trunk/ows_server/ows_server/models/ndgSearch.py@2615
Revision 2615, 9.4 KB checked in by lawrence, 12 years ago (diff)

Moving some of the WSGI stack into the OWS stack.
At this point an XML DIF document retrieve works in ows_common.

Line 
1# these imports are autogenerated by wsdl2py ...
2from DiscoveryService_services_types import *
3from DiscoveryService_services import *
4
5#normal imports
6import sys, time
7try: #python 2.5
8    from xml.etree import ElementTree as ET
9except ImportError:
10    try:
11        # if you've installed it yourself it comes this way
12        import ElementTree as ET
13    except ImportError:
14        # if you've egged it this is the way it comes
15        from elementtree import ElementTree as ET
16   
17from ETxmlView import loadET,et2text
18
19from xml.dom import expatbuilder
20
class ExpatReaderClass:
    ''' Reader adapter handed to the generated service port as ``readerclass``.

    Exposes the two expatbuilder parsing entry points under the names
    ``fromString`` and ``fromStream``. '''

    @staticmethod
    def fromString(*args, **kwargs):
        ''' Parse XML held in a string; forwards to expatbuilder.parseString '''
        return expatbuilder.parseString(*args, **kwargs)

    @staticmethod
    def fromStream(*args, **kwargs):
        ''' Parse XML from a file name or file object; forwards to expatbuilder.parse '''
        return expatbuilder.parse(*args, **kwargs)
24 
25
class ndgSearch:
    ''' Provides a client to the NDG discovery services methods exposed by the Discovery.wsdl

    After a call to search() the instance carries the outcome of that call in
    the attributes documents, hits, status and response; they are all reset to
    None at the start of every search. '''

    def __init__(self, logger=None, tracefile=None):
        ''' Get an instance of the service.

        logger    -- optional object with an info() method; when supplied, the
                     time taken by search() and getAllDocs() is reported to it.
        tracefile -- handed unchanged to the generated service port.
        '''
        #how do we get a different backend provider?
        loc = DiscoveryServiceLocator()
        self.server = loc.getDiscoveryServicePortType(readerclass=ExpatReaderClass,
                                                      tracefile=tracefile)
        self.serverSessionID = None
        self.logger = logger
        self.__reset()

    def __reset(self):
        ''' Initialise and/or making sure we have no hangovers from a previous call '''
        self.documents = None
        self.hits = None
        self.error = None
        self.response = None
        self.status = None

    @staticmethod
    def _stripXmlSuffix(name):
        ''' Return name without a trailing '.xml' (and otherwise unchanged).

        str.strip/rstrip take a *set of characters*, so 'model.xml'.rstrip('.xml')
        gives 'mode'; this removes the literal suffix only. '''
        if name.endswith('.xml'):
            return name[:-len('.xml')]
        return name

    @staticmethod
    def _dateFields(parts):
        ''' Build the 7-item tuple used for a date range limit.

        Items [2], [1], [0] of parts are converted to int, in that order, and
        followed by four zeros. Presumably parts is (day, month, year) and the
        result is (year, month, day, h, m, s, ms) -- confirm against the WSDL. '''
        return int(parts[2]), int(parts[1]), int(parts[0]), 0, 0, 0, 0

    def search(self, term, start=1, howmany=20, target='All', scope=None, bbox=None, dateRange=None):
        ''' Get a list of documents for a specific term using a free text search.

        term      -- the search term.
        start     -- sent as the request's Start field.
        howmany   -- sent as the request's HowMany field.
        target    -- one of 'All', 'Authors' or 'Params'; selects the TermType
                     ('fullText', 'author', 'parameter'). Any other value
                     raises KeyError.
        scope     -- optional, sent as the request's Scope field.
        bbox      -- optional sequence of four values convertible to float, in
                     the order north, south, west, east.
        dateRange -- optional pair (start, end); see _dateFields for the layout
                     of each half.

        Returns self.documents: the document list from the response when the
        response status is true, otherwise None. self.hits, self.status and
        self.response are set in both cases.
        '''
        self.__reset()

        request = doSearchRequest()
        request.Term = term
        request.Start = start
        request.HowMany = howmany
        request.TermType = {'Authors': 'author', 'Params': 'parameter', 'All': 'fullText'}[target]

        if bbox is not None:
            box = request.new_BoundingBox()
            box.LimitNorth, box.LimitSouth, box.LimitWest, box.LimitEast = map(float, bbox)
            request.BoundingBox = box

        if scope is not None:
            request.Scope = scope

        if dateRange is not None:
            # distinct local names: the original re-used 'start' here, shadowing
            # the paging parameter of the same name
            dRange = request.new_DateRange()
            dRange.DateRangeStart = self._dateFields(dateRange[0])
            dRange.DateRangeEnd = self._dateFields(dateRange[1])
            request.DateRange = dRange

        if self.logger: itime = time.time()
        response = self.server.doSearch(request)

        if self.logger:
            etime = time.time() - itime
            self.logger.info('Search Request [%s] took [%ss]' % (term, etime))

        # hits is recorded whatever the status; documents only on success
        self.hits = response.Hits
        if response._status:
            self.serverSessionID = response._resultId
            self.documents = response.Documents.Document
        self.status = [response._statusMessage, ]
        self.response = response
        return self.documents

    def getDoc(self, document, format='original'):
        ''' Return a single document from the backend database.

        document -- the document name sent to the service.
        format   -- sent as the request's Format field.

        Raises ValueError (carrying the service status message) when the
        response status is false.
        '''
        #create a request object
        request = doPresentRequest()
        #get an instance of the Documents holder class
        DocList = request.new_documents()
        request.Documents = DocList
        DocList.Document = [document, ]
        request.Format = format
        searchResult = self.server.doPresent(request)
        if not searchResult._status:
            raise ValueError('Error retrieving [%s] was [%s]' % (document, searchResult._statusMessage))
        return searchResult.Documents.Document[0]

    def getAllDocs(self, format='original'):
        ''' Retrieve every document listed by the last search in one request.

        Returns [] when there is no document list (no search yet, or the last
        search did not succeed). Raises ValueError when the response status is
        false.
        '''
        if self.documents is None: return []
        if self.logger: itime = time.time()

        #create a request object
        request = doPresentRequest()
        #get an instance of the Documents holder class
        DocList = request.new_documents()
        request.Documents = DocList
        DocList.Document = self.documents
        request.Format = format
        result = self.server.doPresent(request)
        if not result._status:
            raise ValueError('Error retrieving [%s] was [%s]' %
                             (self.documents, result._statusMessage))
        docs = result.Documents.Document
        if self.logger:
            etime = time.time() - itime
            self.logger.info('Document Load [n=%s] took [%ss]' % (len(self.documents), etime))

        return docs

    def __xmlerror(self, docmessage):
        ''' Build an <Error><Document/><Message/></Error> element.

        docmessage -- either a (document name, message) pair or a bare message.

        The element is assembled with the ElementTree API rather than by
        formatting and re-parsing a string: the message is typically the very
        text that just failed to parse, so re-parsing it would fail again.
        '''
        print(docmessage)
        if isinstance(docmessage, (tuple, list)) and len(docmessage) == 2:
            label, message = docmessage
        else:
            label, message = '', docmessage
        error = ET.Element('Error')
        ET.SubElement(error, 'Document').text = '%s' % (label,)
        ET.SubElement(error, 'Message').text = '%s' % (message,)
        return error

    def getDocElement(self, document):
        ''' Takes a document path (maybe from a previous call to ndgSearch) and extracts that document
        into an ElementTree instance.

        If the retrieved text cannot be loaded, an <Error> element (see
        __xmlerror) is returned instead. Errors from getDoc itself propagate. '''
        #we stick it straight into element tree because we need to use et to get the actual document
        #we want, not the envelope xml elements
        doc = self.getDoc(document)
        path = self._stripXmlSuffix(document)
        try:
            return loadET(doc)
        except Exception:
            return self.__xmlerror((path, doc))

    def getAllDocsAsElements(self):
        ''' Get all the documents and load them into a list of ET instances.

        A document that cannot be loaded is represented by an <Error> element
        in the same position of the returned list. '''
        result = []
        names = self.documents or []
        for index, doc in enumerate(self.getAllDocs()):
            try:
                result.append(loadET(doc))
            except Exception:
                # label the error with the matching file name when there is one
                # (assumes responses come back in request order, as
                # getLabelledDocs also does)
                if index < len(names):
                    label = self._stripXmlSuffix(names[index])
                else:
                    label = ''
                result.append(self.__xmlerror((label, doc)))
        return result

    def getLabelledDocs(self, format='original'):
        ''' Returns all the documents in sequence in a labelled list of strings.

        Each entry is (file name without '.xml', document). Returns [] when the
        last search had no hits or left no document list. Raises ValueError if
        the number of documents returned differs from the number requested. '''
        if self.hits == 0 or not self.documents: return []
        responses = self.getAllDocs(format)
        filenames = self.documents
        if len(filenames) != len(responses):
            print(filenames)
            raise ValueError('Internal inconsistency in search return [hits:%s!=responses:%s]'
                             % (len(filenames), len(responses)))
        return [(self._stripXmlSuffix(name), response)
                for name, response in zip(filenames, responses)]

    def get(self, repository, schema, localID, format='original', targetCollection=None):
        ''' Obtain a document via its NDG id split up.

        The file name requested is '<repository>__<schema>__<localID>.xml'. '''
        #nb argument targetCollection is here to provide same API as exist xmlrpc interface
        uri = '%s__%s__%s' % (repository, schema, localID)
        fileName = uri + '.xml'
        return self.getDoc(fileName, format)
197       
198       
199import unittest
200
class TestCase(unittest.TestCase):
    ''' Exercises ndgSearch against the discovery service.

    Every test builds a real ndgSearch client, so the service has to be
    reachable for any of them to run. '''

    def testSearch(self):
        ''' Test fundamental search capability '''
        term = 'temperature'
        s = ndgSearch()
        howmany = 10
        s.search(term, start=1, howmany=howmany)
        print('Asked for  %s  documents (there were %s hits):' % (howmany, s.hits))
        print(s.documents)

    def testgetLabelledDocs(self):
        ''' Test returning of labelled documents '''
        term = 'CD97'
        s = ndgSearch()
        s.search(term)
        print(s.status)
        print(s.documents)
        s.getLabelledDocs()

    def testNoReturn(self):
        ''' Tests a search return with (hopefully nothing to be found)'''
        term = 'xpabnl'
        s = ndgSearch()
        s.search(term)
        print('Hopefully this is zero: if not, expect the NoReturn test to fail: %s' % (s.hits,))
        output = s.getLabelledDocs()
        self.assertEqual(len(output), 0)

    def testGetDoc(self):
        ''' Test obtaining a specific document which had better exist '''
        doc = 'noc.soton.ac.uk__DIF__NOCSDAT100.xml'
        #doc='ndg.noc.soton.ac.uk__DIF__NOCSDAT274.xml'
        s = ndgSearch()
        s.getDoc(doc)

    def testSequence(self):
        ''' Tests that repeated searches work and can support zero responses in the middle '''
        # this was a bug we found and needed a test case for
        s = ndgSearch()
        term = 'CD97'  # hopefully just get a couple of hits for now.
        s.search(term)
        print(s.hits)
        # with this call in place the sequence used to break (and did not
        # without it); now it seems ok...
        s.getLabelledDocs()
        # getAllDocs() failed as well; the dependency was isolated to the call
        # made before the next search.
        term = 'xpabnl'
        s.search(term)
        print(s.hits)
        print('Hopefully this is also zero: if not, expect the Sequence test to fail: %s' % (s.hits,))

    def testGet(self):
        ''' Tests getting via uri components '''
        (r, s, l) = 'neodc.nerc.ac.uk', 'DIF', 'NEODC_ARSF_ATM_DAED'
        ss = ndgSearch()
        ss.get(r, s, l, format='DC')

    def testFailedGet(self):
        ''' A request for a document that does not exist must raise ValueError '''
        # getDoc raises on a failed retrieval rather than returning the status
        # message, so the failure is checked as an exception, not by looking
        # for 'Error' in a return value
        doc = 'abc'
        s = ndgSearch()
        self.assertRaises(ValueError, s.getDoc, doc)
264
# Run the test suite above when this module is executed as a script.
if __name__ == '__main__':
    unittest.main()
Note: See TracBrowser for help on using the repository browser.