source: TI05-delivery/ows_framework/trunk/ows_server/ows_server/models/ndgSearch.py @ 3059

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI05-delivery/ows_framework/trunk/ows_server/ows_server/models/ndgSearch.py@3059
Revision 3059, 9.6 KB checked in by lawrence, 12 years ago (diff)

Fix for bounding box order (ticket:566)

Line 
1# these imports are autogenerated by wsdl2py ...
2from DiscoveryService_services_types import *
3from DiscoveryService_services import *
4import os.path
5
6#normal imports
7import sys, time
8try: #python 2.5
9    from xml.etree import ElementTree as ET
10except ImportError:
11    try:
12        # if you've installed it yourself it comes this way
13        import ElementTree as ET
14    except ImportError:
15        # if you've egged it this is the way it comes
16        from elementtree import ElementTree as ET
17   
18from ETxmlView import loadET,et2text
19
20from xml.dom import expatbuilder
21
class ExpatReaderClass:
    ''' Reader adaptor handed to the generated service port as its readerclass.

    It simply publishes the two xml.dom.expatbuilder parsers under the names
    fromString (parse a document held in a string) and fromStream (parse a
    document from a file name or file-like object). '''
    fromStream = staticmethod(expatbuilder.parse)
    fromString = staticmethod(expatbuilder.parseString)
25 
26
class ndgSearch:
    ''' Provides a client to the NDG discovery services methods exposed by the Discovery.wsdl

    After a call to search() the instance holds:
        documents -- the document names returned (None if the search did not succeed)
        hits      -- the hit count reported by the server
        status    -- a one element list holding the server status message
        response  -- the raw response object
    '''

    def __init__(self,logger=None,tracefile=None):
        ''' Get an instance of the service.

        logger    -- optional object with an info() method; when supplied the
                     time taken by searches and document loads is reported to it
        tracefile -- handed unchanged to the generated service locator
        '''
        #how do we get a different backend provider?
        loc=DiscoveryServiceLocator()
        self.server=loc.getDiscoveryServicePortType(readerclass=ExpatReaderClass,tracefile=tracefile)
        self.serverSessionID=None
        self.logger=logger
        self.__reset()

    def __reset(self):
        ''' Initialise and/or make sure we have no hangovers from a previous call '''
        self.documents=None
        self.hits=None
        self.error=None
        self.response=None
        self.status=None

    def search(self,term,start=1,howmany=20,target='All',scope=None,bbox=None,dateRange=None):
        ''' Get a list of documents for a specific term using a free text search.

        term      -- the search term
        start     -- value sent as the request Start field
        howmany   -- value sent as the request HowMany field
        target    -- one of 'Authors', 'Params' or 'All'; anything else raises KeyError
        scope     -- optional value sent as the request Scope
        bbox      -- optional sequence of four numbers, unpacked in the order
                     north, west, east, south
        dateRange -- optional pair (start, end); items 2, 1 and 0 of each entry
                     (in that order) become the first three fields of the date
                     tuple sent to the server
                     (presumably each entry is (day, month, year) -- confirm with callers)

        Returns self.documents, which stays None when the server reports a false status.
        '''
        self.__reset()
        request=doSearchRequest()
        request.Term=term
        request.Start=start
        request.HowMany=howmany
        # a single request type is used; the kind of search is selected by TermType
        request.TermType={'Authors':'author','Params':'parameter','All':'fullText'}[target]

        if bbox is not None:
            box=request.new_BoundingBox()
            box.LimitNorth,box.LimitWest,box.LimitEast,box.LimitSouth=map(float,bbox)
            request.BoundingBox=box

        if scope is not None:
            request.Scope=scope

        if dateRange is not None:
            # distinct local names: the original re-used 'start', hiding the parameter
            first,last=dateRange[0],dateRange[1]
            rangeStart=int(first[2]),int(first[1]),int(first[0]),0,0,0,0
            rangeEnd=int(last[2]),int(last[1]),int(last[0]),0,0,0,0
            dRange=request.new_DateRange()
            dRange.DateRangeStart=rangeStart
            dRange.DateRangeEnd=rangeEnd
            request.DateRange=dRange

        if self.logger: itime=time.time()
        response=self.server.doSearch(request)
        if self.logger:
            etime=time.time()-itime
            self.logger.info('Search Request [%s] took [%ss]'%(term,etime))

        self.hits=response.Hits
        if response._status:
            self.serverSessionID=response._resultId
            self.documents=response.Documents.Document
        self.status=[response._statusMessage,]
        self.response=response
        return self.documents

    def getDoc(self,document,format='original'):
        ''' Return a single document from the backend database.

        document -- the document name to present
        format   -- value sent as the request Format

        Raises ValueError (carrying the server status message) when the
        server reports a false status.
        '''
        request=doPresentRequest()
        #get an instance of the Documents holder class
        DocList=request.new_documents()
        request.Documents=DocList
        DocList.Document=[document,]
        request.Format=format
        searchResult=self.server.doPresent(request)
        if not searchResult._status:
            raise ValueError('Error retrieving [%s] was [%s]'%(document,searchResult._statusMessage))
        return searchResult.Documents.Document[0]

    def getAllDocs(self,format='original'):
        ''' Retrieve every document named by the last search in one request.

        Returns [] when no documents are held. Raises ValueError (carrying
        the server status message) when the server reports a false status.
        '''
        if self.documents is None: return []
        if self.logger: itime=time.time()

        request=doPresentRequest()
        #get an instance of the Documents holder class
        DocList=request.new_documents()
        request.Documents=DocList
        DocList.Document=self.documents
        request.Format=format
        result=self.server.doPresent(request)
        if not result._status:
            raise ValueError('Error retrieving [%s] was [%s]'%
                        (self.documents,result._statusMessage))
        docs=result.Documents.Document
        if self.logger:
            etime=time.time()-itime
            self.logger.info('Document Load [n=%s] took [%ss]'%(len(self.documents),etime))
        return docs

    def __xmlerror(self,docmessage):
        ''' Build an <Error> element from a (document label, message) pair.

        The element is assembled with the ElementTree API rather than by
        pasting the values into an XML string and parsing it: this is called
        when a document could not be loaded, so its text is typically not
        well formed and would make that second parse fail as well.
        '''
        print(docmessage)
        label,message=docmessage
        error=ET.Element('Error')
        # '%s'% coerces the values to text exactly as the old template did
        ET.SubElement(error,'Document').text='%s'%(label,)
        ET.SubElement(error,'Message').text='%s'%(message,)
        return error

    def getDocElement(self,document):
        ''' Takes a document path (maybe from a previous call to ndgSearch) and extracts that document
        into an ElementTree instance.

        If the document cannot be loaded an <Error> element is returned instead.
        A failure in getDoc itself (ValueError) is not caught here.
        '''
        #we stick it straight into element tree because we need to use et to get the actual document
        #we want, not the envelope xml elements
        doc=self.getDoc(document)
        # rstrip('.xml') strips any trailing '.', 'x', 'm', 'l' characters
        # ('higem.xml' -> 'hige'); splitext removes just the extension
        path=os.path.splitext(document)[0]
        try:
            return loadET(doc)
        except Exception:
            return self.__xmlerror((path,doc))

    def getAllDocsAsElements(self):
        ''' Get all the documents and load them into a list of ET instances.

        A document that cannot be loaded contributes an <Error> element,
        labelled with the matching name from self.documents (matched by
        position, as getLabelledDocs does).
        '''
        docs=self.getAllDocs()
        names=self.documents or []
        result=[]
        for index,doc in enumerate(docs):
            try:
                result.append(loadET(doc))
            except Exception:
                if index<len(names):
                    label=os.path.splitext(names[index])[0]
                else:
                    label='document %s'%index
                # __xmlerror needs a (label, message) pair, not the bare document
                result.append(self.__xmlerror((label,doc)))
        return result

    def getLabelledDocs(self,format='original'):
        ''' Returns all the documents in sequence in a labelled list of strings.

        Each entry is (document name without its extension, document).
        Returns [] when there were no hits or no documents are held.
        Raises ValueError if the number of documents returned differs from
        the number of names held.
        '''
        # documents is None before any search and after an unsuccessful one;
        # without this guard len(None) below raised TypeError
        if self.hits==0 or self.documents is None: return []
        responses=self.getAllDocs(format)
        filenames=self.documents
        i=len(filenames)
        j=len(responses)
        if i!=j:
            print(filenames)
            raise ValueError('Internal inconsistency in search return [hits:%s!=responses:%s]'%(i,j))
        # splitext, not strip('.xml'): 'higem.xml'.strip('.xml') gives 'hige'
        return [(os.path.splitext(name)[0],response)
                for name,response in zip(filenames,responses)]

    def get(self,repository,schema,localID,format='original',targetCollection=None):
        ''' Obtain a document via its NDG id split up.

        The document name requested is '<repository>__<schema>__<localID>.xml'.
        targetCollection is ignored.
        '''
        #nb argument targetCollection is here to provide same API as exist xmlrpc interface
        uri='%s__%s__%s'%(repository,schema,localID)
        fileName=uri+'.xml'
        return self.getDoc(fileName,format)
201       
202       
203import unittest
204
class TestCase(unittest.TestCase):
    ''' Exercises ndgSearch end to end. Every test builds a real ndgSearch
    instance, so the discovery service must be reachable for these to run. '''

    def testSearch(self):
        ''' Test fundamental search capability '''
        term='temperature'
        s=ndgSearch()
        howmany=10
        s.search(term,start=1,howmany=howmany)
        print('Asked for  %s  documents (there were %s hits):'%(howmany,s.hits))
        print(s.documents)

    def testgetLabelledDocs(self):
        ''' Test returning of labelled documents '''
        term='CD97'
        s=ndgSearch()
        s.search(term)
        print(s.status)
        print(s.documents)
        s.getLabelledDocs()

    def testNoReturn(self):
        ''' Tests a search return with (hopefully nothing to be found)'''
        term='xpabnl'
        s=ndgSearch()
        s.search(term)
        print('Hopefully this is zero: if not, expect the NoReturn test to fail: %s'%(s.hits,))
        output=s.getLabelledDocs()
        self.assertEqual(len(output),0)

    def testGetDoc(self):
        ''' Test obtaining a specific document which had better exist '''
        doc='noc.soton.ac.uk__DIF__NOCSDAT100.xml'
        #doc='ndg.noc.soton.ac.uk__DIF__NOCSDAT274.xml'
        s=ndgSearch()
        s.getDoc(doc)

    def testSequence(self):
        ''' Tests that repeated searches work and can support zero responses in the middle '''
        # this was a bug we found and needed a test case for
        s=ndgSearch()
        term='CD97' # hopefully just get a couple of hits for now.
        s.search(term)
        print(s.hits)
        # this call used to break the following search; it now seems ok
        s.getLabelledDocs()
        term='xpabnl'
        s.search(term)
        print(s.hits)
        print('Hopefully this is also zero: if not, expect the Sequence test to fail: %s'%(s.hits,))

    def testGet(self):
        ''' Tests getting via uri components '''
        (r,s,l)='neodc.nerc.ac.uk','DIF','NEODC_ARSF_ATM_DAED'
        ss=ndgSearch()
        ss.get(r,s,l,format='DC')

    def testFailedGet(self):
        ''' A request for a document that does not exist must raise ValueError '''
        # getDoc raises on failure rather than returning the status message,
        # so asserting on a return value could never succeed
        doc='abc'
        s=ndgSearch()
        self.assertRaises(ValueError,s.getDoc,doc)
268
# Run the unit tests defined above when this module is executed as a script.
if __name__=="__main__":
    unittest.main()
Note: See TracBrowser for help on using the repository browser.