source: TI05-delivery/ows_framework/trunk/ows_server/ows_server/models/ndgSearch.py @ 3093

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI05-delivery/ows_framework/trunk/ows_server/ows_server/models/ndgSearch.py@3093
Revision 3093, 9.7 KB checked in by lawrence, 12 years ago (diff)

Most of the coding for geospatial operators. Improvements
to results display for multiple bounding boxes.

Line 
1# these imports are autogenerated by wsdl2py ...
2from DiscoveryService_services_types import *
3from DiscoveryService_services import *
4import os.path
5
6#normal imports
7import sys, time
8try: #python 2.5
9    from xml.etree import ElementTree as ET
10except ImportError:
11    try:
12        # if you've installed it yourself it comes this way
13        import ElementTree as ET
14    except ImportError:
15        # if you've egged it this is the way it comes
16        from elementtree import ElementTree as ET
17   
18from ETxmlView import loadET,et2text
19
20from xml.dom import expatbuilder
21
class ExpatReaderClass:
    ''' Reader adaptor passed as ``readerclass`` when the discovery service port is created.

    It only re-exports the two ``xml.dom.expatbuilder`` entry points under the
    names ``fromString`` and ``fromStream``; both are static, so the class is
    never instantiated by this module. '''
    # parse an XML document held in a string into a DOM document
    fromString = staticmethod(expatbuilder.parseString)
    # parse an XML document from a file name or file-like object into a DOM document
    fromStream = staticmethod(expatbuilder.parse)
25 
26
class ndgSearch:
    ''' Provides a client to the NDG discovery services methods exposed by the Discovery.wsdl '''

    # Maps the ``target`` names accepted by search() onto the TermType value sent to the service.
    _TERM_TYPES = {'Authors': 'author', 'Params': 'parameter', 'All': 'fullText'}

    def __init__(self, logger=None, tracefile=None):
        ''' Get an instance of the service.

        logger    -- optional object with an ``info`` method; when given, request timings are logged
        tracefile -- passed straight through to the generated service port '''
        #how do we get a different backend provider?
        loc = DiscoveryServiceLocator()
        self.server = loc.getDiscoveryServicePortType(readerclass=ExpatReaderClass,
                                                      tracefile=tracefile)
        self.serverSessionID = None
        self.logger = logger
        self.__reset()

    def __reset(self):
        ''' Initialise and/or make sure we have no hangovers from a previous call '''
        self.documents = None
        self.hits = None
        self.error = None
        self.response = None
        self.status = None

    def search(self, term, start=1, howmany=20, target='All', scope=None, bbox=None,
               dateRange=None, geoSearchType='overlaps'):
        ''' Get a list of documents for a specific term using a free text search.

        term          -- the search term
        start/howmany -- copied to the request's Start and HowMany fields
        target        -- one of 'Authors', 'Params' or 'All'; anything else raises KeyError
        scope         -- optional, copied to the request's Scope field
        bbox          -- optional sequence of four numbers, assigned in order to
                         LimitNorth, LimitWest, LimitEast, LimitSouth
        dateRange     -- optional pair of three-item sequences (range start, range end);
                         items are read in the order [2],[1],[0], so presumably each is
                         (day, month, year) -- TODO confirm against callers
        geoSearchType -- spatial operator, only sent when bbox is given

        Returns self.documents, which stays None when the service reports failure. '''
        self.__reset()

        request = doSearchRequest()
        request.Term = term
        request.Start = start
        request.HowMany = howmany
        request.TermType = self._TERM_TYPES[target]

        if bbox is not None:
            box = request.new_BoundingBox()
            (box.LimitNorth, box.LimitWest,
             box.LimitEast, box.LimitSouth) = [float(value) for value in bbox]
            request.BoundingBox = box
            request.SpatialOperator = geoSearchType

        if scope is not None:
            request.Scope = scope

        if dateRange is not None:
            # separate local names so the ``start`` argument is not shadowed
            first, last = dateRange[0], dateRange[1]
            dRange = request.new_DateRange()
            dRange.DateRangeStart = (int(first[2]), int(first[1]), int(first[0]), 0, 0, 0, 0)
            dRange.DateRangeEnd = (int(last[2]), int(last[1]), int(last[0]), 0, 0, 0, 0)
            request.DateRange = dRange

        if self.logger:
            itime = time.time()
        response = self.server.doSearch(request)
        if self.logger:
            etime = time.time() - itime
            self.logger.info('Search Request [%s] took [%ss]' % (term, etime))

        # hits are recorded either way; the session id and documents only on success
        self.hits = response.Hits
        if response._status:
            self.serverSessionID = response._resultId
            self.documents = response.Documents.Document
        self.status = [response._statusMessage, ]
        self.response = response
        return self.documents

    def getDoc(self, document, format='original'):
        ''' Return a single document from the backend database.

        Raises ValueError (carrying the service status message) if the service
        reports a failure. '''
        #create a request object
        request = doPresentRequest()
        #get an instance of the Documents holder class
        DocList = request.new_documents()
        request.Documents = DocList
        DocList.Document = [document, ]
        request.Format = format
        searchResult = self.server.doPresent(request)
        if not searchResult._status:
            raise ValueError('Error retrieving [%s] was [%s]' % (document, searchResult._statusMessage))
        return searchResult.Documents.Document[0]

    def getAllDocs(self, format='original'):
        ''' Retrieve every document listed by the last search in one request.

        Returns [] when there is no document list; raises ValueError if the
        service reports a failure. '''
        if self.documents is None:
            return []
        if self.logger:
            itime = time.time()

        #create a request object
        request = doPresentRequest()
        #get an instance of the Documents holder class
        DocList = request.new_documents()
        request.Documents = DocList
        DocList.Document = self.documents
        request.Format = format
        result = self.server.doPresent(request)
        if not result._status:
            raise ValueError('Error retrieving [%s] was [%s]' %
                             (self.documents, result._statusMessage))
        docs = result.Documents.Document
        if self.logger:
            etime = time.time() - itime
            self.logger.info('Document Load [n=%s] took [%ss]' % (len(self.documents), etime))
        return docs

    def __xmlerror(self, docmessage):
        ''' Build an <Error> element from a (document label, message) pair.

        The element is assembled with the ElementTree API rather than by
        formatting and re-parsing a string: the message is normally a document
        that has just failed to parse, so re-parsing it would fail again. '''
        print(docmessage)
        label, message = docmessage
        error = ET.Element('Error')
        ET.SubElement(error, 'Document').text = '%s' % (label,)
        ET.SubElement(error, 'Message').text = '%s' % (message,)
        return error

    def getDocElement(self, document):
        ''' Takes a document path (maybe from a previous call to ndgSearch) and extracts that document
        into an ElementTree instance. If the document cannot be loaded an <Error> element is
        returned instead. '''
        #we stick it straight into element tree because we need to use et to get the actual document
        #we want, not the envelope xml elements
        doc = self.getDoc(document)
        # splitext, not rstrip('.xml'): rstrip removes any trailing '.', 'x', 'm' or 'l'
        # characters, so 'higem.xml' would become 'hige'
        path = os.path.splitext(document)[0]
        try:
            return loadET(doc)
        except Exception:
            return self.__xmlerror((path, doc))

    def getAllDocsAsElements(self):
        ''' Get all the documents and load them into a list of ET instances.

        A document that cannot be loaded is represented by an <Error> element
        labelled with its file name (minus extension) where that is known. '''
        docs = self.getAllDocs()
        names = self.documents or []
        result = []
        for index, doc in enumerate(docs):
            try:
                result.append(loadET(doc))
            except Exception:
                if index < len(names):
                    label = os.path.splitext(names[index])[0]
                else:
                    label = 'unknown'
                # __xmlerror needs a (label, message) pair, not the bare document
                result.append(self.__xmlerror((label, doc)))
        return result

    def getLabelledDocs(self, format='original'):
        ''' Returns all the documents in sequence in a labelled list of strings.

        Each entry is (file name without extension, document). Returns [] when
        there were no hits or no document list is available (no search run yet,
        or the last search failed). Raises ValueError if the service returns a
        different number of documents than were asked for. '''
        if self.documents is None or not self.hits:
            return []
        responses = self.getAllDocs(format)
        filenames = self.documents
        i = len(filenames)
        j = len(responses)
        if i != j:
            print(filenames)
            raise ValueError('Internal inconsistency in search return [hits:%s!=responses:%s]' % (i, j))
        # os.path.splitext rather than strip('.xml'): 'higem.xml'.strip('.xml') gives 'hige'
        return [(os.path.splitext(name)[0], response)
                for name, response in zip(filenames, responses)]

    def get(self, repository, schema, localID, format='original', targetCollection=None):
        ''' Obtain a document via its NDG id split up into repository, schema and local id '''
        #nb argument targetCollection is here to provide same API as exist xmlrpc interface
        uri = '%s__%s__%s' % (repository, schema, localID)
        return self.getDoc(uri + '.xml', format)
202       
203       
204import unittest
205
class TestCase(unittest.TestCase):
    ''' Tests for ndgSearch. Each test builds its own ndgSearch client and calls
    search/getDoc on it, so they need a reachable discovery service. '''

    def testSearch(self):
        ''' Test fundamental search capability '''
        term = 'temperature'
        s = ndgSearch()
        howmany = 10
        s.search(term, start=1, howmany=howmany)
        print('Asked for  %s  documents (there were %s hits):' % (howmany, s.hits))
        print(s.documents)

    def testgetLabelledDocs(self):
        ''' Test returning of labelled documents '''
        term = 'CD97'
        s = ndgSearch()
        s.search(term)
        print(s.status)
        print(s.documents)
        s.getLabelledDocs()

    def testNoReturn(self):
        ''' Tests a search return with (hopefully) nothing to be found '''
        term = 'xpabnl'
        s = ndgSearch()
        s.search(term)
        print('Hopefully this is zero: if not, expect the NoReturn test to fail: %s' % (s.hits,))
        output = s.getLabelledDocs()
        self.assertEqual(len(output), 0)

    def testGetDoc(self):
        ''' Test obtaining a specific document which had better exist '''
        doc = 'noc.soton.ac.uk__DIF__NOCSDAT100.xml'
        #doc='ndg.noc.soton.ac.uk__DIF__NOCSDAT274.xml'
        s = ndgSearch()
        s.getDoc(doc)

    def testSequence(self):
        ''' Tests that repeated searches work and can support zero responses in the middle '''
        # this was a bug we found and needed a test case for
        s = ndgSearch()
        term = 'CD97'  # hopefully just get a couple of hits for now.
        s.search(term)
        print(s.hits)
        # calling getLabelledDocs between two searches used to break the second
        # search; it now seems ok, and the call is kept to guard against a regression
        s.getLabelledDocs()
        #this failed as well, and we isolated the dependency in the previous call to this one.
        #res=s.getAllDocs()
        term = 'xpabnl'
        s.search(term)
        print(s.hits)
        print('Hopefully this is also zero: if not, expect the Sequence test to fail: %s' % (s.hits,))

    def testGet(self):
        ''' Tests getting via uri components '''
        (r, s, l) = 'neodc.nerc.ac.uk', 'DIF', 'NEODC_ARSF_ATM_DAED'
        ss = ndgSearch()
        ss.get(r, s, l, format='DC')

    def testFailedGet(self):
        ''' Tests that asking for a document which does not exist is reported as an error '''
        # getDoc raises ValueError on a failed retrieval rather than returning
        # the status message, so check for the exception and not the return value
        doc = 'abc'
        s = ndgSearch()
        self.assertRaises(ValueError, s.getDoc, doc)
269
# run the test cases above when this module is executed as a script
if __name__ == "__main__":
    unittest.main()
Note: See TracBrowser for help on using the repository browser.