source: ndgCommon/trunk/ndg/common/src/clients/ws/discovery/discoveryserviceclient.py @ 5006

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/ws/discovery/discoveryserviceclient.py@5006
Revision 5006, 9.8 KB checked in by cbyrom, 13 years ago (diff)

Restructure webservice client code - put discovery service client
into its own package and move tests and references accordingly.

Line 
1'''
2 Client to the DiscoveryService search web service
3 
4 @author: C Byrom, Tessella Feb 2009
5'''
6# these imports are autogenerated by wsdl2py ...
7from ndg.common.src.clients.ws.discovery.generated.DiscoveryService_services_types import *
8from ndg.common.src.clients.ws.discovery.generated.DiscoveryService_services import *
9from ndg.common.src.clients.interfacesearchclient import InterfaceSearchClient
10
11import sys, time, os.path, logging
12from xml.dom import expatbuilder
13
class DiscoveryServiceClientError(Exception):
    """
    Error type raised by the discovery service client.

    Constructing the exception also writes the message to the root logger
    at ERROR level.
    """
    def __init__(self, msg):
        # Record the message in the log before initialising the exception
        logging.error(msg)
        super(DiscoveryServiceClientError, self).__init__(msg)
21
class ExpatReaderClass:
    """
    Reader class handed to the generated port type as ``readerclass``.

    Exposes the expatbuilder parsers under the names fromString and
    fromStream; both are static, so they can be called on the class or on
    an instance.
    """
    # Parse an XML document supplied as a string
    fromString = staticmethod(expatbuilder.parseString)
    # Parse an XML document supplied as a file name or file-like object
    fromStream = staticmethod(expatbuilder.parse)
25
# Default address of the discovery SOAP service.  The wsdl2py generated
# DiscoveryService_services module carries its own locator code; this
# mimics it, but explicitly allows a different port address to be supplied
# (see the HostAndPort keyword of DiscoveryServiceClient).
default_HostAndPort = "http://ndg.badc.rl.ac.uk:8080/axis2/services/DiscoveryService"
29
class DiscoveryServiceClient(InterfaceSearchClient):
    '''
    Provides a client to the NDG discovery services methods exposed by the Discovery.wsdl

    The outcome of the most recent search() call is kept on the instance
    (documents, hits, status, response, serverSessionID); getAllDocs() and
    getLabelledDocs() operate on that stored state.
    '''

    def __init__(self, tracefile=None, HostAndPort=default_HostAndPort):
        '''Get an instance of the NDG Discovery Service.
            Optional Keywords are:
                tracefile - for collecting debug output
                HostAndPort - the location of the SOAP web service (default is ndg Glue)
        '''
        # Get a different backend provider via the url argument
        locator = DiscoveryServiceLocator()
        self.server = locator.getDiscoveryServicePortType(url=HostAndPort,
                                                          readerclass=ExpatReaderClass,
                                                          tracefile=tracefile)
        self.serverSessionID = None
        logging.info('Discovery web service connection to %s', HostAndPort)
        self.__reset()

    def __reset(self):
        ''' Initialise and/or making sure we have no hangovers from a previous call '''
        self.documents = None
        self.hits = None
        self.error = None
        self.response = None
        self.status = None

    @staticmethod
    def __asTimeTuple(date):
        '''
        Convert one entry of a search dateRange into the 7-item tuple set on
        the request's date range.
        @param date: indexable date; items [2], [1] and [0] are converted to
        int in that order - NOTE(review): presumably [day, month, year],
        confirm against callers
        @return: tuple (int(date[2]), int(date[1]), int(date[0]), 0, 0, 0, 0)
        '''
        return (int(date[2]), int(date[1]), int(date[0]), 0, 0, 0, 0)

    def search(self, term, start=1, howmany=20, target='All',
               scope=None, bbox=None, dateRange=None, geoSearchType='overlaps'):
        '''
        Get a list of documents, with basic summary info for each doc, for a
        specific term using a free text search
        @param term: term to search on
        @keyword start:  first record to return
        @keyword howmany: number of records to return
        @keyword target: target of search - one of 'Authors', 'Params' or
        'All'. Default = 'All'
        @keyword scope: scope of search
        @keyword bbox: bounding box for search data - NB, this should be an array with
        content, [N, W, E, S]
        @keyword dateRange: range of dates to use in search - an array with content,
        [startDate, endDate]
        @keyword geoSearchType: type of geospatial search to use - NB, currently only
        'overlaps' accepted
        @raise KeyError: if target is not one of the supported values
        @return: the documents listed in the response when the response
        status is set; None otherwise
        '''
        logging.info("Running search - with term, '%s'", term)

        self.__reset()
        request = doSearchRequest()
        request.Term = term
        request.Start = start
        request.HowMany = howmany

        # Translate the public target name into the request's TermType value
        termTypes = {'Authors': 'author', 'Params': 'parameter', 'All': 'fullText'}
        try:
            request.TermType = termTypes[target]
        except KeyError:
            # Same exception type as a plain lookup, but with a usable message
            raise KeyError("Unknown search target, '%s' - expected one of %s"
                           % (target, sorted(termTypes)))

        if bbox is not None:
            box = request.new_BoundingBox()
            box.LimitNorth, box.LimitWest, box.LimitEast, box.LimitSouth = map(float, bbox)
            request.BoundingBox = box
            request.SpatialOperator = geoSearchType

        if scope is not None:
            request.Scope = scope

        if dateRange is not None:
            # Use dedicated names here so the 'start' record offset argument
            # is not rebound to a date tuple
            rangeStart = self.__asTimeTuple(dateRange[0])
            rangeEnd = self.__asTimeTuple(dateRange[1])

            dRange = request.new_DateRange()
            dRange.DateRangeStart = rangeStart
            dRange.DateRangeEnd = rangeEnd
            request.DateRange = dRange

        itime = time.time()
        response = self.server.doSearch(request)
        etime = time.time() - itime
        logging.debug('Search Request [%s] took [%ss]', term, etime)

        # NB, response.Hits can be None, so fall back to 0 rather than
        # storing it unchecked
        self.hits = response.Hits or 0

        if response._status:
            self.serverSessionID = response._resultId
            self.documents = response.Documents.Document

        logging.info("Search returned %s results: %s", self.hits, self.documents)
        self.status = [response._statusMessage, ]
        self.response = response
        logging.info("Search complete")
        return self.documents

    def __setUpRequestObject(self, documents, format):
        '''
        Set up a request object and set it up with the appropriate
        Documents holder class and format
        @param documents: documents to hold in request
        @type documents: list
        @param format: document format to use
        @type format: str
        @return: request object with Documents holder and format set up
        '''
        logging.debug("Setting up request object")
        request = doPresentRequest()
        request.Documents = request.new_documents()
        request.Documents.Document = documents
        request.Format = format
        logging.debug("Request object set up")
        return request

    def __runDocumentPresent(self, request):
        '''
        Run a document retrieval, provided the request to use
        @param request: request object to run the doPresent call with
        @type request: request
        @return: result - array of documents returned from call
        @raise ValueError if document not retrieved properly
        '''
        logging.debug("Running 'doPresent()'")
        itime = time.time()
        searchResult = self.server.doPresent(request)
        if not searchResult._status:
            raise ValueError('Error during document retrieval: %s'
                             % searchResult._statusMessage)
        result = searchResult.Documents.Document
        etime = time.time() - itime
        logging.debug('Document Load took [%ss]', etime)
        logging.debug("'doPresent() completed")
        return result

    def getDoc(self, document, format='original'):
        '''
        Return a single document from the backend database
        @param document: name of the document to retrieve
        @keyword format: format the retrieved document should be in.  Default, 'original'
        @raise ValueError if the retrieval fails or returns no document
        @return: the first document returned by the service
        '''
        logging.info("Retrieving document, '%s' in %s format", document, format)

        request = self.__setUpRequestObject([document], format)

        docs = self.__runDocumentPresent(request)
        if not docs:
            # A successful status with nothing in it would otherwise surface
            # as a bare IndexError on docs[0]
            raise ValueError("Error during document retrieval: no content "
                             "returned for document, '%s'" % (document,))
        logging.info("Document retrieved successfully")
        return docs[0]

    def getDocs(self, documents, format='original'):
        '''
        Return one or more complete document
        @param documents: array of documents to retrieve
        @keyword format: format the retrieved document should be in.  Default, 'original'
        returns documents in their original format
        @raise ValueError if the requested document(s) cannot be found
        @return: dictionary with format, {docName: docContents}
        '''
        logging.info("Retrieving docs: %s", documents)
        results = {}
        for doc in documents:
            results[doc] = self.getDoc(doc, format=format)

        logging.info("- returning docs")
        return results

    def getAllDocs(self, format='original'):
        '''
        Retrieve, in a single call, all the documents stored on this
        instance by the last search
        @keyword format: format the retrieved documents should be in.  Default, 'original'
        @raise ValueError if the retrieval fails
        @return: array of documents; empty list if no documents are stored
        '''
        logging.info("Retrieving all documents specified")
        logging.debug("- %s", self.documents)
        if self.documents is None:
            logging.info("No documents specified to retrieve - returning")
            return []

        request = self.__setUpRequestObject(self.documents, format)
        docs = self.__runDocumentPresent(request)
        logging.info("Documents retrieved successfully")
        return docs

    def __xmlerror(self, docmessage):
        '''
        Build an <Error> element describing a problem with a document
        @param docmessage: two item sequence, (document, message)
        @return: ElementTree element with Document and Message children
        '''
        # Imported locally: no 'ET' name is imported explicitly in this
        # module, so referencing it relied on the wildcard imports
        from xml.etree import ElementTree as ET
        from xml.sax.saxutils import escape

        logging.error("Document error: %s", docmessage)
        # Escape the text so markup characters in the document name or
        # message cannot produce malformed XML
        escaped = tuple(escape('%s' % (part,)) for part in docmessage)
        return ET.fromstring('<Error><Document>%s</Document><Message>%s</Message></Error>' % escaped)

    def getLabelledDocs(self, format='original'):
        '''
        Returns all the documents in sequence in a labelled list
        @keyword format: format the retrieved documents should be in.  Default, 'original'
        @raise DiscoveryServiceClientError if the number of documents returned
        differs from the number requested
        @return: list of (label, document) tuples, where label is the stored
        document name with its extension removed; empty list if there is
        nothing to retrieve
        '''
        logging.info("Retrieving all labelled documents")
        # hits is None before any search, and documents stays None when a
        # search did not complete successfully - neither leaves anything to label
        if not self.hits or self.documents is None:
            return []

        responses = self.getAllDocs(format)

        requested = len(self.documents)
        returned = len(responses)
        if requested != returned:
            logging.debug("Requested docs: %s\nActual results: %s",
                          self.documents, responses)
            raise DiscoveryServiceClientError(
                'Internal inconsistency in search return [hits:%s!=responses:%s]'
                % (requested, returned))

        results = [(os.path.splitext(name)[0], content)
                   for name, content in zip(self.documents, responses)]
        logging.info("Documents retrieved")
        return results

    def getNDGDoc(self, providerID, docType, localID, format='DIF', **kw):
        '''
        Retrieve the document represented by an ndgURI
        @param providerID: ID of the provider of the metadata
        @param docType: document type - see ndgObject for valid types
        @param localID: local ID of the ndg document to retrieve
        @keyword format: format to return document in
        @keyword kw: any further keywords are accepted and ignored
        @return document matching the ndgURI
        '''
        uri = '%s__%s__%s' % (providerID, docType, localID)
        fileName = uri + '.xml'
        return self.getDoc(fileName, format)
255
Note: See TracBrowser for help on using the repository browser.