source: ndgCommon/trunk/ndg/common/src/clients/ws/discovery/discoveryserviceclient.py @ 5748

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/ws/discovery/discoveryserviceclient.py@5748
Revision 5748, 11.4 KB checked in by sdonegan, 10 years ago (diff)

Replaced hardcoded URL for discovery service API - caused huge problems, should be fixed...

Line 
1'''
2 Client to the DiscoveryService search web service
3 
4 @author: C Byrom, Tessella Feb 2009
5'''
6# these imports are autogenerated by wsdl2py ...
7from ndg.common.src.clients.ws.discovery.generated.DiscoveryService_services_types import *
8from ndg.common.src.clients.ws.discovery.generated.DiscoveryService_services import *
9from ndg.common.src.clients.interfacesearchclient import InterfaceSearchClient
10
11import sys, time, os.path, logging
12from xml.dom import expatbuilder
13
class DiscoveryServiceClientError(Exception):
    """
    Error raised by the discovery service client.

    The message is written to the root logger at ERROR level as a side
    effect of constructing the exception, so every instance that is
    created is logged even if it is never raised.

    @param msg: description of the problem
    """
    def __init__(self, msg):
        # Log first, then hand the message to the standard Exception machinery
        logging.error(msg)
        super(DiscoveryServiceClientError, self).__init__(msg)
21
class ExpatReaderClass:
    """
    Reader adaptor exposing the expat-based minidom builders under the
    'fromString' / 'fromStream' names; it is passed as the 'readerclass'
    argument when the service port is created.
    """

    @staticmethod
    def fromString(string, namespaces=True):
        """Parse an XML document held in a string and return the DOM document."""
        return expatbuilder.parseString(string, namespaces)

    @staticmethod
    def fromStream(file, namespaces=True):
        """Parse an XML document from a file name or file object and return the DOM document."""
        return expatbuilder.parse(file, namespaces)
25
# Default endpoint of the discovery SOAP service.  The client below mirrors the
# locator code in the wsdl2py generated DiscoveryService_services module, but
# explicitly allows a different port address to be supplied; this value is only
# used when the caller does not provide one.
# Previous default, kept for reference:
#   http://ndg.badc.rl.ac.uk:8080/axis2/services/DiscoveryService
default_HostAndPort = "http://ndg3beta.badc.rl.ac.uk:8080/axis2/services/DiscoveryService"
30
class DiscoveryServiceClient(InterfaceSearchClient):
    '''
    Provides a client to the NDG discovery services methods exposed by the
    Discovery.wsdl.

    State kept between calls (all cleared at the start of every search):
      - documents: document names returned by the last successful search
      - hits: number of hits reported by the last search
      - response: raw response object of the last search
      - status: single element list holding the last status message
      - error: reset to None; not otherwise written by this class
    '''

    # Maps the 'target' keyword of search() onto the TermType sent to the service
    _TERM_TYPES = {'Authors': 'author', 'Params': 'parameter', 'All': 'fullText'}

    def __init__(self, tracefile=None, HostAndPort=None):
        '''
        Get an instance of the NDG Discovery Service.
        @keyword tracefile: passed through to the generated port type; used
        for collecting debug output
        @keyword HostAndPort: URL of the SOAP web service.  When empty or not
        given, the module level default_HostAndPort is used
        '''
        if not HostAndPort:
            logging.info("Discovery service API URL not supplied by calling "
                         "method so using: %s", default_HostAndPort)
            HostAndPort = default_HostAndPort

        loc = DiscoveryServiceLocator()
        self.server = loc.getDiscoveryServicePortType(url=HostAndPort,
                                                      readerclass=ExpatReaderClass,
                                                      tracefile=tracefile)
        self.serverSessionID = None
        logging.info('Discovery web service connection to %s', HostAndPort)
        self.__reset()

    def __reset(self):
        ''' Initialise and/or make sure we have no hangovers from a previous call '''
        self.documents = None
        self.hits = None
        self.error = None
        self.response = None
        self.status = None

    @staticmethod
    def _toTimeTuple(date):
        '''
        Convert one dateRange entry into the 7-tuple set on the request.
        The entry is read back to front: (date[2], date[1], date[0], 0, 0, 0, 0).
        NOTE(review): presumably the entry is [day, month, year] - confirm
        against callers.
        '''
        return (int(date[2]), int(date[1]), int(date[0]), 0, 0, 0, 0)

    def search(self, term, start=1, howmany=20, target='All',
               scope=None, bbox=None, dateRange=None,
               geoSearchType='overlaps', orderBy=None, orderDirection=None):
        '''
        Get a list of documents, with basic summary info for each doc, for a
        specific term using a free text search
        @param term: term to search on
        @keyword start: first record to return
        @keyword howmany: number of records to return
        @keyword target: target of search; one of 'All' (default), 'Authors'
        or 'Params'
        @keyword scope: scope of search
        @keyword bbox: bounding box for search data - NB, this should be an array with
        content, [N, W, E, S]
        @keyword dateRange: range of dates to use in search - an array with content,
        [startDate, endDate]; each date is an indexable of three values
        @keyword geoSearchType: type of geospatial search to use - NB, currently only
        'overlaps' accepted
        @keyword orderBy: if set, passed to the request as the ordering field
        @keyword orderDirection: if set, passed to the request as the ordering direction
        @raise KeyError: if target is not one of the supported values
        @return: documents reported by the service; None if the response status
        is not set
        '''
        logging.info("Running search - with term, '%s'", term)

        self.__reset()
        request = doSearchRequest()
        request.Term = term
        request.Start = start
        request.HowMany = howmany
        if orderBy:
            request.set_element_orderBy(orderBy)

        if orderDirection:
            request.set_element_orderByDirection(orderDirection)

        request.TermType = self._TERM_TYPES[target]
        if bbox is not None:
            box = request.new_BoundingBox()
            box.LimitNorth, box.LimitWest, box.LimitEast, box.LimitSouth = map(float, bbox)
            request.BoundingBox = box
            request.SpatialOperator = geoSearchType

        if scope is not None:
            request.Scope = scope

        if dateRange is not None:
            # Use dedicated local names so the 'start' argument is not clobbered
            dRange = request.new_DateRange()
            dRange.DateRangeStart = self._toTimeTuple(dateRange[0])
            dRange.DateRangeEnd = self._toTimeTuple(dateRange[1])
            request.DateRange = dRange

        itime = time.time()
        response = self.server.doSearch(request)
        etime = time.time() - itime
        logging.debug('Search Request [%s] took [%ss]', term, etime)

        # NB, set default and only change this if response lists Hits - this
        # can return 'None' - so isn't reliable var to set to without checking
        self.hits = 0
        if response.Hits:
            self.hits = response.Hits

        if response._status:
            self.serverSessionID = response._resultId
            self.documents = response.Documents.Document

        logging.info("Search returned %s results: %s", self.hits, self.documents)
        self.status = [response._statusMessage, ]
        self.response = response
        logging.info("Search complete")
        return self.documents

    def __setUpRequestObject(self, documents, format):
        '''
        Set up a request object and set it up with the appropriate
        Documents holder class and format
        @param documents: documents to hold in request
        @type documents: list
        @param format: document format to use
        @type format: str
        @return: request object with Documents holder and format set up
        '''
        logging.debug("Setting up request object")
        request = doPresentRequest()
        request.Documents = request.new_documents()
        request.Documents.Document = documents
        request.Format = format
        logging.debug("Request object set up")
        return request

    def getListNames(self):
        '''
        Get the various list names used by the web service
        @return: the response object returned by the service's getListNames call
        '''
        logging.debug("Getting list names from discovery webservice")
        # TODO: this current doesn't work - must be an error in the wsdl2python
        # code generation....
        request = getListNamesRequest()

        names = self.server.getListNames(request)

        logging.debug("- returning list names")
        # Previously returned an undefined name, which raised NameError.
        # NOTE(review): the raw response is returned as-is; unpack it here once
        # the generated response type is known to work.
        return names

    def getList(self, listName):
        '''
        Get the values for the specified list
        @param listName: name of list to retrieve values for
        @return: list members held in the service response for that list
        '''
        logging.debug("Getting data for list '%s'", listName)
        request = getListRequest()
        request.set_element_listName(listName)

        results = self.server.getList(request)
        listData = results.get_element_list().get_element_listMember()

        logging.debug("- returning list data")
        return listData

    def __runDocumentPresent(self, request):
        '''
        Run a document retrieval, provided the request to use
        @param request: request object to run the doPresent call with
        @type request: request
        @return: result - array of documents returned from call
        @raise ValueError: if the response status is not set
        '''
        logging.debug("Running 'doPresent()'")
        itime = time.time()
        searchResult = self.server.doPresent(request)
        if not searchResult._status:
            raise ValueError('Error during document retrieval: %s'
                             % searchResult._statusMessage)
        result = searchResult.Documents.Document
        etime = time.time() - itime
        logging.debug('Document Load took [%ss]', etime)
        logging.debug("'doPresent() completed")
        return result

    def getDoc(self, document, format='original'):
        '''
        Return a single document from the backend database
        @param document: name of the document to retrieve
        @keyword format: format the retrieved document should be in
        @raise ValueError: if the retrieval fails or no document is returned
        @return: the first document returned by the service
        '''
        logging.info("Retrieving document, '%s' in %s format", document, format)

        request = self.__setUpRequestObject([document], format)

        docs = self.__runDocumentPresent(request)
        if not docs:
            # Fail with the documented exception rather than an IndexError
            raise ValueError("Document '%s' was not returned by the service"
                             % (document,))
        logging.info("Document retrieved successfully")
        return docs[0]

    def getDocs(self, documents, format='original'):
        '''
        Return one or more complete document
        @param documents: array of documents to retrieve
        @keyword format: format the retrieved document should be in.  Default, 'original'
        returns documents in their original format
        @raise ValueError if the requested document(s) cannot be found
        @return: dictionary with format, {docName: docContents}
        '''
        logging.info("Retrieving docs: %s", documents)
        results = {}
        for doc in documents:
            # One service call per document so each result can be keyed by name
            results[doc] = self.getDoc(doc, format=format)

        logging.info("- returning docs")
        return results

    def getAllDocs(self, format='original'):
        '''
        Retrieve, in a single call, every document listed by the last search
        @keyword format: format the retrieved documents should be in
        @raise ValueError: if the retrieval fails
        @return: documents returned by the service; an empty list when no
        documents are currently held
        '''
        logging.info("Retrieving all documents specified")
        logging.debug("- %s", self.documents)
        if self.documents is None:
            logging.info("No documents specified to retrieve - returning")
            return []

        request = self.__setUpRequestObject(self.documents, format)
        docs = self.__runDocumentPresent(request)
        logging.info("Documents retrieved successfully")
        return docs

    def __xmlerror(self, docmessage):
        '''
        Build an <Error> element describing a failed document retrieval
        @param docmessage: two item sequence, (document, message)
        @return: ElementTree element of the form
        <Error><Document>..</Document><Message>..</Message></Error>
        '''
        # Imported locally: the module itself never imports ElementTree
        from xml.etree import ElementTree as ET

        document, message = docmessage
        logging.error("Error for document '%s': %s", document, message)
        # Build the tree rather than parsing an interpolated string so that
        # markup characters in either value cannot produce malformed XML
        error = ET.Element('Error')
        ET.SubElement(error, 'Document').text = '%s' % (document,)
        ET.SubElement(error, 'Message').text = '%s' % (message,)
        return error

    def getLabelledDocs(self, format='original'):
        '''
        Returns all the documents in sequence in a labelled list
        @keyword format: format the retrieved documents should be in
        @raise DiscoveryServiceClientError: if the number of documents returned
        differs from the number requested
        @return: list of (label, document) tuples, where label is the document
        name with its file extension removed; empty list if there is nothing
        to retrieve
        '''
        logging.info("Retrieving all labelled documents")
        # Covers 'no search run yet', 'no hits' and 'search did not succeed'
        if not self.hits or not self.documents:
            return []

        responses = self.getAllDocs(format)

        requested = len(self.documents)
        received = len(responses)
        if requested != received:
            logging.debug("Requested docs: %s\nActual results: %s",
                          self.documents, responses)
            raise DiscoveryServiceClientError(
                'Internal inconsistency in search return '
                '[hits:%s!=responses:%s]' % (requested, received))

        results = [(os.path.splitext(name)[0], body)
                   for name, body in zip(self.documents, responses)]
        logging.info("Documents retrieved")
        return results

    def getNDGDoc(self, providerID, docType, localID, format='DIF', **kw):
        '''
        Retrieve the document represented by an ndgURI
        @param providerID: ID of the provider of the metadata
        @param docType: document type - see ndgObject for valid types
        @param localID: local ID of the ndg document to retrieve
        @keyword format: format to return document in
        @return document matching the ndgURI
        '''
        # Stored documents are named '<provider>__<type>__<localID>.xml'
        uri = '%s__%s__%s' % (providerID, docType, localID)
        fileName = uri + '.xml'
        return self.getDoc(fileName, format)
302
Note: See TracBrowser for help on using the repository browser.