source: ndgCommon/trunk/ndg/common/src/clients/ws/ndgSearch.py @ 4793

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/ws/ndgSearch.py@4832
Revision 4793, 9.1 KB checked in by cbyrom, 11 years ago (diff)

Checking in initial codebase for ndgUtils restructure.

Line 
1# these imports are autogenerated by wsdl2py ...
2from DiscoveryService_services_types import *
3from DiscoveryService_services import *
4
5#normal imports
6import sys, time, os.path, logging
7from xml.dom import expatbuilder
8from ndg.common.src.models.ndgObject import ndgObject as no
9from ndg.common.src.lib.ETxmlView import loadET,et2text
10
class NDGSearchError(Exception):
    """Error raised by the NDG search client.

    Creating an instance also writes the message to the root logger at
    ERROR level, so every failure is logged even if the exception is
    later swallowed by a caller.
    """

    def __init__(self, msg):
        # Record the problem first, then initialise the base exception
        logging.error(msg)
        super(NDGSearchError, self).__init__(msg)
16
class ExpatReaderClass:
    """Reader exposing the expatbuilder parsers under the names
    fromStream / fromString (handed to the service locator as its
    'readerclass' argument)."""

    # Parse a document from a file name or file-like object
    fromStream = staticmethod(expatbuilder.parse)

    # Parse a document held in a string
    fromString = staticmethod(expatbuilder.parseString)
20#
# Locator (this mimics the code which lives in the wsdl2py generated DiscoveryService_services,
# but explicitly allows a different port address) ...
23#
24
# Endpoint used by ndgSearch when the caller does not supply HostAndPort
default_HostAndPort = "http://ndg.badc.rl.ac.uk:8080/axis2/services/DiscoveryService"
26
class ndgSearch:
    '''
    Provides a client to the NDG discovery services methods exposed by the Discovery.wsdl

    A call to search() records its outcome on the instance (documents, hits,
    status, response and serverSessionID); getAllDocs(), getAllDocsAsElements()
    and getLabelledDocs() then retrieve the documents found by that search.
    '''

    def __init__(self, tracefile=None, HostAndPort=default_HostAndPort):
        '''Get an instance of the NDG Discovery Service.
            Optional Keywords are:
                tracefile - for collecting debug output
                HostAndPort - the location of the SOAP web service (default is ndg Glue)
        '''
        # Get a different backend provider via the url argument
        loc = DiscoveryServiceLocator()
        self.server = loc.getDiscoveryServicePortType(url=HostAndPort,
                                                      readerclass=ExpatReaderClass,
                                                      tracefile=tracefile)
        self.serverSessionID = None
        logging.info('Discovery web service connection to %s', HostAndPort)
        self.__reset()

    def __reset(self):
        ''' Initialise and/or making sure we have no hangovers from a previous call '''
        self.documents = None
        self.hits = None
        self.error = None
        self.response = None
        self.status = None

    @staticmethod
    def __stripXmlSuffix(document):
        '''
        Return the document name without a trailing '.xml'
        @param document: document name, with or without the '.xml' suffix
        @type document: str
        @return: document name with one trailing '.xml' removed, if present
        '''
        # NB, str.rstrip('.xml') would strip any trailing '.', 'x', 'm' or 'l'
        # characters and so eat into the document id itself
        if document.endswith('.xml'):
            return document[:-len('.xml')]
        return document

    def search(self, term, start=1, howmany=20, target='All', scope=None, bbox=None,
               dateRange=None, geoSearchType='overlaps'):
        '''
        Get a list of documents for a specific term using a free text search
        @param term: term to search for
        @param start: value sent as the request 'Start'
        @param howmany: value sent as the request 'HowMany'
        @param target: one of 'Authors', 'Params' or 'All' - selects the term
        type ('author', 'parameter' or 'fullText') sent to the service
        @param scope: if not None, value sent as the request 'Scope'
        @param bbox: if not None, four values convertible to float, in the
        order north, west, east, south
        @param dateRange: if not None, a (start, end) pair; elements [2], [1]
        and [0] of each are converted to int in that order - presumably the
        dates are given as (day, month, year), TODO confirm against callers
        @param geoSearchType: spatial operator sent with the bounding box
        (only used when bbox is given)
        @return: documents found - None if the service reported failure
        @raise KeyError: if target is not one of the supported values
        '''
        logging.info("Running search - with term, '%s'", term)
        self.__reset()

        request = doSearchRequest()
        request.Term = term
        request.Start = start
        request.HowMany = howmany
        request.TermType = {'Authors': 'author',
                            'Params': 'parameter',
                            'All': 'fullText'}[target]
        if bbox is not None:
            box = request.new_BoundingBox()
            box.LimitNorth, box.LimitWest, box.LimitEast, box.LimitSouth = map(float, bbox)
            request.BoundingBox = box
            request.SpatialOperator = geoSearchType

        if scope is not None:
            request.Scope = scope

        if dateRange is not None:
            # use dedicated names so as not to clobber the 'start' argument
            rangeStart = (int(dateRange[0][2]), int(dateRange[0][1]),
                          int(dateRange[0][0]), 0, 0, 0, 0)
            rangeEnd = (int(dateRange[1][2]), int(dateRange[1][1]),
                        int(dateRange[1][0]), 0, 0, 0, 0)

            dRange = request.new_DateRange()
            dRange.DateRangeStart = rangeStart
            dRange.DateRangeEnd = rangeEnd
            request.DateRange = dRange

        itime = time.time()
        response = self.server.doSearch(request)
        etime = time.time() - itime
        logging.debug('Search Request [%s] took [%ss]', term, etime)

        # NB, set default and only change this if response lists Hits - this
        # can return 'None' - so isn't reliable var to set to without checking
        self.hits = 0
        if response.Hits:
            self.hits = response.Hits

        if response._status:
            self.serverSessionID = response._resultId
            # NOTE(review): guarded in the same way as Hits, on the assumption
            # that the Documents holder may also be missing - confirm against
            # the service
            holder = response.Documents
            if holder is not None:
                self.documents = holder.Document

        logging.info("Search returned %s results: %s", self.hits, self.documents)
        self.status = [response._statusMessage, ]
        self.response = response
        logging.info("Search complete")
        return self.documents

    def __setUpRequestObject(self, documents, format):
        '''
        Set up a request object and set it up with the appropriate
        Documents holder class and format
        @param documents: documents to hold in request
        @type documents: list
        @param format: document format to use
        @type format: str
        @return: request object with Documents holder and format set up
        '''
        logging.debug("Setting up request object")
        request = doPresentRequest()
        request.Documents = request.new_documents()
        request.Documents.Document = documents
        request.Format = format
        logging.debug("Request object set up")
        return request

    def __runDocumentPresent(self, request):
        '''
        Run a document retrieval, provided the request to use
        @param request: request object to run the doPresent call with
        @type request: request
        @return: result - array of documents returned from call
        @raise ValueError if document not retrieved properly
        '''
        logging.debug("Running 'doPresent()'")
        itime = time.time()
        searchResult = self.server.doPresent(request)
        if not searchResult._status:
            raise ValueError('Error during document retrieval: %s'
                             % searchResult._statusMessage)
        result = searchResult.Documents.Document
        etime = time.time() - itime
        logging.debug('Document Load took [%ss]', etime)
        logging.debug("'doPresent() completed")
        return result

    def getDoc(self, document, format='original'):
        '''
        Return a single document from the backend database
        @param document: name of the document to retrieve
        @param format: document format to request
        @return: first document returned by the service
        @raise ValueError: if the service reports a failure
        '''
        logging.info("Retrieving document, '%s' in %s format", document, format)
        request = self.__setUpRequestObject([document], format)
        docs = self.__runDocumentPresent(request)
        logging.info("Document retrieved successfully")
        return docs[0]

    def getAllDocs(self, format='original'):
        '''
        Retrieve all the documents found by the last search
        @param format: document format to request
        @return: documents returned by the service - an empty list if there
        are no documents to retrieve
        @raise ValueError: if the service reports a failure
        '''
        logging.info("Retrieving all documents specified")
        logging.debug("- %s", self.documents)
        # nothing to ask for, so don't bother the service
        if not self.documents:
            logging.info("No documents specified to retrieve - returning")
            return []

        request = self.__setUpRequestObject(self.documents, format)
        docs = self.__runDocumentPresent(request)
        logging.info("Documents retrieved successfully")
        return docs

    def __xmlerror(self, docmessage):
        '''
        Build an <Error> element describing a document which could not be parsed
        @param docmessage: (document name, document content) pair
        @type docmessage: tuple
        @return: Element 'Error' with child elements 'Document' and 'Message'
        '''
        # imported here since no ElementTree name is imported at module level
        from xml.etree import ElementTree

        document, message = docmessage
        logging.error("Unable to load document, '%s', as XML", document)
        logging.debug("- %s", message)

        # Build the element directly rather than parsing an interpolated
        # string: the content is, by definition, something that failed to
        # parse, so embedding it in markup would just fail again
        error = ElementTree.Element('Error')
        ElementTree.SubElement(error, 'Document').text = '%s' % (document,)
        ElementTree.SubElement(error, 'Message').text = '%s' % (message,)
        return error

    def getDocElement(self, document):
        ''' Takes a document path (maybe from a previous call to ndgSearch) and extracts that document
        into an ElementTree instance
        @param document: name of the document to retrieve
        @return: result of loadET on the document, or an 'Error' element if
        the document could not be loaded
        '''
        # we stick it straight into element tree because we need to use et to get the actual document
        # we want, not the envelope xml elements
        doc = self.getDoc(document)
        path = self.__stripXmlSuffix(document)
        try:
            return loadET(doc)
        except Exception:
            return self.__xmlerror((path, doc))

    def getAllDocsAsElements(self):
        '''
        Get all the documents and load them into a list of ET instances
        @return: list with one entry per retrieved document - the result of
        loadET, or an 'Error' element where the document could not be loaded
        '''
        result = []
        names = self.documents or []
        docs = self.getAllDocs()
        for index, doc in enumerate(docs):
            try:
                result.append(loadET(doc))
            except Exception:
                # label the error with the matching document name, if known
                name = ''
                if index < len(names):
                    name = self.__stripXmlSuffix(names[index])
                result.append(self.__xmlerror((name, doc)))
        return result

    def getLabelledDocs(self, format='original'):
        '''
        Returns all the documents in sequence in a labelled list of strings
        @param format: document format to request
        @return: list of (document name without extension, document) tuples -
        an empty list if the last search found nothing
        @raise NDGSearchError: if the number of documents returned differs
        from the number requested
        '''
        logging.info("Retrieving all labelled documents")
        # covers the 'no hits' case as well as 'no search run' and 'search
        # failed', in which self.documents is still None
        if not self.hits or not self.documents:
            return []

        responses = self.getAllDocs(format)

        requested = len(self.documents)
        received = len(responses)
        if requested != received:
            logging.debug("Requested docs: %s\nActual results: %s",
                          self.documents, responses)
            raise NDGSearchError('Internal inconsistency in search '
                                 'return [hits:%s!=responses:%s]' % (requested, received))

        results = [(os.path.splitext(name)[0], content)
                   for name, content in zip(self.documents, responses)]
        logging.info("Documents retrieved")
        return results

    def get(self, repository, schema, localID, format='DIF', **kw):
        '''
        Obtain a document via its NDG id split up
        @param repository: repository part of the id
        @param schema: schema part of the id
        @param localID: local id part of the id
        @param format: document format to request
        @return: the document '<repository>__<schema>__<localID>.xml'
        '''
        # nb extra keywords (e.g. targetCollection) are accepted, and ignored,
        # to provide same API as exist xmlrpc interface
        uri = '%s__%s__%s' % (repository, schema, localID)
        fileName = uri + '.xml'
        return self.getDoc(fileName, format)
241       
242       
243
Note: See TracBrowser for help on using the repository browser.