Changeset 4175
- Timestamp: 04/09/08 14:40:55 (13 years ago)
- Location: exist/trunk/python/ndgUtils
- Files: 6 edited
Legend:
- Unmodified
- Added
- Removed
-
exist/trunk/python/ndgUtils/ndgObject.py
r4168 r4175
   97    97    # set default return format - if not set, just return in original format
   98    98    fmt=''
   99        - if self.config.has_option('DISCOVERY','formatDefault'):
  100        - fmt=self.config.get('DISCOVERY','formatDefault')
         99  + if self.config.config.has_option('DISCOVERY','formatDefault'):
        100  + fmt=self.config.config.get('DISCOVERY','formatDefault')
  101   101    logging.info("Default discovery format set to: %s" %fmt)
  102   102
    …     …
  104   104    # We'll build the following even if it can't be used (as would be the case for
  105   105    # a non data entity B record or an A record) because it's a useful template.
  106        - if self.schema!=fmt: qs=('outputSchema',fmt)
        106  + if self.schema!=fmt:
        107  + qs=('outputSchema',fmt)
  107   108    self.discoveryURL=self.__buildURL(discoveryBASE,[qs])
  108   109
    …     …
  110   111    # - if not, default to 'unknown'
  111   112    servicehost = 'unknown'
  112        - if self.config.has_option('NDG_B_SERVICE',self.repository):
        113  + if self.config.config.has_option('NDG_B_SERVICE',self.repository):
  113   114    servicehost=self.config.get('NDG_B_SERVICE',self.repository)
  114   115
exist/trunk/python/ndgUtils/ndgSearch.py
r3895 r4175
    2     2    from DiscoveryService_services_types import *
    3     3    from DiscoveryService_services import *
    4        - import os.path
          4  + import os.path, logging
    5     5
    6     6    #normal imports
    …     …
   20    20    from xml.dom import expatbuilder
   21    21
         22  + class NDGSearchError(Exception):
         23  + """Exception handling for NDG Search class."""
         24  + def __init__(self, msg):
         25  + logging.error(msg)
         26  + Exception.__init__(self, msg)
         27  +
   22    28    class ExpatReaderClass:
   23    29    fromString = staticmethod(expatbuilder.parseString)
    …     …
   33    39    ''' Provides a client to the NDG discovery services methods exposed by the Discovery.wsdl '''
   34    40
   35        - def __init__(self, logger=None,tracefile=None,HostAndPort=default_HostAndPort):
         41  + def __init__(self, tracefile=None,HostAndPort=default_HostAndPort):
   36    42    '''Get an instance of the NDG Discovery Service.
   37    43    Optional Keywords are:
   38        - logger - a python logging instance, for collecting stats
   39    44    tracefile - for collecting debug output
   40    45    HostAndPort - the location of the SOAP web service (default is ndg Glue)
    …     …
   45    50    readerclass=ExpatReaderClass,tracefile=tracefile)
   46    51    self.serverSessionID=None
   47        - self.logger=logger
   48        - if self.logger:
   49        - logger.info('Discovery web service connection to %s'%HostAndPort)
         52  + logging.info('Discovery web service connection to %s'%HostAndPort)
   50    53    self.__reset()
   51    54
    …     …
   60    63    def search(self,term,start=1,howmany=20,target='All',scope=None,bbox=None,dateRange=None,geoSearchType='overlaps'):
   61    64    ''' Get a list of documents for a specific term using a free text search'''
   62        -
         65  + logging.info("Running search - with term, '%s'" %term)
   63    66    self.__reset()
   64    67    #if target=='Authors':
    …     …
   91    94    request.DateRange=dRange
   92    95
   93        - if self.logger: itime=time.time()
         96  + itime=time.time()
   94    97    response=self.server.doSearch(request)
   95        -
   96        - if self.logger:
   97        - etime=time.time()-itime
   98        - self.logger.info('Search Request [%s] took [%ss]'%(term,etime))
         98  + etime=time.time()-itime
         99  + logging.debug('Search Request [%s] took [%ss]'%(term,etime))
        100  +
   99   101    if response._status:
  100   102    self.serverSessionID=response._resultId
    …     …
  103   105    else:
  104   106    self.hits=response.Hits
        107  +
        108  + logging.info("Search returned %s results: %s" %(self.hits, self.documents))
  105   109    self.status=[response._statusMessage,]
  106   110    self.response=response
        111  + logging.info("Search complete")
  107   112    return self.documents
  108   113
        114  +
        115  + def __setUpRequestObject(self, documents, format):
        116  + '''
        117  + Set up a request object and set it up with the appropriate
        118  + Documents holder class and format
        119  + @param documents: documents to hold in request
        120  + @type documents: list
        121  + @param format: document format to use
        122  + @type format: str
        123  + @return: request object with Documents holder and format set up
        124  + '''
        125  + logging.debug("Setting up request object")
        126  + request=doPresentRequest()
        127  + request.Documents = request.new_documents()
        128  + request.Documents.Document = documents
        129  + request.Format = format
        130  + logging.debug("Request object set up")
        131  + return request
        132  +
        133  +
        134  + def __runDocumentPresent(self, request):
        135  + '''
        136  + Run a document retrieval, provided the request to use
        137  + @param request: request object to run the doPresent call with
        138  + @type request: request
        139  + @return: result - array of documents returned from call
        140  + @raise ValueError if document not retrieved properly
        141  + '''
        142  + logging.debug("Running 'doPresent()'")
        143  + itime=time.time()
        144  + searchResult=self.server.doPresent(request)
        145  + if searchResult._status:
        146  + result=searchResult.Documents.Document
        147  + else:
        148  + raise ValueError('Error during document retrieval: %s' \
        149  + %searchResult._statusMessage)
        150  + etime=time.time()-itime
        151  + logging.debug('Document Load took [%ss]' %etime)
        152  + logging.debug("'doPresent() completed")
        153  + return result
        154  +
        155  +
  109   156    def getDoc(self,document,format='original'):
  110        - ''' Return a single document from the backend database '''
  111        -
  112        - #create a request object
  113        - request=doPresentRequest()
  114        - #get an instance of the Documents holder class
  115        - DocList=request.new_documents()
  116        - request.Documents=DocList
  117        - DocList.Document=[document,]
  118        - request.Format=format
  119        - #try:
  120        - searchResult=self.server.doPresent(request)
  121        - #except Exception,e:
  122        - # raise Exception,e
  123        - if searchResult._status:
  124        - result=searchResult.Documents.Document[0]
  125        - else:
  126        - raise ValueError('Error retrieving [%s] was [%s]'%(document,searchResult._statusMessage))
  127        - #result=searchResult._statusMessage
  128        - return result
        157  + '''
        158  + Return a single document from the backend database
        159  + '''
        160  + logging.info("Retrieving document, '%s' in %s format" %(document, format))
        161  +
        162  + request = self.__setUpRequestObject([document], format)
        163  +
        164  + docs = self.__runDocumentPresent(request)
        165  + logging.info("Document retrieved successfully")
        166  + return docs[0]
        167  +
  129   168
  130   169    def getAllDocs(self,format='original'):
  131        - ''' Parse the list of documents, and retrieve them directly '''
  132        -
  133        - if self.documents is None: return []
  134        - if self.logger: itime=time.time()
  135        -
  136        - #create a request object
  137        - request=doPresentRequest()
  138        - #get an instance of the Documents holder class
  139        - DocList=request.new_documents()
  140        - request.Documents=DocList
  141        - DocList.Document=self.documents
  142        - request.Format=format
  143        - result=self.server.doPresent(request)
  144        - if result._status:
  145        - docs=result.Documents.Document
  146        - else:
  147        - raise ValueError('Error retrieving [%s] was [%s]'%
  148        - (self.documents,result._statusMessage))
  149        - if self.logger:
  150        - etime=time.time()-itime
  151        - self.logger.info('Document Load [n=%s] took [%ss]'%(len(self.documents),etime))
  152        -
        170  + '''
        171  + Parse the list of documents, and retrieve them directly
        172  + '''
        173  + logging.info("Retrieving all documents specified")
        174  + logging.debug("- %s" %self.documents)
        175  + if self.documents is None:
        176  + logging.info("No documents specified to retrieve - returning")
        177  + return []
        178  +
        179  + request = self.__setUpRequestObject(self.documents, format)
        180  + docs = self.__runDocumentPresent(request)
        181  + logging.info("Documents retrieved successfully")
  153   182    return docs
  154   183
    …     …
  173   202
  174   203    def getAllDocsAsElements(self):
  175        - ''' Get all the documents and load them into a list of ET instances '''
        204  + '''
        205  + Get all the documents and load them into a list of ET instances
        206  + '''
  176   207    result=[]
  177   208    docs=self.getAllDocs()
    …     …
  186   217
  187   218    def getLabelledDocs(self,format='original'):
  188        - ''' Returns all the documents in sequence in a labelled list of strings'''
        219  + '''
        220  + Returns all the documents in sequence in a labelled list of strings
        221  + '''
        222  + logging.info("Retrieving all labelled documents")
  189   223    if self.hits==0: return []
  190   224    #filenames=self.documents.Document
  191   225    #so we know that the following call is the problem ...
  192   226    responses=self.getAllDocs(format)
  193        - filenames=self.documents
  194        - i=len(filenames)
        227  + i=len(self.documents)
  195   228    j=len(responses)
  196   229    if i!=j:
  197        - print filenames
  198        - raise ValueError,'Internal inconsistency in search return [hits:%s!=responses:%s]'%(i,j)
        230  + logging.debug("Requested docs: %s\nActual results: %s" \
        231  + %(self.documents, responses))
        232  + raise NDGSearchError('Internal inconsistency in search \
        233  + return [hits:%s!=responses:%s]'%(i,j))
  199   234    indices=range(i)
  200   235    results=[]
  201   236    for i in indices:
  202        - ####results.append((filenames[i].strip('.xml'),responses[i]))
  203        - ####Wow, 'higem.xml'.strip('.xml') = hige ... and that's what split is supposed to do!
  204        - ff=os.path.splitext(filenames[i])
        237  + ff=os.path.splitext(self.documents[i])
  205   238    results.append((ff[0],responses[i]))
        239  + logging.info("Documents retrieved")
  206   240    return results
  207   241
exist/trunk/python/ndgUtils/test.config
r4167 r4175
    5     5    [NDG_B_SERVICE]
    6     6    badc.nerc.ac.uk:chinook.badc.rl.ac.uk
    7        - www.npm.ac.uk:www.npm.ac.uk
          7  + www.npm.ac.uk:pgsql.npm.ac.uk
    8     8    instance:chinook.badc.rl.ac.uk
    9     9
exist/trunk/python/ndgUtils/test_ndgRetrieve.py
r4167 r4175
   62    62    self.assertNotEqual(xml.find(tc.VALID_MOLES_DOC_ID), -1)
   63    63
   64        - def offtestRemoteGet(self):
         64  + def OFFtestRemoteGet(self):
   65    65    ''' Attempt a remote get '''
   66    66    # TODO: not sure what remote repository to use for this?
exist/trunk/python/ndgUtils/test_ndgSearch.py
r3128 r4175
    1        - import unittest
          1  + import unittest, logging
          2  + import testconstants as tc
          3  + import ndgObject as no
    2     4    from ndgSearch import ndgSearch
          5  +
          6  + logging.basicConfig(level=logging.DEBUG,
          7  + format='%(asctime)s %(filename)s:%(lineno)d %(levelname)s %(message)s')
    3     8
    4     9    class TestCase(unittest.TestCase):
    …     …
   14    19    def testgetLabelledDocs(self):
   15    20    ''' Test returning of labelled documents '''
   16        - term='CD97'
         21  + term='dppp'
   17    22    s=ndgSearch()
   18    23    r=s.search(term)
   19    24    output=s.getLabelledDocs()
   20        - nocsfound=0
   21        - for i in output:
   22        - if 'noc.soton' in i[0]: nocsfound=1
   23        - self.assertEqual(nocsfound,1)
         25  + self.assertNotEqual(len(output), 0)
   24    26
   25    27    def testNoReturn(self):
    …     …
   32    34    def testGetDoc(self):
   33    35    ''' Test obtaining a specific document which had better exist '''
   34        - doc='badc.nerc.ac.uk__DIF__dataent_COAPEC.xml'
   35        - s=ndgSearch()
   36        - r=s.getDoc(doc)
   37        - self.assertEqual('<DIF',r[0:4])
         36  + doc= tc.VALID_DISCOVERY_PROVIDER_ID + "__" + no.DIF_DOC_TYPE + \
         37  + "__" + tc.VALID_DISCOVERY_DOC_ID + ".xml"
         38  + s=ndgSearch()
         39  + r=s.getDoc(doc)
         40  + self.assertNotEqual(r.find('<DIF'), -1)
   38    41
   39    42    def testSequence(self):
    …     …
   55    58    ''' Tests getting via uri components '''
   56    59
   57        - (r,s,l)='badc.nerc.ac.uk','DIF','dataent_COAPEC'
         60  + (r,s,l)=tc.VALID_DISCOVERY_PROVIDER_ID, \
         61  + no.DIF_DOC_TYPE, tc.VALID_DISCOVERY_DOC_ID
   58    62    ss=ndgSearch()
   59    63    x=ss.get(r,s,l,format='DC')
         64  + self.assertNotEqual(x.find('<oai_dc'), -1)
   60    65
   61    66    def testFailedGet(self):
    …     …
   65    70    r=s.getDoc(doc)
   66    71    except ValueError,e:
   67        - self.assertEqual(str(e),'Error retrieving [abc] was [Error retrieving all documents]')
         72  + self.assertEqual(str(e),\
         73  + 'Error during document retrieval: Document retrieval was successful but generated no results.')
   68    74    except Exception,e:
   69    75    self.fail(str(e))
exist/trunk/python/ndgUtils/testconstants.py
r4167 r4175
   19    19    VALID_NUMSIM_DOC_ID = 'HadGEM1_CodeBase'
   20    20    VALID_ATOM_DOC_ID = 'dataent_11679938403412067'
   21    21
         22  + VALID_DISCOVERY_PROVIDER_ID = "csl.gov.uk"
         23  + VALID_DISCOVERY_DOC_ID = "csl.gov.uk__1"
Note: See TracChangeset for help on using the changeset viewer.