1 | # these imports are autogenerated by wsdl2py ... |
---|
2 | from DiscoveryService_services_types import * |
---|
3 | from DiscoveryService_services import * |
---|
4 | |
---|
5 | #normal imports |
---|
6 | import sys, time |
---|
7 | try: #python 2.5 |
---|
8 | from xml.etree import ElementTree as ET |
---|
9 | except ImportError: |
---|
10 | try: |
---|
11 | # if you've installed it yourself it comes this way |
---|
12 | import ElementTree as ET |
---|
13 | except ImportError: |
---|
14 | # if you've egged it this is the way it comes |
---|
15 | from elementtree import ElementTree as ET |
---|
16 | |
---|
17 | from ETxmlView import loadET,et2text |
---|
18 | |
---|
19 | from xml.dom import expatbuilder |
---|
20 | |
---|
class ExpatReaderClass:
    ''' Reader adapter exposing the expatbuilder parsers under the names
    fromString / fromStream (it is handed to the service port as ``readerclass``). '''

    @staticmethod
    def fromString(*args, **kwargs):
        ''' Parse XML held in a string; arguments go straight to expatbuilder.parseString '''
        return expatbuilder.parseString(*args, **kwargs)

    @staticmethod
    def fromStream(*args, **kwargs):
        ''' Parse XML from a file (object or name); arguments go straight to expatbuilder.parse '''
        return expatbuilder.parse(*args, **kwargs)
---|
24 | |
---|
25 | |
---|
class ndgSearch:
    ''' Provides a client to the NDG discovery services methods exposed by the Discovery.wsdl

    State left behind by the most recent search() call:
        documents -- the document names returned (None if the search failed or none was made)
        hits      -- the hit count reported by the service
        status    -- one-element list holding the service status message when a search failed
        response  -- the raw response object
        error     -- reset to None on every search; not otherwise written by this class
    '''

    # Maps the ``target`` names accepted by search() onto the TermType value sent in the request.
    _TERM_TYPES = {'Authors': 'author', 'Params': 'parameter', 'All': 'fullText'}

    # File-name suffix carried by document names; removed to obtain the bare identifier.
    _XML_SUFFIX = '.xml'

    def __init__(self, logger=None, tracefile=None):
        ''' Get an instance of the service.

        logger    -- optional object with an info(message) method, used for timing messages
        tracefile -- passed straight through to the service port constructor
        '''
        loc = DiscoveryServiceLocator()
        self.server = loc.getDiscoveryServicePortType(readerclass=ExpatReaderClass,
                                                      tracefile=tracefile)
        self.serverSessionID = None
        self.logger = logger
        self.__reset()

    def __reset(self):
        ''' Initialise and/or make sure we have no hangovers from a previous call '''
        self.documents = None
        self.hits = None
        self.error = None
        self.response = None
        self.status = None

    @staticmethod
    def _stripXmlSuffix(name):
        ''' Return name without one trailing ".xml"; names lacking the suffix come back unchanged.

        str.strip/rstrip cannot be used for this: they remove any run of the
        characters '.', 'x', 'm', 'l' and so eat into the identifier itself.
        '''
        suffix = ndgSearch._XML_SUFFIX
        if name.endswith(suffix):
            return name[:-len(suffix)]
        return name

    @staticmethod
    def _dateTuple(date):
        ''' Turn a three-item date into the 7-tuple placed in the request.

        The items are emitted in reverse order and padded with zeros, i.e.
        (date[2], date[1], date[0], 0, 0, 0, 0) -- presumably the caller supplies
        (day, month, year); confirm against callers.
        '''
        return (int(date[2]), int(date[1]), int(date[0]), 0, 0, 0, 0)

    def search(self, term, start=1, howmany=20, target='All', scope=None, bbox=None, dateRange=None):
        ''' Get a list of documents for a specific term using a free text search.

        term      -- the search term
        start     -- value sent as the request's Start field
        howmany   -- value sent as the request's HowMany field
        target    -- one of 'All', 'Authors' or 'Params' (anything else raises KeyError)
        scope     -- optional value for the request's Scope field
        bbox      -- optional four values, converted to float and assigned in the order
                     north, south, west, east
        dateRange -- optional pair of three-item dates (see _dateTuple)

        Returns the list of document names, or None when the service reports failure
        (in which case self.status holds the service message).
        '''
        self.__reset()

        request = doSearchRequest()
        request.Term = term
        request.Start = start
        request.HowMany = howmany
        request.TermType = self._TERM_TYPES[target]

        if bbox is not None:
            box = request.new_BoundingBox()
            box.LimitNorth, box.LimitSouth, box.LimitWest, box.LimitEast = [float(v) for v in bbox]
            request.BoundingBox = box

        if scope is not None:
            request.Scope = scope

        if dateRange is not None:
            dRange = request.new_DateRange()
            dRange.DateRangeStart = self._dateTuple(dateRange[0])
            dRange.DateRangeEnd = self._dateTuple(dateRange[1])
            request.DateRange = dRange

        if self.logger:
            itime = time.time()
        response = self.server.doSearch(request)
        if self.logger:
            etime = time.time() - itime
            self.logger.info('Search Request [%s] took [%ss]' % (term, etime))

        # Hits is recorded whether or not the search succeeded.
        self.hits = response.Hits
        if response._status:
            self.serverSessionID = response._resultId
            self.documents = response.Documents.Document
        else:
            self.status = [response._statusMessage, ]
        self.response = response
        return self.documents

    def __present(self, documents, format, label):
        ''' Issue a doPresent request for a list of document names and return the documents.

        label is only used in the error message. Raises ValueError when the
        service reports failure.
        '''
        request = doPresentRequest()
        # get an instance of the Documents holder class
        DocList = request.new_documents()
        request.Documents = DocList
        DocList.Document = documents
        request.Format = format
        result = self.server.doPresent(request)
        if not result._status:
            raise ValueError('Error retrieving [%s] was [%s]' % (label, result._statusMessage))
        return result.Documents.Document

    def getDoc(self, document, format='original'):
        ''' Return a single document from the backend database.

        Raises ValueError when the service reports failure.
        '''
        return self.__present([document, ], format, document)[0]

    def getAllDocs(self, format='original'):
        ''' Retrieve every document named by the last search in a single request.

        Returns [] when there is no document list; raises ValueError when the
        service reports failure.
        '''
        if self.documents is None:
            return []
        if self.logger:
            itime = time.time()

        docs = self.__present(self.documents, format, self.documents)

        if self.logger:
            etime = time.time() - itime
            self.logger.info('Document Load [n=%s] took [%ss]' % (len(self.documents), etime))
        return docs

    def __xmlerror(self, docmessage):
        ''' Build an <Error><Document/><Message/></Error> element describing a failed load.

        docmessage is normally a (document, message) tuple; a bare message is
        accepted too and gets an empty Document label. The element is built
        directly rather than parsed from a formatted string, because the
        message is typically the very content that failed to parse as XML.
        '''
        if isinstance(docmessage, tuple):
            document, message = docmessage
        else:
            document, message = '', docmessage
        print(docmessage)
        error = ET.Element('Error')
        ET.SubElement(error, 'Document').text = '%s' % (document,)
        ET.SubElement(error, 'Message').text = '%s' % (message,)
        return error

    def getDocElement(self, document):
        ''' Takes a document path (maybe from a previous call to ndgSearch) and extracts that document
        into an ElementTree instance; an <Error> element is returned if it cannot be loaded '''
        doc = self.getDoc(document)
        path = self._stripXmlSuffix(document)
        try:
            return loadET(doc)
        except Exception:
            return self.__xmlerror((path, doc))

    def getAllDocsAsElements(self):
        ''' Get all the documents and load them into a list of ET instances.

        A document that cannot be loaded is represented by an <Error> element
        in the corresponding position.
        '''
        docs = self.getAllDocs()
        names = self.documents
        # Only label errors with a document name when names and documents line up.
        labelled = names is not None and len(names) == len(docs)
        result = []
        for index, doc in enumerate(docs):
            try:
                result.append(loadET(doc))
            except Exception:
                label = self._stripXmlSuffix(names[index]) if labelled else ''
                result.append(self.__xmlerror((label, doc)))
        return result

    def getLabelledDocs(self, format='original'):
        ''' Returns all the documents in sequence as a list of (name, document) tuples,
        where name is the document name without its ".xml" suffix.

        Returns [] when the last search had no hits or left no document list.
        Raises ValueError if the service returns a different number of
        documents than were asked for.
        '''
        if self.hits == 0 or self.documents is None:
            return []
        responses = self.getAllDocs(format)
        filenames = self.documents
        asked, received = len(filenames), len(responses)
        if asked != received:
            print(filenames)
            raise ValueError('Internal inconsistency in search return [hits:%s!=responses:%s]'
                             % (asked, received))
        return [(self._stripXmlSuffix(name), response)
                for name, response in zip(filenames, responses)]

    def get(self, repository, schema, localID, format='original', targetCollection=None):
        ''' Obtain a document via its NDG id split up.

        targetCollection is unused; it is here to provide the same API as the
        exist xmlrpc interface.
        '''
        uri = '%s__%s__%s' % (repository, schema, localID)
        fileName = uri + '.xml'
        return self.getDoc(fileName, format)
---|
197 | |
---|
198 | |
---|
199 | import unittest |
---|
200 | |
---|
class TestCase(unittest.TestCase):
    ''' Tests that drive ndgSearch through the discovery service; what they print
    depends on whatever the backend returns for the hard-coded terms and ids. '''

    def testSearch(self):
        ''' Test fundamental search capability '''
        wanted = 10
        client = ndgSearch()
        client.search('temperature', start=1, howmany=wanted)
        pieces = ['Asked for ', str(wanted), ' documents (there were %s hits):' % client.hits]
        print(' '.join(pieces))
        print(client.documents)

    def testgetLabelledDocs(self):
        ''' Test returning of labelled documents '''
        client = ndgSearch()
        client.search('CD97')
        print(client.status)
        print(client.documents)
        client.getLabelledDocs()

    def testNoReturn(self):
        ''' Tests a search return with (hopefully nothing to be found)'''
        client = ndgSearch()
        client.search('xpabnl')
        pieces = ['Hopefully this is zero: if not, expect the NoReturn test to fail:', str(client.hits)]
        print(' '.join(pieces))
        labelled = client.getLabelledDocs()
        self.assertEqual(len(labelled), 0)

    def testGetDoc(self):
        ''' Test obtaining a specific document which had better exist '''
        name = 'noc.soton.ac.uk__DIF__NOCSDAT100.xml'
        ndgSearch().getDoc(name)

    def testSequence(self):
        ''' Tests that repeated searches work and can support zero responses in the middle '''
        # regression test for a bug where a search following getLabelledDocs() broke
        client = ndgSearch()
        # hopefully just get a couple of hits for now.
        client.search('CD97')
        print(client.hits)
        # the failure depended on this call having been made before the next search
        client.getLabelledDocs()
        client.search('xpabnl')
        print(client.hits)
        pieces = ['Hopefully this is also zero: if not, expect the Sequence test to fail:', str(client.hits)]
        print(' '.join(pieces))

    def testGet(self):
        ''' Tests getting via uri components '''
        repository, schema, localID = 'neodc.nerc.ac.uk', 'DIF', 'NEODC_ARSF_ATM_DAED'
        client = ndgSearch()
        client.get(repository, schema, localID, format='DC')
---|
258 | |
---|
# Executed as a script: hand control to unittest's command-line runner.
if "__main__" == __name__:
    unittest.main()
---|