1 | # Code inspired by example on eXist website. |
---|
2 | import urllib2, base64, urllib, urlparse, httplib, xmlrpclib, types, os |
---|
3 | from xmlHandler2 import xmlHandler as xmlh |
---|
4 | import ndg_xqueries as xq |
---|
5 | |
---|
class InstanceObject(object):
    '''Attribute-style access to the keyword arguments given at construction.

    The keyword arguments are kept in ``self.dict``.  Any attribute that
    normal lookup does not find is resolved against that dictionary, so
    ``InstanceObject(host='x').host`` returns ``'x'``.
    '''

    def __init__(self, **kw):
        '''Store the supplied keyword arguments in ``self.dict``.'''
        self.dict = dict(kw)

    def __getattr__(self, arg):
        '''Return the stored keyword value called ``arg``.

        Python only calls this when normal attribute lookup has failed.
        Unknown names raise AttributeError (not KeyError) so that
        ``getattr(obj, name, default)`` and ``hasattr`` work as expected.
        '''
        # Read the backing dictionary through __dict__: if 'dict' itself is
        # missing (instance created without running __init__), evaluating
        # ``self.dict`` here would re-enter __getattr__ and recurse forever.
        try:
            return self.__dict__['dict'][arg]
        except KeyError:
            raise AttributeError(
                "'%s' object has no attribute '%s'"
                % (type(self).__name__, arg))

    def __str__(self):
        '''Return a short description that includes the stored dictionary.'''
        return 'InstanceObject: %s ' % self.dict
---|
14 | |
---|
class edict(dict):
    '''An extended dictionary which allows one to set and get values
    as attributes (kudos Joe Gregorio's 1812).

    The extended part allows you to get and set values as attributes.
    That is,
        d.fred
    is the same as
        d['fred']

    Names starting with an underscore are treated as ordinary instance
    attributes: they are stored on the instance, not in the dictionary, and
    are never looked up in the dictionary.
    '''

    def __init__(self, **kw):
        '''Populate the dictionary from the keyword arguments.'''
        self.update(kw)

    def __getattr__(self, key):
        '''Resolve ``key`` as an instance attribute, then as a dictionary key.

        Raises AttributeError when neither exists, or when ``key`` starts
        with an underscore and is not an instance attribute.
        '''
        try:
            return self.__dict__[key]
        except KeyError:
            pass
        # An explicit test rather than ``assert``: assertions are stripped
        # under ``python -O``, which would let underscore names fall through
        # to the dictionary lookup.
        if not key.startswith('_'):
            try:
                return self[key]
            except KeyError:
                pass
        raise AttributeError("object has no attribute '%s'" % key)

    def __setattr__(self, key, value):
        '''Store underscore names on the instance, everything else as an item.'''
        if key.startswith('_'):
            self.__dict__[key] = value
        else:
            self[key] = value
---|
42 | |
---|
43 | |
---|
class eXist_Connector(object):
    """Access class for eXist.

    Construction sets up two ways of talking to the server, both carrying the
    credentials found on ``constants``:

    * ``self.opener`` and ``self.http_headers`` -- a urllib2 opener and a
      header dictionary, each holding an ``Authorization: Basic ...`` header;
    * ``self.xmlrpc`` -- an ``xmlrpclib.Server`` proxy, which every query
      method of this class delegates to.
    """

    def __init__(self, constants=None):
        ''' Instantiates the eXist connector using supplied constants.

        ``constants`` must provide the attributes ``host``, ``userid``,
        ``password``, ``port`` (formatted with ``%d``) and
        ``xmlrpc_base_path``.

        Raises ValueError('NoExistConstants') if ``constants`` is None.
        '''
        if constants is None:
            # A real exception class: string exceptions cannot be raised on
            # Python 2.6 and later.
            raise ValueError('NoExistConstants')

        authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
        authinfo.add_password(None,
                              constants.host,
                              constants.userid,
                              constants.password)
        opener = urllib2.build_opener(urllib2.HTTPBasicAuthHandler(authinfo))

        # b64encode never inserts line breaks.  encodestring wraps its output
        # every 76 characters, which would put a newline inside the header
        # value for long credentials.
        token = base64.b64encode(constants.userid + ':' + constants.password)
        basic = 'Basic %s' % token
        opener.addheaders.append(('Authorization', basic))
        self.http_headers = {'Authorization': basic}
        self.opener = opener

        # Also create an xmlrpc Server object.  The credentials travel in the
        # URL, so they are percent-quoted; otherwise a ':', '@' or '/' in the
        # user id or password would change how the URL is split.
        # NOTE(review): relies on xmlrpclib unquoting the user:password part
        # before building its own Authorization header -- confirm for the
        # deployed Python version.
        xmlrpc_uri = '%s%s:%s@%s:%d%s' % (
            'http://',
            urllib.quote(constants.userid, safe=''),
            urllib.quote(constants.password, safe=''),
            constants.host,
            constants.port,
            constants.xmlrpc_base_path,
        )
        self.xmlrpc = xmlrpclib.Server(xmlrpc_uri)

    def executeQuery(self, xquery, params=None):
        '''Execute an xquery string, return session and summary information.

        ``params`` is passed to the server unchanged; it defaults to an empty
        dictionary.  Returns ``(session id, summary)`` where the summary is
        whatever the server's ``querySummary`` call returns for that id.
        '''
        if params is None:
            params = {}
        session = self.xmlrpc.executeQuery(xmlrpclib.Binary(xquery), params)
        return session, self.xmlrpc.querySummary(session)

    def release(self, id):
        ''' Release an executeQuery session '''
        self.xmlrpc.releaseQueryResult(id)

    def retrieve(self, id, pos, params=None):
        ''' Retrieve a specific document from an executeQuery result set.

        ``pos`` is the position within the result set of session ``id``.
        Returns the ``data`` attribute of the server's reply.
        '''
        if params is None:
            params = {}
        return self.xmlrpc.retrieve(id, pos, params).data

    def executeChunkedQuery(self, xquery, start, number, params=None):
        ''' Execute a query and return a specific part of the result set.

        This is a single ``query`` request; no session id is handed back, so
        there is nothing for the caller to release.  Note that the remote
        call takes ``number`` before ``start``.
        '''
        if params is None:
            params = {}
        return self.xmlrpc.query(xmlrpclib.Binary(xquery), number, start, params)

    def querySummary(self, id):
        ''' Returns a summary of query results for the result-set referenced
        by id (which was returned by a previous query) '''
        return self.xmlrpc.querySummary(id)

    def getHits(self, id):
        ''' Return the number of hits associated with the query that created
        session id '''
        return self.xmlrpc.getHits(id)

    def getDoc(self, collectionName, documentName):
        ''' Lightweight interface to the getDocument method.

        Requests ``collectionName/documentName`` as a string, with an empty
        options dictionary.
        '''
        name = '%s/%s' % (collectionName, documentName)
        return self.xmlrpc.getDocumentAsString(name, {})
---|
108 | |
---|
109 | # unfortunately it looks like the exist server doesn't support introspection |
---|
110 | # def showMethods(self): |
---|
111 | # print self.xmlrpc.system.listMethods() |
---|
112 | |
---|
class ndg_eXist(eXist_Connector):
    ''' Adds ndg methods to a "standard" exist Connector.

    ``self.ids`` maps each session id opened by ``full_text`` to the position
    of the next document that ``retrieveNext`` will fetch.
    '''

    def __init__(self, db='glue.badc.rl.ac.uk', passwordFile='passwords.txt'):
        ''' Look up the credentials for ``db`` in ``passwordFile`` and connect.

        Each non-blank line of the password file holds three
        whitespace-separated fields: ``host userid password``.  If a host
        appears more than once the last entry wins.

        Raises IOError if the file cannot be opened, and ValueError if a line
        is malformed or ``db`` has no entry.
        '''
        try:
            f = open(passwordFile, 'r')
        except IOError as e:
            raise IOError('%s [looking for %s in %s]'
                          % (e, passwordFile, os.getcwd()))

        pw = {}
        try:
            for index, line in enumerate(f):
                fields = line.split()
                if not fields:
                    continue  # tolerate blank lines
                if len(fields) != 3:
                    # Report the position only -- the line holds a password.
                    raise ValueError(
                        'Malformed entry on line %d of %s '
                        '(expected "host userid password")'
                        % (index + 1, passwordFile))
                host, userid, password = fields
                pw[host] = (userid, password)
        finally:
            f.close()

        if db not in pw:
            raise ValueError(
                'Unable to find eXist password for repository [%s]' % db)
        eXistConstants = InstanceObject(host=db,
                                        userid=pw[db][0],
                                        password=pw[db][1],
                                        base_path="/exist/servlet",
                                        xmlrpc_base_path="/exist/xmlrpc",
                                        port=8080)

        eXist_Connector.__init__(self, eXistConstants)
        self.ids = {}

    def _escapeApos(self, text):
        ''' Make ``text`` safe inside an apostrophe-delimited XQuery string
        literal by doubling every apostrophe.

        NOTE(review): a bare '&' is also special inside an XQuery string
        literal; it is deliberately left alone here -- confirm how the server
        treats it before escaping it as well.
        '''
        return ('%s' % text).replace("'", "''")

    def __buildquery(self, query, target=None):
        '''Create an NDG full text query.

        With a ``target`` the query lists the URIs of matching documents in
        the collection ``/db/<target>``; without one it matches any root
        element containing the text.
        '''
        text = self._escapeApos(query)
        if target is None:
            return "/*[. &='%s']" % text
        template = '\n'.join([
            "let $hits := collection('/db/%s')//root()[. &= '%s']",
            "for $i in $hits",
            "return <document>{document-uri($i)}</document>",
        ])
        return template % (target, text)

    def __buildParamSearch(self, param, value, target):
        ''' Create a query returning the ``target`` elements whose ``param``
        sub-path matches ``value``.  ``target`` and ``param`` are inserted
        as path fragments and are not escaped. '''
        return ("for $x in document()//%s where $x[.%s &= '%s'] return $x"
                % (target, param, self._escapeApos(value)))

    def full_text(self, query, target=None):
        ''' Carry out a full text search within the "target" collection.

        Returns ``(session id, summary)`` and starts a position counter for
        the session so that ``retrieveNext`` can step through the results.
        '''
        id, summary = self.executeQuery(self.__buildquery(query, target))
        self.ids[id] = 0
        return id, summary

    def retrieveNext(self, id, pos=None):
        ''' Takes a sessionID from an existing query and gets the next document.

        If ``pos`` is given the session's counter is first reset to it.
        Returns None when the server reports a fault or the session id has no
        counter (it was not opened through ``full_text`` and no ``pos`` was
        given).
        '''
        if pos is not None:
            self.ids[id] = pos
        try:
            r = self.retrieve(id, self.ids[id])
        except (xmlrpclib.Fault, KeyError):
            return None
        self.ids[id] += 1
        return r

    def sessionRelease(self, id):
        ''' Releases a session and removes the position counter.

        Returns 1 if the server released the session, 0 if the release call
        failed for any reason (best effort: the error is not propagated).
        A session that has no position counter still counts as released.
        '''
        try:
            self.release(id)
        except Exception:
            return 0
        self.ids.pop(id, None)
        return 1

    def chunkedFullText(self, query, start=1, number=10, target='DIF'):
        ''' Execute a chunked full text query and return the result
        set '''
        return self.executeChunkedQuery(self.__buildquery(query, target),
                                        start, number, params={})

    def getDIF(self, entryID):
        ''' Get a specific DIF document from a repository by using the entryID.

        Returns the document when exactly one DIF matches; otherwise returns
        the number of hits.  The server session is released in both cases.
        '''
        query = ("for $DE in collection()/DIF[Entry_ID='%s'] return $DE"
                 % self._escapeApos(entryID))
        session, summary = self.executeQuery(query)
        try:
            if summary['hits'] == 1:
                return self.retrieve(session, 0, {})
            return summary['hits']
        finally:
            # Release on every path; the session id never leaves this method.
            self.sessionRelease(session)

    def search(self, term, start=1, howmany=20, target=None, scope=None,
               bbox=None, dateRange=None, geoSearchType=None):
        ''' Provides a search interface that mimics the WSDL search interface,
        except that the target used is the exist collection name, and scope,
        bbox and dateRange are ignored, and a python summary object is
        returned.

        ``target`` must be 'ndg_B_metadata' or 'NumSim'; any other value,
        including the default None, raises KeyError.

        Sets ``self.hits``, ``self.start``, ``self.howmany``, ``self.error``,
        ``self.serverSessionID`` and ``self.results``, and returns
        ``self.results``: a list of ``edict`` objects with ``id``, ``name``
        and ``type`` entries.
        '''
        import logging

        templates = {'ndg_B_metadata': xq.molesSearchSummary,
                     'NumSim': xq.numsimSummary}
        # NOTE(review): ``term`` is substituted verbatim; how SEARCHSTRING is
        # quoted inside the templates is not visible from here, so no
        # escaping is applied.
        xquery = templates[target].replace('SEARCHSTRING', term)
        r = self.executeChunkedQuery(xquery, start, howmany)
        # Was an unconditional print of the raw reply to stdout.
        logging.getLogger(__name__).debug('search reply: %s', r)

        x = xmlh(str(r), string=1)
        h = x.tree.get('hits')
        self.results = []
        self.serverSessionID = ''
        if h is None:
            # No 'hits' attribute on the reply: report an empty result.
            self.hits = 0
            self.start = 0
            self.howmany = 0
            self.error = ['No results for [%s]' % term, ]
        else:
            self.hits = int(h)
            self.error = None
            self.start = int(x.tree.get('start'))
            self.howmany = int(x.tree.get('count'))
            for s in x.tree.findall('summary'):
                self.results.append(edict(id=s.find('id').text,
                                          name=s.find('name').text,
                                          type=s.find('type').text))
        return self.results
---|
223 | |
---|
224 | import unittest |
---|
225 | |
---|
class TestCase(unittest.TestCase):
    # testFullText and testSearch contact live eXist servers and read
    # passwords.txt from the working directory.

    def testFullText(self):
        ''' Exercises some of the methods based on something we hope might exist in the database '''
        existDB = ndg_eXist(db='glue.badc.rl.ac.uk')
        session, summary = existDB.full_text('neodc')

        # Results are fetched but not inspected; only the release is checked.
        existDB.retrieveNext(session)
        firstDocument = summary['documents'][0][0]
        existDB.getDoc('/db/discovery/moles', firstDocument)

        released = existDB.sessionRelease(session)
        self.assertEqual(1, released)

    def testSearch(self):
        # Prints the hit counts and summaries; makes no assertions.
        existDB = ndg_eXist(db='chinook.badc.rl.ac.uk')
        summaries = existDB.search('coapec', target='ndg_B_metadata')
        print(existDB.hits)
        for entry in summaries:
            print(entry)
        existDB.search('HadAM2', target='NumSim')
        print(existDB.hits)

    def testedict(self):
        # Values set via keyword, attribute and item must all read back by key.
        i = edict(a=1, b=2)
        i.c = 3
        i['d'] = 4
        correct = {'a': 1, 'b': 2, 'c': 3, 'd': 4}
        for key, expected in correct.items():
            self.assertEqual(expected, i[key])
---|
256 | |
---|
# Run the unit tests above when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
---|
259 | |
---|