source: ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/xmlrpcclient.py @ 4966

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/xmlrpcclient.py@4966
Revision 4966, 9.8 KB checked in by cbyrom, 12 years ago (diff)

Add failsafe for sporadic eXist failures - retry the query three times
if an xmlrpclib.Fault is thrown, since this really shouldn't happen.

Line 
1'''
2 An XML-RPC based client for the eXist DB
3 
4 @author: C Byrom, Tessella Feb 09
5'''
6
7import urllib2, base64, xmlrpclib, logging
8import dbconstants as dc
9from xml.etree import cElementTree as ET
10from ndg.common.src.clients.xmldb.interfacexmldbclient import InterfaceXMLDBClient
11
12class XMLRPCClient(InterfaceXMLDBClient):
13   
14    """Access class for eXist"""
15    def __init__(self, host, userID, pw, basePath = "/exist/servlet",
16                 xmlrpcBasePath = "/exist/xmlrpc", port = 8080):
17        '''
18        Instantiates an eXist client via the XML-RPC interface
19        @param host: hostname of the eXist instance to use
20        @param userID: User ID to use with eXist connection
21        @param pw: Password to use for connection
22        @keyword basePath:
23        @keyword xmlrpcBasePath:
24        @keyword port: Port to access DB on.  Default, '8080'
25        '''
26        logging.debug("Initialising XMLRPCClient")
27        self.xmlrpc = self.__getXMLRPCConnection(host, userID, pw, 
28                                                 basePath, xmlrpcBasePath, port)
29        logging.debug("XMLRPCClient initialised")
30
31
32    def __getXMLRPCConnection(self, host, userID, pw, base_path, 
33                              xmlrpc_base_path, port):
34        '''
35        Set up xml rpc connection to eXist DB
36        '''
37        logging.debug("Setting up xmlrpc connection to eXist")
38        logging.debug("Host: '%s', User: '%s'" %(host, userID))
39        authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
40        authinfo.add_password(None, host, userID, pw)
41        authHandler = urllib2.HTTPBasicAuthHandler(authinfo)
42        opener = urllib2.build_opener(authHandler)
43        s = userID+':'+pw
44        self.auth = base64.encodestring(s)[:-1] # strip trailing 12
45        opener.addheaders.append(('Authorization', 'Basic %s' % self.auth))
46        self.http_headers = {'Authorization':'Basic %s' % self.auth}
47        self.opener = opener
48
49        # also create an xmlrpc Server object
50        xmlrpc_uri = 'http://%s:%s@%s:%d%s' %(userID, pw, host, port, xmlrpc_base_path)
51        xmlrpc = xmlrpclib.Server(xmlrpc_uri)
52        logging.debug("xmlrpc connection set up")
53       
54        # add the various config params - incase they need to be used from
55        # elsewhere
56        self.host = host
57        self.port = port
58
59        return xmlrpc
60
61
62    def __executeQuery(self, xquery, params={}):
63        '''
64        Execute an xquery string, return session and summary information
65        @param xquery: xquery to execute
66        @keyword params: a dict with additional parameters to include in xquery
67        invocation
68        @return id, summary from query
69        '''
70        logging.debug("Executing xquery on eXist:\n%s" %xquery)
71        xquery=xmlrpclib.Binary(str(xquery))
72        id = self.xmlrpc.executeQuery(xquery, params)
73        summary = self.xmlrpc.querySummary(id)
74        logging.debug("XQuery executed")
75        return id, summary
76
77
78    def runQuery(self, xquery, offset = 0, noResults = -1, params={}, useChunked = True):
79        '''
80        Execute an xquery string, and retrieve the subsequent results
81        @param xquery: xquery to execute
82        @keyword offset: starting point to retrieve results from - assuming
83        starting index of 0.  Default = 0.
84        @keyword noResults: number of results to return.  Default = -1 - return all results
85        @keyword params: a dict with additional parameters to include in xquery
86        invocation
87        @keyword useChunked: if True, use the xmlrpc.query() command to run the query.
88        Default = True - NB, this is a factor of about 10 faster
89        @return array of result docs generated by query; if none found, an empty array is returned
90        '''
91        logging.debug("Running xquery on eXist")
92       
93        # NB, this could be simplified by just using the xmlrpc.query() function
94        # - need to investigate if this is worth using instead
95        if useChunked:
96            if noResults == -1:
97                # set to arbitrary big number
98                noResults = 100000
99               
100            result = None
101            attempt = 0
102            while not result:
103                try:
104                    result = self.__executeChunkedQuery(xquery, offset, 
105                                                        noResults, params)
106                except xmlrpclib.Fault, f:
107                    if attempt < 3:
108                        attempt += 1
109                        logging.error("XMLRPCLib Fault caught - retrying xquery (attempt %s)" %attempt)
110                        continue
111                    else:
112                        logging.error("Retry limit exceeded - giving up")
113                        raise f
114       
115        id, summary = self.__executeQuery(xquery, params)
116
117        docs = []
118        if summary.has_key('hits') and not summary['hits']:
119            logging.debug("No data returned from xquery - returning")
120            return docs
121       
122        noHits = summary['hits']
123
124        for i in range(noHits):
125           
126            if noResults > -1 and noResults < i + offset + 1:
127                logging.debug("Reached limit on number of required results (%d) - returning" %noResults)
128                break
129           
130            if offset and i + offset > noHits:
131                logging.debug("Reached limit due to specified starting offset (%d) - returning" %offset)
132                break
133           
134            docs.append(self.__retrieve(id, i + offset, params))
135       
136        self.__release(id)
137        logging.debug("Query ran successfully - returning results")
138        return docs
139
140 
141    def __executeChunkedQuery(self, xquery, start, number, params={}):
142        '''
143        Execute a query, return a specific part of the result set, and
144        dump the session automagically
145        @param xquery: xquery to execute
146        @param start: starting index of result - NB, if this index is larger than
147        the number of results, the last result will be returned.  Also if start = 0,
148        the last result will be returned.
149        @param number: number of results to retrieve
150        @keyword params: a dict with additional parameters to include in xquery
151        invocation
152        @return array of documents returned from query
153        '''
154        logging.debug("Executing chunked query on eXist:\n%s" %xquery)
155        xquery = xmlrpclib.Binary(xquery)
156        result = self.xmlrpc.query(xquery, number, start + 1, params)
157       
158        x = ET.fromstring(str(result))
159       
160        results=[]
161        for el in x.getchildren():
162            results.append(ET.tostring(el))
163       
164        logging.debug("XQuery executed")
165        return results
166
167   
168    def getDoc(self, docPath):
169        '''
170        Lightweight interface to the getDocument method
171        @param docPath: path to document to retrieve
172        @return doc contents, if found, None if not
173        '''
174        logging.debug("Retrieving doc at '%s'" %docPath)
175        try:
176            r = self.xmlrpc.getDocumentAsString(docPath,{})
177        except xmlrpclib.Fault, f:
178            logging.debug("Error: %s" %f.message)
179            logging.debug("- document not found - returning None")
180            return None
181        logging.debug("Returning doc contents")
182        return r
183   
184
185    def __release(self, id):
186        '''
187        Release an executeQuery session
188        @param id: id of session to release
189        '''
190        self.xmlrpc.releaseQueryResult(id)
191
192       
193    def __retrieve(self, id, pos, params={}):
194        '''
195        Retrieve a specific document from an executeQuery result set
196        @param id: id of result to retrieve
197        @param pos: offset of results set to retrieve
198        @keyword params: dict of additonal params to include
199        '''
200        logging.debug("Retrieving document from eXist...")
201        xml = self.xmlrpc.retrieve(id,pos,params).data
202        logging.debug("Document retrieved.")
203        return xml
204
205   
206    def createCollection(self,collectionPath):
207        ''' Create a collection in the eXist database at collectionPath '''
208        logging.info("Creating collection: '%s'" %collectionPath)
209        r=self.xmlrpc.createCollection(collectionPath)
210        logging.info("Collection created")
211        return r
212
213   
214    def removeCollection(self,collectionPath):
215        '''
216        Remove a collection in the XML database
217        @param collectionPath: path of the collection to remove
218        @return True if successful, False otherwise
219        '''
220        r=self.xmlrpc.removeCollection(collectionPath)
221        return r
222
223   
224    def removeDoc(self, docPath):
225        '''
226        Remove a document from the eXist database
227        @param docPath: path of document to remove
228        @raises xmlrpc.Fault if doc to be deleted doesn't exist
229        @return True if successful, False otherwise
230        '''
231        r=self.xmlrpc.remove(docPath)
232        return r
233
234   
235    def storeXML(self,xml,path,overwrite=0):
236        '''
237        Store some XML into the databse at path
238        @param xml: content of doc to store
239        @param path: path of doc to store
240        @keyword overwrite: if '1', overwrite doc if it already exists, if '0' (default)
241        don't do this
242        @return True if successful, False otherwise
243        '''
244        return self.xmlrpc.parse(xml,path,overwrite)
245   
246   
247    def describeCollection(self, collectionPath):
248        '''
249        Get info on specified collection - returning this in a struct
250        with fields: name, owner, group, created and permissions
251        - if the collection is not found, None is returned
252        @param collectionPath: Collection to lookup details for
253        @return: struct with info on collection or None, if collection not found
254        '''
255        try:
256            return self.xmlrpc.describeCollection(collectionPath)
257        except xmlrpclib.Fault, f:
258            if str(f).find('not found') > -1:
259                return None
260            raise f
261       
262       
Note: See TracBrowser for help on using the repository browser.