source: ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/xmlrpcclient.py @ 4922

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/xmlrpcclient.py@4922
Revision 4922, 9.3 KB checked in by cbyrom, 12 years ago (diff)

Create eXist client to the XML-RPC interface - implementing the
xmldb interface defined for ndgcommon.

Line 
1'''
2 An XML-RPC based client for the eXist DB
3 
4 @author: C Byrom, Tessella Feb 09
5'''
6
7import urllib2, base64, xmlrpclib, logging
8import dbconstants as dc
9from xml.etree import cElementTree as ET
10from ndg.common.src.lib.ETxmlView import et2text
11from ndg.common.src.clients.xmldb.interfacexmldbclient import InterfaceXMLDBClient
12
class XMLRPCClient(InterfaceXMLDBClient):
    """
    Client for an eXist XML database, using eXist's XML-RPC interface.

    All database operations are delegated to the xmlrpclib server proxy held
    in self.xmlrpc.  A urllib2 opener and a pre-built basic-auth header are
    also stored on the instance (self.opener, self.http_headers, self.auth);
    they are not used by this class itself.
    """

    def __init__(self, host, userID, pw, basePath = "/exist/servlet",
                 xmlrpcBasePath = "/exist/xmlrpc", port = 8080):
        '''
        Instantiates an eXist client via the XML-RPC interface
        @param host: hostname of the eXist instance to use
        @param userID: User ID to use with eXist connection
        @param pw: Password to use for connection
        @keyword basePath: passed through to the connection set-up, which
        currently does not use it.  Default, '/exist/servlet'
        @keyword xmlrpcBasePath: path component of the XML-RPC endpoint URL.
        Default, '/exist/xmlrpc'
        @keyword port: Port to access DB on.  Default, 8080
        '''
        logging.debug("Setting up xmlrpc connection to eXist")
        self.xmlrpc = self.__getXMLRPCConnection(host, userID, pw,
                                                 basePath, xmlrpcBasePath, port)
        logging.debug("Xml-rpc connection to eXist set up")


    def __getXMLRPCConnection(self, host, userID, pw, base_path,
                              xmlrpc_base_path, port):
        '''
        Set up xml rpc connection to eXist DB

        Side effects: sets self.auth, self.http_headers, self.opener,
        self.host and self.port.
        @param host: hostname of the eXist instance
        @param userID: User ID to authenticate with
        @param pw: Password to authenticate with
        @param base_path: accepted for interface compatibility; unused here
        @param xmlrpc_base_path: path component of the XML-RPC endpoint URL
        @param port: port of the eXist instance (int, or a string of digits)
        @return xmlrpclib server proxy for the eXist XML-RPC endpoint
        '''
        # Local import: only quote() is needed and the module does not
        # otherwise import urllib.
        import urllib

        logging.debug("Setting up xmlrpc connection to eXist")
        logging.debug("Host: '%s', User: '%s'" %(host, userID))
        authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
        authinfo.add_password(None, host, userID, pw)
        opener = urllib2.build_opener(urllib2.HTTPBasicAuthHandler(authinfo))

        # b64encode() never inserts line breaks, whereas encodestring() wraps
        # its output every 76 characters - which would leave a newline inside
        # the header value for long credentials.
        self.auth = base64.b64encode(userID + ':' + pw)
        authHeader = 'Basic %s' % self.auth
        opener.addheaders.append(('Authorization', authHeader))
        self.http_headers = {'Authorization': authHeader}
        self.opener = opener

        # also create an xmlrpc Server object; the credentials are
        # percent-encoded so that characters such as '/', '@' or '%' cannot
        # corrupt the URL (xmlrpclib unquotes them again before use)
        xmlrpc_uri = 'http://%s:%s@%s:%d%s' %(urllib.quote(userID, safe=''),
                                              urllib.quote(pw, safe=''),
                                              host, int(port),
                                              xmlrpc_base_path)
        xmlrpc = xmlrpclib.Server(xmlrpc_uri)
        logging.debug("xmlrpc connection set up")

        # add the various config params - in case they need to be used from
        # elsewhere
        self.host = host
        self.port = port

        return xmlrpc


    def __executeQuery(self, xquery, params=None):
        '''
        Execute an xquery string, return session and summary information

        The caller owns the returned session and should release it (see
        __release) when finished with the results.
        @param xquery: xquery to execute
        @keyword params: a dict with additional parameters to include in xquery
        invocation.  Default = None, treated as an empty dict
        @return id, summary from query
        '''
        if params is None:
            params = {}
        logging.debug("Executing xquery on eXist:\n%s" %xquery)
        xquery = xmlrpclib.Binary(str(xquery))
        id = self.xmlrpc.executeQuery(xquery, params)
        summary = self.xmlrpc.querySummary(id)
        logging.debug("XQuery executed")
        return id, summary


    def runQuery(self, xquery, offset = 0, noResults = -1, params=None, useChunked = True):
        '''
        Execute an xquery string, and retrieve the subsequent results
        @param xquery: xquery to execute
        @keyword offset: starting point to retrieve results from - assuming
        starting index of 0.  Default = 0.
        @keyword noResults: number of results to return, counted from offset.
        Default = -1 - return all results (capped at 100000 when useChunked
        is True)
        @keyword params: a dict with additional parameters to include in xquery
        invocation.  Default = None, treated as an empty dict
        @keyword useChunked: if True, use the xmlrpc.query() command to run the query.
        Default = True - NB, this is a factor of about 10 faster
        @return array of result docs generated by query; if none found, an empty array is returned
        '''
        logging.debug("Running xquery on eXist")
        if params is None:
            params = {}

        # NB, this could be simplified by just using the xmlrpc.query() function
        # - need to investigate if this is worth using instead
        if useChunked:
            if noResults == -1:
                # set to arbitrary big number
                noResults = 100000
            return self.__executeChunkedQuery(xquery, offset, noResults, params)

        queryID, summary = self.__executeQuery(xquery, params)
        try:
            noHits = summary.get('hits', 0)
            if not noHits:
                logging.debug("No data returned from xquery - returning")
                return []

            # Retrieve positions offset .. end-1, where end is bounded both by
            # the number of hits and - if a limit was given - by the number of
            # results requested from the offset onwards.
            end = noHits
            if noResults > -1:
                end = min(noHits, offset + noResults)
            logging.debug("Retrieving results %d to %d of %d hits"
                          %(offset, end, noHits))
            docs = [self.__retrieve(queryID, pos, params)
                    for pos in range(offset, end)]
        finally:
            # always free the server-side result set, even when there were no
            # hits or a retrieve call failed
            self.__release(queryID)

        logging.debug("Query ran successfully - returning results")
        return docs


    def __executeChunkedQuery(self, xquery, start, number, params=None):
        '''
        Execute a query, return a specific part of the result set, and
        dump the session automagically
        @param xquery: xquery to execute
        @param start: starting index of result, counting from 0; it is passed
        to eXist as start + 1.  NB (note carried over from the original
        author, not verifiable from this code): if the index passed to eXist
        is larger than the number of results, or is 0, the last result is
        returned.
        @param number: number of results to retrieve
        @keyword params: a dict with additional parameters to include in xquery
        invocation.  Default = None, treated as an empty dict
        @return array of documents returned from query
        '''
        if params is None:
            params = {}
        logging.debug("Executing chunked query on eXist:\n%s" %xquery)
        # str() for consistency with __executeQuery
        xquery = xmlrpclib.Binary(str(xquery))
        result = self.xmlrpc.query(xquery, number, start + 1, params)

        # each child of the returned root element is one result document
        root = ET.fromstring(str(result))
        results = [et2text(el) for el in root]

        logging.debug("XQuery executed")
        return results


    def getDoc(self, docPath):
        '''
        Lightweight interface to the getDocument method

        NB, any xmlrpclib.Fault raised by the server is treated as
        'document not found'.
        @param docPath: path to document to retrieve
        @return doc contents, if found, None if not
        '''
        logging.debug("Retrieving doc at '%s'" %docPath)
        try:
            doc = self.xmlrpc.getDocumentAsString(docPath, {})
        except xmlrpclib.Fault as f:
            # faultString carries the server's message; Fault.message is empty
            logging.debug("Error: %s" %f.faultString)
            logging.debug("- document not found - returning None")
            return None
        logging.debug("Returning doc contents")
        return doc


    def __release(self, id):
        '''
        Release an executeQuery session
        @param id: id of session to release
        '''
        self.xmlrpc.releaseQueryResult(id)


    def __retrieve(self, id, pos, params=None):
        '''
        Retrieve a specific document from an executeQuery result set
        @param id: id of result to retrieve
        @param pos: offset of results set to retrieve
        @keyword params: dict of additional params to include.
        Default = None, treated as an empty dict
        @return the data payload of the retrieved result
        '''
        if params is None:
            params = {}
        logging.debug("Retrieving document from eXist...")
        xml = self.xmlrpc.retrieve(id, pos, params).data
        logging.debug("Document retrieved.")
        return xml


    def createCollection(self, collectionPath):
        '''
        Create a collection in the eXist database at collectionPath
        @param collectionPath: path of the collection to create
        @return result of the XML-RPC createCollection call
        '''
        logging.info("Creating collection: '%s'" %collectionPath)
        r = self.xmlrpc.createCollection(collectionPath)
        logging.info("Collection created")
        return r


    def removeCollection(self, collectionPath):
        '''
        Remove a collection in the XML database
        @param collectionPath: path of the collection to remove
        @return True if successful, False otherwise
        '''
        return self.xmlrpc.removeCollection(collectionPath)


    def removeDoc(self, docPath):
        '''
        Remove a document from the eXist database
        @param docPath: path of document to remove
        @raises xmlrpclib.Fault if doc to be deleted doesn't exist
        @return True if successful, False otherwise
        '''
        return self.xmlrpc.remove(docPath)


    def storeXML(self, xml, path, overwrite=0):
        '''
        Store some XML into the database at path
        @param xml: content of doc to store
        @param path: path of doc to store
        @keyword overwrite: if '1', overwrite doc if it already exists, if '0' (default)
        don't do this
        @return True if successful, False otherwise
        '''
        return self.xmlrpc.parse(xml, path, overwrite)


    def describeCollection(self, collectionPath):
        '''
        Get info on specified collection - returning this in a struct
        with fields: name, owner, group, created and permissions
        - if the collection is not found, None is returned
        @param collectionPath: Collection to lookup details for
        @raises xmlrpclib.Fault for any server fault whose text does not
        contain 'not found'
        @return: struct with info on collection or None, if collection not found
        '''
        try:
            return self.xmlrpc.describeCollection(collectionPath)
        except xmlrpclib.Fault as f:
            if 'not found' in str(f):
                return None
            # bare raise keeps the original traceback
            raise
248       
249       
Note: See TracBrowser for help on using the repository browser.