source: ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/crudclient.py @ 4976

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/crudclient.py@4976
Revision 4976, 9.1 KB checked in by cbyrom, 12 years ago (diff)

Implement an eXist client based on the RESTful DB interface and implementing the interfacexmldbclient interface. Also add some small
code tidy ups and fix a couple of bugs.

Line 
1'''
2 Client for eXist DB - extending the search operations from the SearchClient
3 to add basic CRUD operations
4 
5 @author: C Byrom - Tessella, Feb 09
6'''
7import logging, os, xmlrpclib
8from ndg.common.src.clients.abstractcrudclient import *
9from xml.etree import cElementTree as ET
10import dbconstants as dc
11from searchclient import SearchClient
12
13
14class CRUDClient(SearchClient, AbstractCRUDClient):
15
16    def __init__(self, dbHostName = 'chinook.badc.rl.ac.uk', configFileName ='passwords.txt', 
17                 clientType = dc.XML_RPC_CLIENT):
18        '''
19        Set up a search client for eXist DB
20        @keyword dbHostName: eXist host to use - defaults to 'chinook.badc.rl.ac.uk'
21        @keyword configFileName: password file to use - NB, this should
22        have contents of format (NB, space delimiter):
23        dbName userID password
24        Default is 'passwords.txt'.
25        @keyword clientType: Type of client to use for eXist connection.  Currently
26        only supported by 'xmlrpc' - which is the default.
27        '''
28        logging.debug("Initialising CRUDClient")
29        super(CRUDClient, self).__init__(dbHostName = dbHostName, 
30                                                                                 configFileName = configFileName,
31                                                                                 clientType = clientType)
32       
33        logging.debug("CRUDClient initialised")
34
35
36    def isNewDoc(self, docPath):
37        '''
38        Test if a doc already exists in the eXist DB
39        @param docPath: path of file to look up
40        @return: True if a new doc, False if otherwise
41        '''
42        logging.debug("Checking if file, '%s', exists in eXist DB" %docPath)
43       
44        doc = self.__getDoc(docPath)
45
46        if doc:
47            logging.debug("- file exists")
48            return False
49
50        logging.debug("- file does not exist")
51        return True
52
53
54    def __getDoc(self, docPath):
55        '''
56        Wrapper to the getDocs method, to simplify code when dealing with
57        only a single doc
58        @param docPath: path to doc to retrieve
59        @return: doc or None, if not found
60        '''
61        docs = self.getDocs([docPath])
62        if docs.has_key(docPath):
63            return docs[docPath]
64       
65        return None
66
67
68    def backupDoc(self, collection, fileName, runAsynch = True):
69        '''
70        Backup a file that exists in the XML DB
71        - NB, this really just creates a new file with the same contents in a
72        backup dir
73        @param collection: path of the collection to store the file in
74        @param fileName: name of file to add
75        @param runAsynch: if True, do the backup asynchronously in a separate thread
76        @return: path to new backup file
77        '''
78        if not collection.endswith('/'):
79            collection += '/'
80           
81        docPath = collection + fileName
82        logging.info("Backing up file, '%s', in eXist DB" %docPath)
83
84        logging.debug("Firstly, retrieve file contents from eXist")
85        doc = self.__getDoc(docPath)
86       
87        if not doc:
88            errorMessage = "Could not retrieve file contents (%s) to backup - exiting." %docPath
89            logging.error(errorMessage)
90            raise SystemError(errorMessage)
91       
92        # Now adjust the collection to map to the backup dir
93        collection = collection.replace(dc.ATOM_COLLECTION_PATH, 
94                                        dc.BACKUP_COLLECTION_PATH)
95        collection = collection.replace(dc.NDG_A_COLLECTION_PATH, 
96                                        dc.NDG_A_COLLECTION_PATH_BACKUP)
97       
98        # add timestamp to filename
99        fileName = self._addTimeStamp(fileName)
100        docPath = collection + fileName
101       
102        if runAsynch:
103            # run the back up in a separate thread
104            thread = backingUpThread(self, doc, collection, fileName)
105            thread.start()
106        else:
107            self.createDoc(doc, collection, fileName)
108
109        self.backupName = docPath
110        return docPath
111
112   
113    def restoreBackup(self, docPath):
114        '''
115        Restore the backed up file - effectively recreating in the non-backup collection
116        @param docPath: path to file to backup
117        @param restoredFileName: path to newly restored file
118        '''
119        logging.info("Restoring file, '%s' in eXist" %docPath)
120        doc = self.__getDoc(docPath)
121       
122        if not doc:
123            errorMessage = "Could not retrieve file contents (%s) to backup - exiting." %docPath
124            logging.error(errorMessage)
125            raise SystemError(errorMessage)
126       
127        bits = docPath.split('/')
128        fileName = bits[-1]
129        collection = '/'.join(bits[0:-1])
130        # remove timestamp from filename
131        fileName = self._removeTimeStamp(fileName)
132       
133        # Now adjust the collection to map to the backup dir
134        collection = collection.replace(dc.BACKUP_COLLECTION_PATH, dc.ATOM_COLLECTION_PATH)
135        collection = collection.replace(dc.NDG_A_COLLECTION_PATH_BACKUP, dc.NDG_A_COLLECTION_PATH)
136       
137        self.createDoc(doc, collection, fileName)
138        restoredFileName = "%s/%s" %(collection, fileName)
139        logging.info("File restored - to %s" %restoredFileName)
140        return restoredFileName
141       
142
143    def createCollections(self, collections):
144        '''
145        Create the specified collections in eXist
146        @param collections: array of collections to create
147        @return True if successful
148        '''
149        logging.info("Setting up eXist collections")
150        for col in collections:
151            logging.debug("Creating collection, '%s'" %col)
152            self.client.createCollection(col)
153        logging.info("All collections set up")
154
155
156    def createDoc(self, xml, collection, fileName):
157        '''
158        Add the input doc to the eXist DB
159        @param xml: contents of xml file to create in the XML DB
160        @param collection: path of the collection to store the file in
161        @param fileName: name of file to add
162        @return: True, if file created successfully
163        '''
164        logging.info("Adding file, '%s' to eXist DB collection, '%s'" \
165                     %(fileName, collection))
166        logging.debug("data: %s" %xml)
167
168        # create the collection, if it doesn't already exist - NB, this won't overwrite anything
169        if not collection in self.collections.values():
170            self.createCollections([collection])
171
172        status = self.client.createDoc(xml, collection + "/" + fileName, overwrite=1)
173        if not status:
174            errorMessage = "Command to create file in eXist did not complete successfully - exiting"
175            logging.error(errorMessage)
176            raise SystemError(errorMessage)
177       
178        # update the stored collections info, if being used
179        if self.collections:
180            self.collections[fileName] = collection
181       
182        logging.info("File added to eXist")
183        return True
184
185
186    def deleteDoc(self, docPath):
187        '''
188        Delete the input file from the eXist DB
189        @param docPath: path of document to delete
190        @return: True, if file deleted successfully, False if file not found
191        '''
192        logging.info("Deleting file, '%s', from eXist DB" %docPath)
193
194        try:
195            status = self.client.removeDoc(docPath)   
196            if not status:
197                errorMessage = "Command to delete file in eXist did not complete successfully - exiting"
198                logging.error(errorMessage)
199                raise SystemError(errorMessage)
200        except xmlrpclib.Fault, f:
201            # NB, allow some leniency here - if the fault says the file doesn't exist
202            # then we may not need to worry about deleting it in the first place
203            if str(f).find('not found') > -1:
204                return False
205           
206            raise f
207       
208        logging.info("File deleted from eXist")
209        return True
210
211
212    def createOrUpdateDoc(self, xml, collection, fileName):
213        '''
214        Check if a file already exists in the eXist DB; if it does, run an
215        update (which will backup the existing file), otherwise create
216        the file in eXist
217        @param xml: contents of xml file to create/update in the eXist DB
218        @param collection: path of the collection to store the file in
219        @param fileName: name of file to add in eXist
220        @return name of backup file, if file needed to be backed up - or None, otherwise
221        '''
222        logging.info("Creating or updating file in eXist...")
223       
224        backupFilename = None
225        if not self.isNewDoc(collection + '/' + fileName):
226            backupFilename = self.backupDoc(collection, fileName)
227           
228        self.createDoc(xml, collection, fileName)
229       
230        return backupFilename
231
232
233    def isNewCollection(self, collectionPath):
234        '''
235        Check a collection exists in eXist using XPath
236        @param collectionPath: path to collection to look up
237        @return: False if collection exists, True otherwise
238        '''
239        logging.debug("Checking if collection, '%s', exists in eXist" %collectionPath)
240        colInfo = self.client.describeCollection(collectionPath)
241        if colInfo:
242            logging.debug("Found collection")
243            return False
244
245        logging.debug("Collection does not exist in eXist DB")
246        return True
Note: See TracBrowser for help on using the repository browser.