source: ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/crudclient.py @ 5184

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/crudclient.py@5184
Revision 5184, 8.8 KB checked in by cbyrom, 11 years ago (diff)

Simplify backups structure in eXist - putting all backups under a
single toplevel structure.

Line 
'''
Client for eXist DB - extending the search operations from the SearchClient
to add basic CRUD operations

@author: C Byrom - Tessella, Feb 09
'''
7import logging, os, xmlrpclib
8from ndg.common.src.clients.abstractcrudclient import *
9from xml.etree import cElementTree as ET
10import dbconstants as dc
11from searchclient import SearchClient
12
13
class CRUDClient(SearchClient, AbstractCRUDClient):
    '''
    Client for eXist DB which extends the search operations of the
    SearchClient with basic create/update/delete operations, plus backing up
    and restoring of documents.
    '''

    def __init__(self, dbHostName = 'chinook.badc.rl.ac.uk', configFileName = 'passwords.txt',
                 clientType = dc.XML_RPC_CLIENT):
        '''
        Set up a CRUD client for eXist DB
        @keyword dbHostName: eXist host to use - defaults to 'chinook.badc.rl.ac.uk'
        @keyword configFileName: password file to use - NB, this should
        have contents of format (NB, space delimiter):
        dbName userID password
        Default is 'passwords.txt'.
        @keyword clientType: Type of client to use for eXist connection.  Currently
        only supported by 'xmlrpc' - which is the default.
        '''
        logging.debug("Initialising CRUDClient")
        super(CRUDClient, self).__init__(dbHostName = dbHostName,
                                         configFileName = configFileName,
                                         clientType = clientType)
        logging.debug("CRUDClient initialised")


    @staticmethod
    def __buildDocPath(collection, fileName):
        '''
        Join a collection path and a file name with exactly one '/' between
        them, whether or not the collection already ends with a '/'
        @param collection: path of the collection
        @param fileName: name of the file in the collection
        @return: full path to the doc
        '''
        if collection.endswith('/'):
            return collection + fileName
        return collection + '/' + fileName


    def isNewDoc(self, docPath):
        '''
        Test if a doc already exists in the eXist DB
        @param docPath: path of file to look up
        @return: True if a new doc, False if otherwise
        '''
        logging.debug("Checking if file, '%s', exists in eXist DB", docPath)

        if self.__getDoc(docPath):
            logging.debug("- file exists")
            return False

        logging.debug("- file does not exist")
        return True


    def __getDoc(self, docPath):
        '''
        Wrapper to the getDocs method, to simplify code when dealing with
        only a single doc
        @param docPath: path to doc to retrieve
        @return: doc or None, if not found
        '''
        docs = self.getDocs([docPath])
        if docPath in docs:
            return docs[docPath]

        return None


    def backupDoc(self, collection, fileName, runAsynch = True):
        '''
        Backup a file that exists in the XML DB
        - NB, this really just creates a new file with the same contents in a
        backup dir; the backup file name has a timestamp added to it
        @param collection: path of the collection the file currently lives in
        @param fileName: name of file to back up
        @param runAsynch: if True, do the backup asynchronously in a separate thread
        @return: path to new backup file
        @raise SystemError: if the file contents cannot be retrieved
        '''
        if not collection.endswith('/'):
            collection += '/'

        docPath = collection + fileName
        logging.info("Backing up file, '%s', in eXist DB", docPath)

        logging.debug("Firstly, retrieve file contents from eXist")
        doc = self.__getDoc(docPath)

        if not doc:
            errorMessage = "Could not retrieve file contents (%s) to backup - exiting." %docPath
            logging.error(errorMessage)
            raise SystemError(errorMessage)

        # Now adjust the collection to map to the backup dir
        collection = dc.BACKUP_COLLECTION_PATH + collection

        # add timestamp to filename
        fileName = self._addTimeStamp(fileName)
        docPath = collection + fileName

        if runAsynch:
            # run the back up in a separate thread
            # NB, backingUpThread is not defined in this module - presumably it
            # arrives via the abstractcrudclient star import; TODO confirm
            thread = backingUpThread(self, doc, collection, fileName)
            thread.start()
        else:
            self.createDoc(doc, collection, fileName)

        # NB, when running asynchronously the backup may not have completed
        # by the time this path is returned
        self.backupName = docPath
        return docPath


    def restoreBackup(self, docPath):
        '''
        Restore the backed up file - effectively recreating in the non-backup collection
        @param docPath: path to the backup file to restore
        @return: path to newly restored file
        @raise SystemError: if the backup file contents cannot be retrieved
        '''
        logging.info("Restoring file, '%s' in eXist", docPath)
        doc = self.__getDoc(docPath)

        if not doc:
            errorMessage = "Could not retrieve file contents (%s) to restore - exiting." %docPath
            logging.error(errorMessage)
            raise SystemError(errorMessage)

        bits = docPath.split('/')
        collection = '/'.join(bits[:-1])
        # remove timestamp from filename
        fileName = self._removeTimeStamp(bits[-1])

        # Now map the backup collection back to the original collection
        # - backupDoc adds the backup path exactly once, so only remove the
        # first occurrence, leaving any later match in the path untouched
        collection = collection.replace(dc.BACKUP_COLLECTION_PATH, '', 1)

        self.createDoc(doc, collection, fileName)
        restoredFileName = "%s/%s" %(collection, fileName)
        logging.info("File restored - to %s", restoredFileName)
        return restoredFileName


    def createCollections(self, collections):
        '''
        Create the specified collections in eXist
        @param collections: array of collections to create
        @return True if successful
        '''
        logging.info("Setting up eXist collections")
        for col in collections:
            logging.debug("Creating collection, '%s'", col)
            self.client.createCollection(col)
        logging.info("All collections set up")
        return True


    def createDoc(self, xml, collection, fileName):
        '''
        Add the input doc to the eXist DB - NB, an existing doc at the same
        path is overwritten
        @param xml: contents of xml file to create in the XML DB
        @param collection: path of the collection to store the file in
        @param fileName: name of file to add
        @return: True, if file created successfully
        @raise SystemError: if eXist does not report the creation as successful
        '''
        logging.info("Adding file, '%s' to eXist DB collection, '%s'",
                     fileName, collection)
        logging.debug("data: %s", xml)

        # create the collection, if it doesn't already exist - NB, this won't overwrite anything
        # - the stored collections info is optional, so cope with it being unset
        knownCollections = []
        if self.collections:
            knownCollections = self.collections.values()
        if collection not in knownCollections:
            self.createCollections([collection])

        # NB, backupDoc passes in collections with a trailing '/', so avoid
        # ending up with a '//' in the doc path
        docPath = self.__buildDocPath(collection, fileName)
        status = self.client.createDoc(xml, docPath, overwrite=1)
        if not status:
            errorMessage = "Command to create file in eXist did not complete successfully - exiting"
            logging.error(errorMessage)
            raise SystemError(errorMessage)

        # update the stored collections info, if being used
        if self.collections:
            self.collections[fileName] = collection

        logging.info("File added to eXist")
        return True


    def deleteDoc(self, docPath):
        '''
        Delete the input file from the eXist DB
        @param docPath: path of document to delete
        @return: True, if file deleted successfully, False if file not found
        @raise SystemError: if eXist does not report the deletion as successful
        @raise xmlrpclib.Fault: for any fault other than the file not being found
        '''
        logging.info("Deleting file, '%s', from eXist DB", docPath)

        try:
            status = self.client.removeDoc(docPath)
        except xmlrpclib.Fault as fault:
            # NB, allow some leniency here - if the fault says the file doesn't exist
            # then we may not need to worry about deleting it in the first place
            if 'not found' in str(fault):
                logging.info("File not found in eXist - nothing to delete")
                return False

            # bare raise keeps the original traceback intact
            raise

        if not status:
            errorMessage = "Command to delete file in eXist did not complete successfully - exiting"
            logging.error(errorMessage)
            raise SystemError(errorMessage)

        logging.info("File deleted from eXist")
        return True


    def createOrUpdateDoc(self, xml, collection, fileName):
        '''
        Check if a file already exists in the eXist DB; if it does, back up
        the existing file before overwriting it, otherwise just create
        the file in eXist
        @param xml: contents of xml file to create/update in the eXist DB
        @param collection: path of the collection to store the file in
        @param fileName: name of file to add in eXist
        @return name of backup file, if file needed to be backed up - or None, otherwise
        '''
        logging.info("Creating or updating file in eXist...")

        backupFilename = None
        if not self.isNewDoc(self.__buildDocPath(collection, fileName)):
            backupFilename = self.backupDoc(collection, fileName)

        self.createDoc(xml, collection, fileName)

        return backupFilename


    def isNewCollection(self, collectionPath):
        '''
        Check if a collection exists in eXist, by asking the client to
        describe it
        @param collectionPath: path to collection to look up
        @return: False if collection exists, True otherwise
        '''
        logging.debug("Checking if collection, '%s', exists in eXist", collectionPath)
        colInfo = self.client.describeCollection(collectionPath)
        if colInfo:
            logging.debug("Found collection")
            return False

        logging.debug("Collection does not exist in eXist DB")
        return True
Note: See TracBrowser for help on using the repository browser.