source: exist/trunk/python/ndgUtils/models/existdbclient.py @ 4229

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/models/existdbclient.py@4229
Revision 4229, 10.8 KB checked in by cbyrom, 11 years ago (diff)

Add support for doing lists and summaries of atom docs via ndgDirectory and the existInterface class + add necessary xquery files for this + add new method to
allow retrieval of the collection to which an atom with a specified ID belongs - including the associated xquery file + extend tests to exercise these new features
+ add custom Atom error + improve preservation of key atom attributes when loading existing atoms into an Atom object + improve determining default atom collection

Line 
'''
 Class supporting set up and communication with eXist DB
 for the purposes of creating and updating atoms
 
 @author: C Byrom - Tessella 08
'''
7import os, sys, logging, datetime
8from ndgUtils.eXistInterface import ndg_eXist
9from ndgUtils.eXistConnector import eXistConnector as ec
10from ndgUtils.ndgXqueries import ndgXqueries
11
12try:
13    from xml.etree import ElementTree as ET
14except ImportError:
15    try:
16        import ElementTree as ET
17    except ImportError:
18        import elementtree.ElementTree as ET
19
class eXistDBClient:
    '''
    Client wrapping an ndg_eXist connection, providing helper methods to
    look up, create, back up and delete documents (in particular atoms)
    in an eXist DB
    '''

    def __init__(self, configFile = None, eXistDBHostname = None):
        '''
        Initialise a connection to the eXistDB
        @keyword configFile: path to the DB config file; if set, the host name,
        user name and password are read from it (NB, the host name read from
        the file replaces any value passed in via eXistDBHostname) and the
        path is passed on to ndg_eXist as 'passwordFile'
        @keyword eXistDBHostname: host name of the eXist DB; if set, it is
        passed on to ndg_eXist as 'db'
        @raise ValueError: if configFile is specified but cannot be found or
        has fewer than three entries
        '''
        logging.info("Initialising connection to eXist DB")
        self.eXistDBHostname = eXistDBHostname
        # ensure the credential attributes always exist, even when no config
        # file is supplied
        self._username = None
        self._pw = None

        inputs = {}
        # Firstly load in config data
        if configFile:
            self._loadDBDetails(configFile)
            inputs['passwordFile'] = configFile

        if self.eXistDBHostname:
            inputs['db'] = self.eXistDBHostname

        # Now set up the connection
        self.xmldb = ndg_eXist(**inputs)

        # set up any collections required - NB, if these already exist they won't cause any files to be lost
        self._setUpEXistAtomCollections()
        logging.info("eXist DB connection initialised")


    def _joinPath(self, collection, fileName):
        '''
        Build the path of a doc from its collection path and file name, adding
        a '/' separator only if the collection path does not already end with one
        @param collection: path of the collection (with or without trailing '/')
        @param fileName: name of the file in the collection
        @return: path to the doc
        '''
        if collection.endswith('/'):
            return collection + fileName
        return collection + '/' + fileName


    def createCollections(self, collections):
        '''
        Create the specified collections in eXist
        @param collections: array of collections to create
        @return True once all the create calls have been made
        '''
        logging.info("Setting up eXist collections")
        for col in collections:
            logging.debug("Creating collection, '%s'", col)
            self.xmldb.createCollection(col)
        logging.info("All collections set up")
        return True


    def _setUpEXistAtomCollections(self):
        '''
        Set up the required eXist collections needed for running the granulator script
        - i.e. for both the base and the backup collections, create a sub
        collection for each atom state, each with DE, deployment and granule
        sub collections
        '''
        logging.info("Ensuring required collections are available in eXist")
        stateCollections = [ec.OLD_COLLECTION_PATH, ec.PUBLISHED_COLLECTION_PATH,
                            ec.SMALL_P_PUBLISHED_COLLECTION_PATH, ec.WORKING_COLLECTION_PATH]
        typeCollections = [ec.DE_COLLECTION_PATH, ec.DEPLOYMENT_COLLECTION_PATH,
                           ec.GRANULE_COLLECTION_PATH]

        for rootCol in [ec.BASE_COLLECTION_PATH, ec.BACKUP_COLLECTION_PATH]:
            # the root collection only needs creating once, not once per state
            self.xmldb.createCollection(rootCol)
            for stateCol in stateCollections:
                self.xmldb.createCollection(rootCol + stateCol)
                for typeCol in typeCollections:
                    self.xmldb.createCollection(rootCol + stateCol + typeCol)
        logging.info("Required collections available")


    def _loadDBDetails(self, configFile):
        '''
        Retrieve info from the eXist db config file
        - the file is expected to hold, whitespace separated, the DB host name,
        the user name and the password, in that order; these are stored on the
        object as eXistDBHostname, _username and _pw
        @param configFile: path to the config file
        @raise ValueError: if the file cannot be found or has fewer than
        three entries
        '''
        logging.info("Loading DB config data")
        # Check this file exists
        if not os.path.isfile(configFile):
            errorMessage = "ERROR: Could not find the DB config file, %s; please make sure this " \
                     "is available from the running directory" %configFile
            logging.error(errorMessage)
            raise ValueError(errorMessage)

        # NB, make sure the file handle is released, whatever happens in the read
        dbinfo_file = open(configFile, "r")
        try:
            dbinfo = dbinfo_file.read().split()
        finally:
            dbinfo_file.close()

        if len(dbinfo) < 3:
            errorMessage = 'Incorrect data in DB config file'
            logging.error(errorMessage)
            raise ValueError(errorMessage)
        self.eXistDBHostname = dbinfo[0]
        self._username = dbinfo[1]
        self._pw = dbinfo[2]
        logging.info("DB config data loaded")


    def _lookupEXistFile(self, docPath):
        '''
        Look up a file in eXist using XPath
        @param docPath: path to doc to look up
        @return: id returned from query, with which to retrieve doc; if doc doesn't exist, return None
        '''
        logging.info("Retrieving info for file, '%s'", docPath)

        # NB, docPath is inserted directly into the query, so a path
        # containing a double quote will produce an invalid query
        resultId, summary = self.xmldb.executeQuery('doc("' + docPath + '")')

        if summary['hits'] == 0:
            logging.info("File does not exist in eXist DB")
            return None
        logging.info("Found file - returning result ID")
        return resultId


    def getEXistFile(self, docPath):
        '''
        Use XQuery to retrieve the specified document from eXist
        @param docPath: the path of the doc to retrieve
        @return: contents of document if exists, None otherwise
        '''
        resultId = self._lookupEXistFile(docPath)

        # NB, compare against None explicitly - a result ID of 0 is falsy but
        # should still be treated as a found doc
        if resultId is None:
            logging.info("No file found - nothing to retrieve")
            return None

        logging.info("Found file - now retrieving content")
        return self.xmldb.retrieve(resultId, 0)


    def isNewEXistFile(self, docPath):
        '''
        Check whether a file is new to the eXist DB - i.e. it does not yet
        exist there
        @param docPath: path of file in eXist to check
        @return: True if no doc is found at docPath, False if one already exists
        '''
        logging.info("Checking if file, '%s', exists in eXist DB", docPath)

        # NB, a result ID of 0 still means the doc was found
        return self._lookupEXistFile(docPath) is None


    def _addTimeStamp(self, fileName):
        '''
        Add timestamp to input filename
        NB, this assumes there is a file type identifier at the end of the filename; if so, the datestamp
        is included before this; if not it is just added at the end
        @param fileName: name of file to add the timestamp to
        @return: file name including a timestamp of format, YYYY-MM-DDTHH_MM_SS
        '''
        bits = fileName.rsplit(".", 1)
        fileName = bits[0] + "_" + datetime.datetime.today().strftime("%Y-%m-%dT%H_%M_%S")

        if len(bits) > 1:
            fileName += "." + bits[1]
        return fileName


    def backupEXistFile(self, collection, fileName):
        '''
        Backup a file that exists in the eXist DB
        - NB, this really just creates a new file with the same contents in a
        backup dir
        @param collection: path of the collection the file to backup is stored in
        @param fileName: name of file to backup
        @return: path to new backup file
        @raise SystemError: if the file contents cannot be retrieved, or if the
        backup file cannot be created
        '''
        docPath = self._joinPath(collection, fileName)
        logging.info("Backing up file, '%s', in eXist DB", docPath)

        logging.debug("Firstly, retrieve file contents from eXist")
        doc = self.getEXistFile(docPath)
        if not doc:
            errorMessage = "Could not retrieve file contents (%s) to backup - exiting." %docPath
            logging.error(errorMessage)
            raise SystemError(errorMessage)

        # Now adjust the collection to map to the backup dir
        collection = collection.replace(ec.BASE_COLLECTION_PATH, ec.BACKUP_COLLECTION_PATH)
        collection = collection.replace(ec.NDG_A_COLLECTION_PATH, ec.NDG_A_COLLECTION_PATH_BACKUP)

        # add timestamp to filename
        fileName = self._addTimeStamp(fileName)
        docPath = self._joinPath(collection, fileName)

        logging.debug("Now creating backup file, '%s'", fileName)
        self.createEXistFile(doc, collection, fileName)

        logging.info("File backed up in eXist")
        return docPath


    def createEXistFile(self, xml, collection, fileName):
        '''
        Add the input file to the eXist DB
        - NB, any existing file of the same name in the collection is overwritten
        @param xml: contents of xml file to create in eXist
        @param collection: path of the collection to store the file in
        @param fileName: name of file to add in eXist
        @return: True, if file created successfully
        @raise SystemError: if the store command does not report success
        '''
        logging.info("Adding file, '%s' to eXist DB collection, '%s'",
                     fileName, collection)
        # NB, args passed separately so the (potentially large) doc is only
        # formatted if debug logging is actually enabled
        logging.debug("data: %s", xml)

        # create the collection, in case it doesn't already exist - NB, this won't overwrite anything
        self.createCollections([collection])
        status = self.xmldb.storeXML(xml, self._joinPath(collection, fileName), overwrite=1)
        if not status:
            errorMessage = "Command to create file in eXist did not complete successfully - exiting"
            logging.error(errorMessage)
            raise SystemError(errorMessage)

        logging.info("File added to eXist")
        return True


    def deleteEXistFile(self, docPath):
        '''
        Delete the input file from eXist DB
        @param docPath: path of document to delete
        @return: True, if file deleted successfully
        @raise SystemError: if the delete command does not report success
        '''
        logging.info("Deleting file, '%s', from eXist DB", docPath)

        status = self.xmldb.removeDoc(docPath)
        if not status:
            errorMessage = "Command to delete file in eXist did not complete successfully - exiting"
            logging.error(errorMessage)
            raise SystemError(errorMessage)

        logging.info("File deleted from eXist")
        return True


    def createOrUpdateEXistFile(self, xml, collection, fileName):
        '''
        Check if a file already exists in eXist; if it does, run an
        update (which will backup the existing file), otherwise create
        the file in eXist
        @param xml: contents of xml file to create/update in eXist
        @param collection: path of the collection to store the file in
        @param fileName: name of file to add in eXist
        @raise SystemError: if the backup or the creation of the file fails
        '''
        logging.info("Creating or updating file in eXist...")
        if not self.isNewEXistFile(self._joinPath(collection, fileName)):
            self.backupEXistFile(collection, fileName)

        self.createEXistFile(xml, collection, fileName)


    def getAtomFileCollectionPath(self, atomID):
        '''
        Given an atom id, determine and return the collection path in eXist
        of the associated atom file
        @param atomID: atom id to look up
        @return: collection path (with a trailing '/'), if it exists, None, otherwise
        '''
        logging.info("Looking up collection path for atom ID, '%s'", atomID)
        # fill in the placeholders of the 'atomFullPath' xquery template
        xq = ndgXqueries()['atomFullPath']
        xq = xq.replace('TargetCollection', ec.BASE_COLLECTION_PATH)
        xq = xq.replace('LocalID', atomID)

        resultId, summary = self.xmldb.executeQuery(xq)
        if summary['hits'] == 0:
            logging.info("No document found with the specified ID")
            return None

        doc = self.xmldb.retrieve(resultId, 0, {})

        collPath = ET.fromstring(doc).text
        # an element with no content has text of None - treat as not found
        # rather than failing on the string concatenation below
        if collPath is None:
            logging.info("No collection path returned for the specified ID")
            return None

        collPath += '/'
        logging.debug("Found collection path, '%s'", collPath)
        return collPath


    def createAtomInExist(self, atom):
        '''
        Create an atom in the eXist DB
        - if an atom with the same ID is already in eXist, the existing file is
        backed up and the atom stored in the same collection; otherwise the atom
        is stored in its default collection
        @param atom: atom object to create in the DB
        @raise SystemError: if the backup or the creation of the atom file fails
        '''
        logging.info("Creating atom in eXist")
        eXistCollection = self.getAtomFileCollectionPath(atom.atomID)

        # if collection not found, assume we're dealing with a new atom; get its
        # default collection
        if not eXistCollection:
            eXistCollection = atom.getDefaultCollectionPath()
        # create backup of atom if it already exists
        else:
            self.backupEXistFile(eXistCollection, atom.atomName)

        self.createEXistFile(atom.toPrettyXML(), eXistCollection, atom.atomName)

        logging.info("Atom created in eXist")
Note: See TracBrowser for help on using the repository browser.