source: exist/trunk/python/ndgUtils/models/existdbclient.py @ 4282

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/models/existdbclient.py@4282
Revision 4282, 12.1 KB checked in by cbyrom, 11 years ago (diff)

Couple of small fixes.

Line 
1'''
2 Class supporting set up and communication with eXist DB
3 for the purposes of creating and updating atoms
4 
5 @author: C Byrom - Tessella 08
6'''
7import os, sys, logging, datetime
8from ndgUtils.eXistInterface import ndg_eXist
9from ndgUtils.eXistConnector import eXistConnector as ec
10from ndgUtils.ndgXqueries import ndgXqueries
11import uuid
12
13try:
14    from xml.etree import ElementTree as ET
15except ImportError:
16    try:
17        import ElementTree as ET
18    except ImportError:
19        import elementtree.ElementTree as ET
20
class eXistDBClient:
    '''
    Client wrapping an ndg_eXist connection, used to create, look up,
    back up and delete documents (atoms) in an eXist DB.

    On construction the connection is opened and the collections used for
    atoms are created via _setUpEXistAtomCollections.
    '''

    def __init__(self, configFile=None, eXistDBHostname=None):
        '''
        Initialise a connection to the eXistDB
        @param configFile: path of the DB config (password) file; if given it
        is passed to ndg_eXist as 'passwordFile'
        @param eXistDBHostname: host to connect to; if omitted and a config
        file is given, the first entry of the config file is used
        @raise ValueError: if the config file has to be read but is missing
        or holds fewer than three entries
        '''
        logging.info("Initialising connection to eXist DB")
        self.eXistDBHostname = eXistDBHostname
        logging.debug("- connecting to DB, '%s', with config file, '%s'",
                      eXistDBHostname or 'Default', configFile or 'Default')
        inputs = {}

        # NB, there are two routes through here: if a config file is specified
        # without a hostname, the host will be taken to be the first entry in
        # the config file; if a hostname is specified, it will be used explicitly
        if configFile:
            if not self.eXistDBHostname:
                self._loadDBDetails(configFile)
            inputs['passwordFile'] = configFile

        if self.eXistDBHostname:
            inputs['db'] = self.eXistDBHostname

        # Now set up the connection
        logging.debug(inputs)
        self.xmldb = ndg_eXist(**inputs)

        # set up any collections required - NB, if these already exist they
        # won't cause any files to be lost
        self._setUpEXistAtomCollections()
        logging.info("eXist DB connection initialised")


    def createCollections(self, collections):
        '''
        Create the specified collections in eXist
        @param collections: array of collections to create
        @return True if successful
        '''
        logging.info("Setting up eXist collections")
        for col in collections:
            logging.debug("Creating collection, '%s'", col)
            self.xmldb.createCollection(col)
        logging.info("All collections set up")
        # The documented contract promises True; any failure is expected to
        # surface as an exception from createCollection
        return True


    def _setUpEXistAtomCollections(self):
        '''
        Set up the required eXist collections needed for running the granulator script
        '''
        logging.info("Ensuring required collections are available in eXist")
        stateCollections = [ec.OLD_COLLECTION_PATH, ec.PUBLISHED_COLLECTION_PATH,
                            ec.SMALL_P_PUBLISHED_COLLECTION_PATH,
                            ec.WORKING_COLLECTION_PATH]
        atomCollections = [ec.DE_COLLECTION_PATH, ec.DEPLOYMENT_COLLECTION_PATH,
                           ec.GRANULE_COLLECTION_PATH]

        for col in [ec.BASE_COLLECTION_PATH, ec.BACKUP_COLLECTION_PATH]:
            # the top level collection only needs creating once per base
            # path, not once per sub-collection
            self.xmldb.createCollection(col)
            for stateCol in stateCollections:
                self.xmldb.createCollection(col + stateCol)
                for atomCol in atomCollections:
                    self.xmldb.createCollection(col + stateCol + atomCol)
        logging.info("Required collections available")


    def _loadDBDetails(self, configFile):
        '''
        Retrieve info from the eXist db config file
        The file is split on whitespace; the first three entries are taken
        as hostname, username and password respectively.
        @param configFile: path of the config file to read
        @raise ValueError: if the file does not exist or has fewer than
        three whitespace separated entries
        '''
        logging.info("Loading DB config data")
        # Check this file exists
        if not os.path.isfile(configFile):
            errorMessage = "ERROR: Could not find the DB config file, %s; please make sure this " \
                     "is available from the running directory" %configFile
            logging.error(errorMessage)
            raise ValueError(errorMessage)

        # NB, try/finally rather than 'with' so the handle is always closed
        # without requiring a newer interpreter than the rest of the module
        dbinfoFile = open(configFile, "r")
        try:
            dbinfo = dbinfoFile.read().split()
        finally:
            dbinfoFile.close()

        if len(dbinfo) < 3:
            errorMessage = 'Incorrect data in DB config file'
            logging.error(errorMessage)
            raise ValueError(errorMessage)
        self.eXistDBHostname = dbinfo[0]
        self._username = dbinfo[1]
        self._pw = dbinfo[2]
        logging.info("DB config data loaded")


    def _lookupEXistFile(self, docPath):
        '''
        Look up a file in eXist using XPath
        @param docPath: path to doc to look up
        @return: id returned from query, with which to retrieve doc; if doc doesn't exist, return None
        '''
        logging.info("Retrieving info for file, '%s'", docPath)

        # NOTE(review): docPath is placed in the query unescaped - a path
        # containing a double quote would produce a malformed query
        resultID, summary = self.xmldb.executeQuery('doc("' + docPath + '")')

        if summary['hits'] == 0:
            logging.info("File does not exist in eXist DB")
            return None
        logging.info("Found file - returning result ID")
        return resultID


    def getEXistFile(self, docPath):
        '''
        Use XQuery to retrieve the specified document from eXist
        @param docPath: the path of the doc to retrieve
        @return: contents of document if exists, None otherwise
        '''
        resultID = self._lookupEXistFile(docPath)

        # compare against None explicitly: a falsy result ID (e.g. 0) is
        # still a handle to a real result set
        if resultID is None:
            logging.info("No file found - nothing to retrieve")
            return None

        logging.info("Found file - now retrieving content")
        return self.xmldb.retrieve(resultID, 0)


    def isNewEXistFile(self, docPath):
        '''
        Check whether a file is new, i.e. not yet present in the eXist DB
        @param docPath: path of file in eXist to check
        @return: True if the file does not exist in eXist, False if it does
        '''
        logging.info("Checking if file, '%s', exists in eXist DB", docPath)

        # explicit None check - see getEXistFile
        return self._lookupEXistFile(docPath) is None


    def _addTimeStamp(self, fileName):
        '''
        Add timestamp to input filename
        NB, this assumes there is a file type identifier at the end of the filename; if so, the datestamp
        is included before this; if not it is just added at the end
        @param fileName: name to add the timestamp to
        @return: file name including timestamp, of form YYYY-MM-DDTHH_MM_SS
        '''
        bits = fileName.rsplit(".", 1)
        fileName = bits[0] + "_" + datetime.datetime.today().strftime("%Y-%m-%dT%H_%M_%S")

        if len(bits) > 1:
            fileName += "." + bits[1]
        return fileName


    def backupEXistFile(self, collection, fileName):
        '''
        Backup a file that exists in the eXist DB
        - NB, this really just creates a new file with the same contents in a
        backup dir
        @param collection: path of the collection holding the file to back up
        (concatenated directly with fileName to form the doc path)
        @param fileName: name of file to back up
        @return: path to new backup file
        @raise SystemError: if the file contents cannot be retrieved or the
        backup file cannot be created
        '''
        docPath = collection + fileName
        logging.info("Backing up file, '%s', in eXist DB", docPath)

        logging.debug("Firstly, retrieve file contents from eXist")
        doc = self.getEXistFile(docPath)
        if not doc:
            errorMessage = "Could not retrieve file contents (%s) to backup - exiting." %docPath
            logging.error(errorMessage)
            raise SystemError(errorMessage)

        # Now adjust the collection to map to the backup dir
        collection = collection.replace(ec.BASE_COLLECTION_PATH, ec.BACKUP_COLLECTION_PATH)
        collection = collection.replace(ec.NDG_A_COLLECTION_PATH, ec.NDG_A_COLLECTION_PATH_BACKUP)

        # add timestamp to filename
        fileName = self._addTimeStamp(fileName)
        docPath = collection + fileName

        logging.debug("Now creating backup file, '%s'", fileName)
        self.createEXistFile(doc, collection, fileName)

        logging.info("File backed up in eXist")
        return docPath


    def createEXistFile(self, xml, collection, fileName):
        '''
        Add the input file to the eXist DB
        @param xml: contents of xml file to create in eXist
        @param collection: path of the collection to store the file in
        @param fileName: name of file to add in eXist
        @return: True, if file created successfully
        @raise SystemError: if the store command reports failure
        '''
        logging.info("Adding file, '%s' to eXist DB collection, '%s'",
                     fileName, collection)
        logging.debug("data: %s", xml)

        # create the collection, in case it doesn't already exist - NB, this won't overwrite anything
        self.createCollections([collection])

        # NOTE(review): a "/" is always inserted here, whereas the other
        # methods build doc paths as collection + fileName (i.e. they expect
        # a trailing "/" on the collection) - confirm eXist treats the
        # resulting "//" as a single separator
        status = self.xmldb.storeXML(xml, collection + "/" + fileName, overwrite=1)
        if not status:
            errorMessage = "Command to create file in eXist did not complete successfully - exiting"
            logging.error(errorMessage)
            raise SystemError(errorMessage)

        logging.info("File added to eXist")
        return True


    def deleteEXistFile(self, docPath):
        '''
        Delete the input file from eXist DB
        @param docPath: path of document to delete
        @return: True, if file deleted successfully
        @raise SystemError: if the delete command reports failure
        '''
        logging.info("Deleting file, '%s', from eXist DB", docPath)

        status = self.xmldb.removeDoc(docPath)
        if not status:
            errorMessage = "Command to delete file in eXist did not complete successfully - exiting"
            logging.error(errorMessage)
            raise SystemError(errorMessage)

        logging.info("File deleted from eXist")
        return True


    def createOrUpdateEXistFile(self, xml, collection, fileName):
        '''
        Check if a file already exists in eXist; if it does, run an
        update (which will backup the existing file), otherwise create
        the file in eXist
        @param xml: contents of xml file to create/update in eXist
        @param collection: path of the collection to store the file in
        @param fileName: name of file to add in eXist
        '''
        logging.info("Creating or updating file in eXist...")
        if not self.isNewEXistFile(collection + fileName):
            self.backupEXistFile(collection, fileName)

        self.createEXistFile(xml, collection, fileName)


    def getAtomFileCollectionPath(self, atomID):
        '''
        Given an atom id, determine and return the collection path in eXist
        of the associated atom file
        @param atomID: atom id to look up
        @return: collection path (with trailing '/'), if it exists, None, otherwise
        '''
        logging.info("Looking up collection path for atom ID, '%s'", atomID)
        xq = ndgXqueries()['atomFullPath']
        xq = xq.replace('TargetCollection', ec.BASE_COLLECTION_PATH)
        xq = xq.replace('LocalID', atomID)

        resultID, summary = self.xmldb.executeQuery(xq)
        if summary['hits'] == 0:
            logging.info("No document found with the specified ID")
            return None

        doc = self.xmldb.retrieve(resultID, 0, {})

        # the text of the root element of the result is used as the path
        docET = ET.fromstring(doc)
        collPath = docET.text + '/'
        logging.debug("Found collection path, '%s'", collPath)
        return collPath


    def createAtomInExist(self, atom):
        '''
        Create an atom in the eXist DB
        If the atom already exists in the DB, the existing file is backed up
        first and the atom's updated date is set to the current time.
        @param atom: atom object to create in the DB
        @return: the atom that was created
        '''
        logging.info("Creating atom in eXist")

        # if the atom has no dataset ID, generate and add one
        # NB, this should only be the case when the atom is being created
        # via the web interface
        isNew = False
        if not atom.datasetID:
            isNew = True
            atom.setDatasetID(atom.atomTypeID + '_' + str(uuid.uuid1()))

        eXistCollection = self.getAtomFileCollectionPath(atom.datasetID)

        # if collection not found, assume we're dealing with a new atom; get its
        # default collection
        if not eXistCollection:
            eXistCollection = atom.getDefaultCollectionPath()
        elif isNew:
            # in this situation we're trying to create an atom with the same
            # name via the web interface - this can't be allowed - so retry to
            # generate a new ID; hand back the result of the retry so callers
            # get the atom on this route too
            atom.datasetID = None
            return self.createAtomInExist(atom)
        # create backup of atom if it already exists
        else:
            self.backupEXistFile(eXistCollection, atom.atomName)

            # also change updated date to current time
            atom.updatedDate = datetime.datetime.today().strftime("%Y-%m-%dT%H:%M:%SZ")

        self.createEXistFile(atom.toPrettyXML(), eXistCollection, atom.atomName)
        logging.info("Atom created in eXist")
        return atom
Note: See TracBrowser for help on using the repository browser.