source: exist/trunk/python/ndgUtils/models/existdbclient.py @ 4236

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/models/existdbclient.py@4236
Revision 4236, 11.7 KB checked in by cbyrom, 12 years ago (diff)

Improve the creation of new atom entries in eXist - generating unique
IDs, and checking these, where necessary + fix update date updates +
add methods to the vocab data class to retrieve lists of data categories
+ subtypes - for use in the UI dropdowns.

Line 
1'''
2 Class supporting set up and communication with eXist DB
3 for the purposes of creating and updating atoms
4 
5 @author: C Byrom - Tessella 08
6'''
7import os, sys, logging, datetime
8from ndgUtils.eXistInterface import ndg_eXist
9from ndgUtils.eXistConnector import eXistConnector as ec
10from ndgUtils.ndgXqueries import ndgXqueries
11import uuid
12
13try:
14    from xml.etree import ElementTree as ET
15except ImportError:
16    try:
17        import ElementTree as ET
18    except ImportError:
19        import elementtree.ElementTree as ET
20
class eXistDBClient:
    '''
    Client for an eXist DB, used to create, look up, back up, update and
    delete (atom) documents via an ndg_eXist connection
    '''

    def __init__(self, configFile=None, eXistDBHostname=None):
        '''
        Initialise a connection to the eXistDB
        @keyword configFile: path to a DB config file holding, whitespace
        separated, the DB hostname, username and password; NB, if this is
        specified, the hostname it contains replaces eXistDBHostname
        @keyword eXistDBHostname: hostname of the eXist DB to connect to
        @raise ValueError: if configFile is specified but cannot be found or
        does not contain the expected data
        '''
        logging.info("Initialising connection to eXist DB")
        self.eXistDBHostname = eXistDBHostname

        inputs = {}
        # Firstly load in config data
        if configFile:
            self._loadDBDetails(configFile)
            inputs['passwordFile'] = configFile

        if self.eXistDBHostname:
            inputs['db'] = self.eXistDBHostname

        # Now set up the connection
        self.xmldb = ndg_eXist(**inputs)

        # set up any collections required - NB, if these already exist they
        # won't cause any files to be lost
        self._setUpEXistAtomCollections()
        logging.info("eXist DB connection initialised")


    def _buildDocPath(self, collection, fileName):
        '''
        Join a collection path and a file name using exactly one '/'
        - NB, this allows collection paths to be specified with or without a
        trailing slash, so that the path a doc is stored under always matches
        the path it is later looked up with
        @param collection: path of the collection
        @param fileName: name of the file in the collection
        @return: full path to the doc
        '''
        return collection.rstrip('/') + '/' + fileName


    def createCollections(self, collections):
        '''
        Create the specified collections in eXist
        @param collections: array of collections to create
        @return True if successful
        '''
        logging.info("Setting up eXist collections")
        for col in collections:
            logging.debug("Creating collection, '%s'", col)
            self.xmldb.createCollection(col)
        logging.info("All collections set up")
        return True


    def _setUpEXistAtomCollections(self):
        '''
        Set up the required eXist collections needed for running the
        granulator script; for both the base and the backup collection, each
        of the sub-collections is created together with its DE, deployment
        and granule collections
        '''
        logging.info("Ensuring required collections are available in eXist")
        subCollections = [ec.OLD_COLLECTION_PATH, ec.PUBLISHED_COLLECTION_PATH,
                          ec.SMALL_P_PUBLISHED_COLLECTION_PATH,
                          ec.WORKING_COLLECTION_PATH]
        leafCollections = [ec.DE_COLLECTION_PATH, ec.DEPLOYMENT_COLLECTION_PATH,
                           ec.GRANULE_COLLECTION_PATH]

        for baseCol in [ec.BASE_COLLECTION_PATH, ec.BACKUP_COLLECTION_PATH]:
            # the top level collection only needs creating once - not once per
            # sub-collection
            self.xmldb.createCollection(baseCol)
            for subCol in subCollections:
                self.xmldb.createCollection(baseCol + subCol)
                for leafCol in leafCollections:
                    self.xmldb.createCollection(baseCol + subCol + leafCol)
        logging.info("Required collections available")


    def _loadDBDetails(self, configFile):
        '''
        Retrieve info from the eXist db config file; this sets the hostname,
        username and password from the first three whitespace separated
        entries in the file
        @param configFile: path to the DB config file
        @raise ValueError: if the file cannot be found or has fewer than
        three entries
        '''
        logging.info("Loading DB config data")
        # Check this file exists
        if not os.path.isfile(configFile):
            errorMessage = "ERROR: Could not find the DB config file, %s; please make sure this " \
                     "is available from the running directory" %configFile
            logging.error(errorMessage)
            raise ValueError(errorMessage)

        # NB, use try/finally to ensure the file handle is always released
        dbinfo_file = open(configFile, "r")
        try:
            dbinfo = dbinfo_file.read().split()
        finally:
            dbinfo_file.close()

        if len(dbinfo) < 3:
            errorMessage = 'Incorrect data in DB config file'
            logging.error(errorMessage)
            raise ValueError(errorMessage)
        self.eXistDBHostname = dbinfo[0]
        self._username = dbinfo[1]
        self._pw = dbinfo[2]
        logging.info("DB config data loaded")


    def _lookupEXistFile(self, docPath):
        '''
        Look up a file in eXist using XPath
        @param docPath: path to doc to look up
        @return: id returned from query, with which to retrieve doc; if doc doesn't exist, return None
        '''
        logging.info("Retrieving info for file, '%s'", docPath)

        # NOTE(review): docPath is inserted into the query unescaped - a path
        # containing a double quote will produce an invalid/altered query
        resultID, summary = self.xmldb.executeQuery('doc("' + docPath + '")')

        if summary['hits'] == 0:
            logging.info("File does not exist in eXist DB")
            return None
        logging.info("Found file - returning result ID")
        return resultID


    def getEXistFile(self, docPath):
        '''
        Use XQuery to retrieve the specified document from eXist
        @param docPath: the path of the doc to retrieve
        @return: contents of document if exists, None otherwise
        '''
        resultID = self._lookupEXistFile(docPath)

        # NB, compare against None explicitly - a valid result ID may itself
        # be falsy (e.g. 0)
        if resultID is None:
            logging.info("No file found - nothing to retrieve")
            return None

        logging.info("Found file - now retrieving content")
        return self.xmldb.retrieve(resultID, 0)


    def isNewEXistFile(self, docPath):
        '''
        Check whether a file is new, i.e. not yet available in the eXist DB
        @param docPath: path of file in eXist to check
        @return: True if no file exists at docPath, False otherwise
        '''
        logging.info("Checking if file, '%s', exists in eXist DB", docPath)

        # NB, a missing file is signalled by None - not by any falsy value
        return self._lookupEXistFile(docPath) is None


    def _addTimeStamp(self, fileName):
        '''
        Add timestamp to input filename
        NB, this assumes there is a file type identifier at the end of the filename; if so, the datestamp
        is included before this; if not it is just added at the end
        @param fileName: name of file to add timestamp to
        @return: filename with a '_YYYY-MM-DDTHH_MM_SS' (local time) stamp added
        '''
        bits = fileName.rsplit(".", 1)
        fileName = bits[0] + "_" + datetime.datetime.today().strftime("%Y-%m-%dT%H_%M_%S")

        if len(bits) > 1:
            fileName += "." + bits[1]
        return fileName


    def backupEXistFile(self, collection, fileName):
        '''
        Backup a file that exists in the eXist DB
        - NB, this really just creates a new file with the same contents in a
        backup dir
        @param collection: path of the collection the file is stored in
        @param fileName: name of file to back up
        @return: path to new backup file
        @raise SystemError: if the file contents cannot be retrieved or the
        backup file cannot be created
        '''
        docPath = self._buildDocPath(collection, fileName)
        logging.info("Backing up file, '%s', in eXist DB", docPath)

        logging.debug("Firstly, retrieve file contents from eXist")
        doc = self.getEXistFile(docPath)
        if not doc:
            errorMessage = "Could not retrieve file contents (%s) to backup - exiting." %docPath
            logging.error(errorMessage)
            raise SystemError(errorMessage)

        # Now adjust the collection to map to the backup dir
        collection = collection.replace(ec.BASE_COLLECTION_PATH, ec.BACKUP_COLLECTION_PATH)
        collection = collection.replace(ec.NDG_A_COLLECTION_PATH, ec.NDG_A_COLLECTION_PATH_BACKUP)

        # add timestamp to filename
        fileName = self._addTimeStamp(fileName)
        docPath = self._buildDocPath(collection, fileName)

        logging.debug("Now creating backup file, '%s'", fileName)
        self.createEXistFile(doc, collection, fileName)

        logging.info("File backed up in eXist")
        return docPath


    def createEXistFile(self, xml, collection, fileName):
        '''
        Add the input file to the eXist DB - overwriting any file already
        stored under the same path
        @param xml: contents of xml file to create in eXist
        @param collection: path of the collection to store the file in
        @param fileName: name of file to add in eXist
        @return: True, if file created successfully
        @raise SystemError: if the store command does not complete
        successfully
        '''
        logging.info("Adding file, '%s' to eXist DB collection, '%s'",
                     fileName, collection)
        logging.debug("data: %s", xml)

        # create the collection, in case it doesn't already exist - NB, this won't overwrite anything
        self.createCollections([collection])
        status = self.xmldb.storeXML(xml, self._buildDocPath(collection, fileName),
                                     overwrite=1)
        if not status:
            errorMessage = "Command to create file in eXist did not complete successfully - exiting"
            logging.error(errorMessage)
            raise SystemError(errorMessage)

        logging.info("File added to eXist")
        return True


    def deleteEXistFile(self, docPath):
        '''
        Delete the input file from eXist DB
        @param docPath: path of document to delete
        @return: True, if file deleted successfully
        @raise SystemError: if the delete command does not complete
        successfully
        '''
        logging.info("Deleting file, '%s', from eXist DB", docPath)

        status = self.xmldb.removeDoc(docPath)
        if not status:
            errorMessage = "Command to delete file in eXist did not complete successfully - exiting"
            logging.error(errorMessage)
            raise SystemError(errorMessage)

        logging.info("File deleted from eXist")
        return True


    def createOrUpdateEXistFile(self, xml, collection, fileName):
        '''
        Check if a file already exists in eXist; if it does, run an
        update (which will backup the existing file), otherwise create
        the file in eXist
        @param xml: contents of xml file to create/update in eXist
        @param collection: path of the collection to store the file in
        @param fileName: name of file to add in eXist
        '''
        logging.info("Creating or updating file in eXist...")
        if not self.isNewEXistFile(self._buildDocPath(collection, fileName)):
            self.backupEXistFile(collection, fileName)

        self.createEXistFile(xml, collection, fileName)


    def getAtomFileCollectionPath(self, atomID):
        '''
        Given an atom id, determine and return the collection path in eXist
        of the associated atom file
        @param atomID: atom id to look up
        @return: collection path (with a trailing '/'), if it exists, None, otherwise
        '''
        logging.info("Looking up collection path for atom ID, '%s'", atomID)
        # fill in the placeholders of the 'atomFullPath' query template
        xq = ndgXqueries()['atomFullPath']
        xq = xq.replace('TargetCollection', ec.BASE_COLLECTION_PATH)
        xq = xq.replace('LocalID', atomID)

        resultID, summary = self.xmldb.executeQuery(xq)
        if summary['hits'] == 0:
            logging.info("No document found with the specified ID")
            return None

        doc = self.xmldb.retrieve(resultID, 0, {})

        # the collection path is the text content of the returned root element
        docET = ET.fromstring(doc)
        collPath = docET.text + '/'
        logging.debug("Found collection path, '%s'", collPath)
        return collPath


    def createAtomInExist(self, atom):
        '''
        Create an atom in the eXist DB; if the atom already exists, the
        existing file is backed up first and the atom's updated date is set
        to the current time
        @param atom: atom object to create in the DB
        @return atom: atom object saved in eXist
        @raise SystemError: if the atom cannot be backed up or stored
        '''
        logging.info("Creating atom in eXist")

        # if the atom has no dataset ID, generate and add one
        # NB, this should only be the case when the atom is being created
        # via the web interface
        isNew = False
        if not atom.datasetID:
            isNew = True
            atom.setDatasetID(atom.atomTypeID + '_' + str(uuid.uuid1()))

        eXistCollection = self.getAtomFileCollectionPath(atom.datasetID)

        # if collection not found, assume we're dealing with a new atom; get its
        # default collection
        if not eXistCollection:
            eXistCollection = atom.getDefaultCollectionPath()
        elif isNew:
            # in this situation we're trying to create an atom with the same
            # name via the web interface - this can't be allowed - so retry to
            # generate a new ID
            # NB, return the result of the retry so that the caller always
            # gets the saved atom back
            atom.datasetID = None
            return self.createAtomInExist(atom)
        # create backup of atom if it already exists
        else:
            self.backupEXistFile(eXistCollection, atom.atomName)

            # also change updated date to current time
            # NB, the 'Z' suffix denotes UTC - so use UTC, not local, time
            atom.updatedDate = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")

        self.createEXistFile(atom.toPrettyXML(), eXistCollection, atom.atomName)

        logging.info("Atom created in eXist")
        return atom
Note: See TracBrowser for help on using the repository browser.