source: ndgCommon/trunk/ndg/common/src/clients/interfaceclient.py @ 4793

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/interfaceclient.py@4832
Revision 4793, 13.9 KB checked in by cbyrom, 11 years ago (diff)

Checking in initial codebase for ndgUtils restructure.

Line 
'''
 Basic interface for clients to implement for the searching and manipulation
 of metadata documents

 @author: C Byrom - Tessella, Jan 09
'''
import datetime
import logging
import os
import sys
import uuid
import xml.etree.ElementTree as ET

from interfacesearchclient import InterfaceSearchClient
9
10class InterfaceClient(InterfaceSearchClient):
11   
12    def __init__():
13        '''
14        '''
15        logging.info("Initialising connection to eXist DB")
16        logging.info("eXist DB connection initialised")
17
18
19    def getEXistFile(self, docPath):
20        '''
21        Use XQuery to retrieve the specified document from eXist
22        @param docPath: the path of the doc to retrieve
23        @return: contents of document if exists, None otherwise
24        '''
25        id = self.__lookupEXistFile(docPath)
26       
27        if not id and id != 0:
28            logging.info("No file found - nothing to retrieve")
29            return None
30       
31        logging.info("Found file - now retrieving content")
32        doc = self.xmldb.retrieve(id, 0)
33        return doc
34
35
36    def isNewEXistFile(self, docPath):
37        '''
38        Test if a file already exists in eXist
39        @param docPath: path of file in eXist to look up
40        @return: True if a new file, False if otherwise
41        '''
42        logging.info("Checking if file, '%s', exists in eXist DB" %docPath)
43       
44        id = self.__lookupEXistFile(docPath)
45
46        if id:
47            return False
48       
49        return True
50
51
52    def backupEXistFile(self, collection, fileName, runAsynch = True):
53        '''
54        Backup a file that exists in the eXist DB
55        - NB, this really just creates a new file with the same contents in a
56        backup dir
57        - to improve efficiency, spawn this process as a new thread since we
58        don't need to worry about the outcome
59        @param collection: path of the collection to store the file in
60        @param fileName: name of file to add in eXist
61        @param runAsynch: if True, do the backup asynchronously in a separate thread
62        @return: path to new backup file
63        '''
64        if not collection.endswith('/'):
65            collection += '/'
66           
67        docPath = collection + fileName
68        logging.info("Backing up file, '%s', in eXist DB" %docPath)
69
70        logging.debug("Firstly, retrieve file contents from eXist")
71        doc = self.getEXistFile(docPath)
72        if not doc:
73            errorMessage = "Could not retrieve file contents (%s) to backup - exiting." %docPath
74            logging.error(errorMessage)
75            raise SystemError(errorMessage)
76       
77        # Now adjust the collection to map to the backup dir
78        collection = collection.replace(ec.BASE_COLLECTION_PATH, ec.BACKUP_COLLECTION_PATH)
79        collection = collection.replace(ec.NDG_A_COLLECTION_PATH, ec.NDG_A_COLLECTION_PATH_BACKUP)
80       
81        # add timestamp to filename
82        fileName = self.__addTimeStamp(fileName)
83        docPath = collection + fileName
84       
85        if runAsynch:
86            # run the back up in a separate thread
87            thread = backingUpThread(self, doc, collection, fileName)
88            thread.start()
89        else:
90            self.createEXistFile(doc, collection, fileName)
91           
92        return docPath
93
94
95    def createEXistFile(self, xml, collection, fileName):
96        '''
97        Add the input file to the eXist DB
98        @param xml: contents of xml file to create in eXist
99        @param collection: path of the collection to store the file in
100        @param fileName: name of file to add in eXist
101        @return: True, if file created successfully
102        '''
103        logging.info("Adding file, '%s' to eXist DB collection, '%s'" \
104                     %(fileName, collection))
105        logging.debug("data: %s" %xml)
106
107        # create the collection, in case it doesn't already exist - NB, this won't overwrite anything
108        self.createCollections([collection])
109        status = self.xmldb.storeXML(xml, collection + "/" + fileName, overwrite=1)   
110        if not status:
111            errorMessage = "Command to create file in eXist did not complete successfully - exiting"
112            logging.error(errorMessage)
113            raise SystemError(errorMessage)
114       
115        logging.info("File added to eXist")
116        return True
117
118
119    def deleteEXistFile(self, docPath):
120        '''
121        Delete the input file from eXist DB
122        @param docPath: path of document to delete
123        @return: True, if file deleted successfully
124        '''
125        logging.info("Deleting file, '%s', from eXist DB" %docPath)
126
127        status = self.xmldb.removeDoc(docPath)   
128        if not status:
129            errorMessage = "Command to delete file in eXist did not complete successfully - exiting"
130            logging.error(errorMessage)
131            raise SystemError(errorMessage)
132       
133        logging.info("File deleted from eXist")
134        return True
135
136
137    def createOrUpdateEXistFile(self, xml, collection, fileName):
138        '''
139        Check if a file already exists in eXist; if it does, run an
140        update (which will backup the existing file), otherwise create
141        the file in eXist
142        @param xml: contents of xml file to create/update in eXist
143        @param collection: path of the collection to store the file in
144        @param fileName: name of file to add in eXist
145        '''
146        logging.info("Creating or updating file in eXist...")
147        if not self.isNewEXistFile(collection + fileName):
148            self.backupEXistFile(collection, fileName)
149           
150        self.createEXistFile(xml, collection, fileName)
151
152
153    def getAllAtomIDs(self):
154        '''
155        Retrieve all the atom IDs in the atoms directory - NB, this can
156        be a quick way of producing a cache of data to check - e.g. to avoid
157        multiple calls to getAtomFileCollectionPath
158        @return: ids - array of all atom IDs
159        '''
160        logging.info("Retrieving all atom ids")
161        xq = self.xmldb.xq.actual('atomList', '/db/atoms', '', '')
162        id, doc = self.xmldb.executeQuery(xq)
163        if doc['hits'] == 0: 
164            return []
165       
166        indices = range(doc['hits'])
167       
168        doc = self.xmldb.retrieve(id, 0)
169        et = ET.fromstring(doc)
170        ids = []
171        for member in et:
172            fn = member.findtext('{http://www.w3.org/2005/Atom}repositoryID')
173            ids.append(fn)
174        logging.debug("Found ids, '%s'" %ids)
175        return ids
176
177
178    def getAllAtomCollections(self):
179        '''
180        Get all atom collection paths and store in a dictionary - for easy
181        reference when doing lots of things at once
182        @return: dict with key/val of atomID/collectionPath
183        '''
184        logging.info("Retrieving all atom collection paths")
185        xq = self.xmldb.xq.actual('atomList', '/db/atoms', '', '')
186        id, doc = self.xmldb.executeQuery(xq)
187        if doc['hits'] == 0: 
188            return []
189       
190        indices = range(doc['hits'])
191       
192        doc = self.xmldb.retrieve(id, 0)
193        et = ET.fromstring(doc)
194        colData = {}
195        for member in et:
196            collection = member.findtext('{http://www.w3.org/2005/Atom}fileName')
197            fileName = collection.split('/')[-1]
198            fileName = fileName.split('.')[0]
199            dir = '/'.join(collection.split('/')[0:-1])
200            colData[fileName] = dir
201
202        logging.debug("Finished looking up atom paths")
203        return colData
204
205
206    def getAtomPublicationState(self, atomID):
207        '''
208        Retrieve the publication state of the specified atom - by
209        checking the collection it is in
210        @param atom: atom id to look up
211        @return: AtomState for the atom.  NB, if the ID is not found, assume
212        we're dealing with a new atom and set the state as the working state
213        '''
214        logging.debug("Finding atom publication state")
215        path = self.getAtomFileCollectionPath(atomID)
216        for state in AtomState.allStates.values():
217            if path.find('/%s' %state.collectionPath) > -1:
218                logging.debug("- state found: '%s'" %state.title)
219                return state
220       
221        logging.debug("- state not found - returning WORKING state")
222        return AtomState.WORKING_STATE
223
224
225    def getAtomFileCollectionPath(self, atomID):
226        '''
227        Given an atom id, determine and return the collection path in eXist
228        of the associated atom file
229        @param atom: atom id to look up
230        @return: collection path, if it exists, None, otherwise
231        '''
232        logging.info("Looking up collection path for atom ID, '%s'" %atomID)
233        xq = self.xmldb.xq['atomFullPath']
234        xq = xq.replace('TargetCollection', ec.BASE_COLLECTION_PATH)
235        xq = xq.replace('LocalID', atomID)
236
237        id, doc = self.xmldb.executeQuery(xq)
238        if doc['hits'] == 0:
239            logging.info("No document found with the specified ID")
240            return None
241
242        doc = self.xmldb.retrieve(id,0,{})
243
244        docET = ET.fromstring(doc)
245        collPath = docET.text + '/'
246        logging.debug("Found collection path, '%s'" %collPath)
247        return collPath
248
249
250    def deleteAtomInExist(self, atom):
251        '''
252        Delete the given atom from the eXist DB - using the atom
253        details to work out the required path to delete
254        '''
255        logging.info("Deleting atom from eXist")
256        atomPath = atom.getDefaultCollectionPath() + atom.atomName
257        self.deleteEXistFile(atomPath)
258        logging.info("Atom deleted")
259
260
261    def changeAtomPublicationStateInExist(self, atom, newState):
262        '''
263        Adjust the publication state of an atom in eXist
264        @param atom: the Atom data model of the atom whose publication state
265        needs to change
266        @param newState: an AtomState object representing the new publication
267        state of the atom
268        @return atom: atom data model with updated state
269        '''
270        logging.info("Changing the publication state of atom - from '%s' to '%s'" \
271                     %(atom.state.title, newState.title))
272        oldState = atom.state
273        # firstly, create atom in new publication state collection - so data isn't
274        # lost if this fails
275        atom.state = newState
276        self.createEXistFile(atom.toPrettyXML(), atom.getDefaultCollectionPath(), 
277                             atom.atomName)
278       
279        # now delete atom in the old publication state
280        atom.state = oldState
281        self.deleteAtomInExist(atom)
282        logging.info("- atom created in new publication state and removed from old one")
283        atom.state = newState
284       
285        # update feeds + create DIFs, if needed
286        if atom.isPublished():
287            self.runAsynchAtomPublish(atom)
288       
289        return atom
290       
291
292           
293    def createAtomInExist(self, atom, replaceAtom = True, runAsynch = True):
294        '''
295        Create an atom in the eXist DB - using the atom contents to work out
296        the location + data set ID
297        @param atom: atom object to create in the DB
298        @keyword replaceAtom: if False and the atom is already available in eXist
299        @param runAsynch: if True, if a backup of an existing file, do this
300        asynchronously in a separate thread
301        then raise a ValueError.
302        '''
303        logging.info("Creating atom in eXist")
304        if not atom:
305            raise ValueError("Input is not an object - cannot create in eXist")
306        if not isinstance(atom, Atom):
307            raise ValueError("Input object is not an Atom object - cannot create in eXist")
308       
309        # if the atom has no dataset ID, generate and add one
310        # NB, this should only be the case when the atom is being created
311        # via the web interface
312        isNew = False
313        if not atom.datasetID:
314            isNew = True
315            atom.setDatasetID(atom.atomTypeID + '_' + str(uuid.uuid1()))
316
317        eXistCollection = None
318        if self.collections is not None: # cope with empty dict
319            eXistCollection = self.collections.get(atom.datasetID)
320        else:
321            eXistCollection = self.getAtomFileCollectionPath(atom.datasetID)
322       
323        # if collection not found, assume we're dealing with a new atom; get its
324        # default collection
325        if not eXistCollection:
326            eXistCollection = atom.getDefaultCollectionPath()
327
328            # check if we need a new provider feed set up
329            providerCollection = ec.PROVIDER_FEED_PATH + atom.ME.providerID + '/'
330            if self.isNewEXistCollection(providerCollection):
331                logging.info("Creating feed for new provider ID")
332                self.createCollections([providerCollection])
333                self.feedClient.createAtomFeed(providerCollection,
334                                               self.feedClient.PROVIDERLEVEL_ATOM_FEED_TITLE %atom.ME.providerID)
335           
336        elif isNew:
337            # in this situation we're trying to create an atom with the same
338            # name via the web interface - this can't be allowed - so retry to
339            # generate a new ID
340            atom.datasetID = None
341            self.createAtomInExist(atom)
342            return
343        # create backup of atom if it already exists
344        else:
345            if not replaceAtom:
346                raise DuplicateError('An atom with the specified ID (%s) already exists in eXist' \
347                                     %atom.datasetID)
348            # store name of backup - to allow restore, if subsequent ops fail
349            self.backupName = self.backupEXistFile(eXistCollection, atom.atomName, \
350                                                   runAsynch = runAsynch)
351           
352            # also change updated date to current time
353            atom.updatedDate = datetime.datetime.today().strftime("%Y-%m-%dT%H:%M:%SZ")
354           
355        self.createEXistFile(atom.toPrettyXML(), eXistCollection, atom.atomName)
356       
357        logging.info("Atom created in eXist")
358       
359        # lastly, if we're dealing with a published atom, update DIF records +
360        # feeds
361        if atom.isPublished():
362            self.runAsynchAtomPublish(atom)
363           
364        return atom
Note: See TracBrowser for help on using the repository browser.