source: ndgCommon/trunk/ndg/common/src/clients/xmldb/abstractxmldbatomclient.py @ 5094

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/xmldb/abstractxmldbatomclient.py
Revision 5094, 5.2 KB checked in by cbyrom, 11 years ago (diff)

Improve checking for atom feeds when adding new entries.

Line 
1'''
2 Abstract implementation providing functionality to support Atom
3 document usage
4 
5 @author: C Byrom - Tessella, Feb 09
6'''
7import os, sys, logging
8from threading import Thread
9from ndg.common.src.clients.xmldb.interfacexmldbatomclient import InterfaceXMLDBAtomClient
10from ndg.common.src.lib.atomutilities import *
11
12
class DuplicateError(Exception):
    '''
    Raised when a duplicated atom doc is discovered.

    Constructing the exception also writes the message to the error log,
    so callers do not need to log it separately.
    '''
    def __init__(self, msg):
        '''
        @param msg: description of the duplication; stored as the exception
        message and logged at error level
        '''
        # initialise the base exception and record the problem in the log
        Exception.__init__(self, msg)
        logging.error(msg)
20
21
class publishingThread(Thread):
    '''
    Thread wrapper used to publish an atom doc asynchronously, i.e. without
    blocking the caller whilst the publishing is carried out.
    '''

    def __init__ (self, atomClient, atom):
        '''
        Set up the thread - NB, this does not start it
        @param atomClient: AtomClient instance to do the publishing
        @param atom: Atom object to publish
        '''
        setupMessage = "Setting up thread to publish atom data for atom, '%s'" %atom.datasetID
        logging.info(setupMessage)
        Thread.__init__(self)
        # keep hold of the inputs until run() is invoked
        self.atomClient, self.atom = atomClient, atom
        logging.info("- finished setting up thread")

    def run(self):
        '''
        Thread body: publish the stored atom via the stored atom client
        '''
        runMessage = "Running thread to publish atom data for atom, '%s'" %self.atom.datasetID
        logging.info(runMessage)
        # NB, this is the name-mangled form of the private __publishAtom
        # method of a class called AtomClient
        publish = self.atomClient._AtomClient__publishAtom
        publish(self.atom)
        logging.info("- finished publishing atom data")
43
44
class AbstractXMLDBAtomClient(InterfaceXMLDBAtomClient):
    '''
    Abstract XML DB client providing the atom specific operations - deleting
    atoms and producing DIF documents from them.

    NB, the lower level operations used here (deleteDoc, isNewDoc,
    createOrUpdateDoc, createCollections, buildAndRunQuery) and the
    feedClient attribute are not defined in this class - presumably they are
    supplied by the interface and/or the concrete subclasses.
    '''

    # avoid looking up datacentre DIF data more than we have to by storing
    # it in this dict on first retrieval
    # - NB, this is a class level dict, so is shared by all instances
    dataCentres = {}

    # keep track of atom docs and their collections - to reduce db lookups - use
    # key = filename, val = collectionPath
    atomCollections = {}

    def deleteAtom(self, atom):
        '''
        Delete the given atom from the XML DB - using the atom
        details to work out the required path to delete
        @param atom: the Atom data model to delete; its getFullPath() result
        is the path passed to deleteDoc
        '''
        logging.info("Deleting atom from eXist")
        self.deleteDoc(atom.getFullPath())
        logging.info("Atom deleted")


    def _createDIFDocumentFromAtom(self, atom, difCollectionPath):
        '''
        Transform an atom into a DIF document and store this in the XML DB
        - also publish data to relevant feeds, if appropriate
        @param atom: the Atom data model to convert
        @param difCollectionPath: path to the collection storing DIF documents
        - NB, the provider ID is appended directly to this, with no separator
        added
        @return difFilePath: path to the created DIF file
        '''
        logging.info("Creating and storing DIF document")
        doc = self._transformAtomIntoDIF(atom)
        # NOTE(review): this replaces every occurrence of '.atom' in the name,
        # not just a trailing extension - confirm this is intended
        fileName = atom.atomName.replace('.atom', '.xml')

        # now add to eXist
        providerID = atom.ME.providerID
        collectionPath = difCollectionPath + providerID

        # NB, check feed exists, if not, set it up
        # - the check is done before the DIF doc is stored; the feed itself is
        # only created afterwards
        setupFeed = bool(self.isNewDoc(collectionPath + '/.feed.atom'))

        self.createOrUpdateDoc(doc, collectionPath, fileName)

        if setupFeed:
            logging.info("Creating feed for new provider ID")
            self.createCollections([collectionPath])
            self.feedClient.createAtomFeed(collectionPath,
                                           self.feedClient.PROVIDERLEVEL_DIF_FEED_TITLE %providerID)
        logging.info("DIF document created and stored")
        return collectionPath + '/' + fileName


    def _transformAtomIntoDIF(self, atom):
        '''
        Transform an atom into a DIF document - using an XQuery transform ran
        in the XML DB
        @param atom: the Atom data model to convert
        @return: the produced DIF document
        @raise Exception: if the XQuery returns nothing
        '''
        logging.info("Creating DIF record from atom - using XQuery transform")

        # get the query and set this up to use properly
        targetCollection = atom.getPublicationStatePath()
        providerID = atom.ME.providerID

        # add a trailing forward slash - otherwise the xquery won't work
        if not targetCollection.endswith('/'):
            targetCollection = targetCollection + '/'

        # NB, using the query() method (via the useChunked keyword) here results
        # in the namespaces being expanded - which ET doesn't like
        doc = self.buildAndRunQuery('atom2DIF',
                                    targetCollection,
                                    providerID,
                                    atom.datasetID,
                                    useChunked = False)

        if not doc:
            raise Exception("XQuery produced no results - suggesting a problem with the query")

        # add various missing info
        # get the organisation data for the repository
        # - NB, store for re-use to avoid multiple lookups of the same info
        # - NB, use 'in' rather than dict.has_key(): has_key is deprecated and
        # is not available in python 3
        if providerID in self.dataCentres:
            dataCentre = self.dataCentres[providerID]
        else:
            dataCentre = getDataCentreDIFElement(providerID, self)
            self.dataCentres[providerID] = dataCentre

        # add various other data to atoms - to make up with the incomplete data spec there
        # - NB, only the first result of the query is used
        doc = addOrgData(doc[0], targetCollection, dataCentre)
        doc = expandParametersData(doc)
        doc = addStandardKeywords(doc)

        logging.info("Transform completed successfully - returning DIF doc")
        return doc
141
Note: See TracBrowser for help on using the repository browser.