source: ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/feedclient.py @ 5296

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/feedclient.py@5296
Revision 5296, 11.1 KB checked in by cbyrom, 13 years ago (diff)

Extend feed client code to allow setting of author data on generic
feed entries + add setting up of discovery feed as standard feed
setup + extend tests.

Line 
1'''
2 Class supporting set up and communication with eXist DB
3 for the purposes of creating and updating atom feeds
4 
5 @author: C Byrom - Tessella 09
6'''
7import logging, httplib
8import feedparser
9from xml.sax.saxutils import escape
10from ndg.common.src.models.vocabtermdata import VocabTermData as VTD
11from ndg.common.src.models.ndgObject import ndgObject
12import ndg.common.src.lib.utilities as utils
13import ndg.common.src.clients.xmldb.eXist.dbconstants as dc
14from ndg.common.src.models import AtomState
15
16
class FeedClientError(Exception):
    '''
    Raised when a problem is found with an eXist hosted atom feed

    The message is written to the error log at construction time, so the
    problem is recorded even if the exception is later caught and discarded.
    '''
    def __init__(self, msg):
        '''
        @param msg: description of the problem; logged and stored as the
        exception message
        '''
        super(FeedClientError, self).__init__(msg)
        logging.error(msg)
24
class FeedClient(object):
    '''
    Client for creating, populating and reading the atom feeds hosted by an
    eXist DB

    Feeds and feed entries are created by POSTing atom documents to the eXist
    atom 'edit' interface; feed contents are read back from the atom 'content'
    interface.
    '''

    # Titles given to the feeds, and DIF entries, created by this client
    TOPLEVEL_ATOM_FEED_TITLE = 'Moles Atom Data'
    TOPLEVEL_DIF_FEED_TITLE = 'DIF Data'
    TOPLEVEL_PUBLISHED_FEED_TITLE = 'All Published Data'
    ENTITYLEVEL_ATOM_FEED_TITLE = 'Moles %s Atom Data'
    DEPLOYMENTS_ATOM_FEED_TITLE = 'Deployments Data (activities/observation stations/data production tools)'
    PROVIDER_SUFFIX = ' - for provider, %s'
    PROVIDERLEVEL_ATOM_FEED_TITLE = TOPLEVEL_ATOM_FEED_TITLE + PROVIDER_SUFFIX
    PROVIDERLEVEL_DIF_FEED_TITLE = TOPLEVEL_DIF_FEED_TITLE + PROVIDER_SUFFIX
    CITATIONS_FEED_TITLE = 'Citations data received by Trackback mechanism'
    CORRECTIONS_FEED_TITLE = 'Corrections data submitted by atom editor users'
    DISCOVERY_FEED_TITLE = 'Discovery service information'
    DIF_ENTRY_TITLE = 'DIF Record'

    REST_BASE_URL = '/exist/rest'
    FEED_ERROR_MSG = 'Failed to retrieve valid feed - error'
    # Template for an empty feed document; %s is replaced by the feed title
    FEED_STRING = '<?xml version="1.0" ?><feed xmlns="http://www.w3.org/2005/Atom"> \
        <title>%s</title></feed>'

    def __init__(self, auth, dbHostName = None, eXistPortNo = '8080'):
        '''
        Initialise a client to the eXist atom feed services
        @param auth: Basic base64 encoded authentication details for the DB
        @keyword dbHostName: name of eXist DB to use.  NB, no check is made on
        this value - if left as None, the host part of the connection string
        is the literal text 'None'
        @keyword eXistPortNo: Port number that the eXist DB is exposed by - defaults
        to '8080' if not set
        '''
        logging.info("Initialising eXist feed client")
        self.auth = auth
        self.hostAndPort = '%s:%s' % (dbHostName, eXistPortNo)
        logging.info("eXist feed client initialised")


    def __postEditAtomFeed(self, collection, atomXMLString):
        '''
        Post an atom to the eXist atom feed 'edit' interface for the
        specified collection
        - if the atom is a feed, this will set up the feed, if it doesn't already
        exist
        - if the atom is an entry, this will add an entry to the feed for the
        collection, if it exists
        @param collection: eXist collection to associate the atom doc with
        @param atomXMLString: a string containing an atom doc in XML format
        @raise FeedClientError: if eXist responds with anything other than
        204, 201 or a 401 whose reason contains '+already+exists'.  NB, errors
        raised by httplib itself are not wrapped and propagate unchanged
        '''
        logging.debug("Setting up POST Request to eXist")
        connection = httplib.HTTPConnection(self.hostAndPort)

        # NB, authorisation is required for accessing eXist via this interface
        headers = {
            'Host': self.hostAndPort,
            'Content-Type': 'application/atom+xml',
            'Authorization': 'Basic %s' % self.auth
        }

        # always release the socket - whether the post succeeds, eXist reports
        # an error, or the request itself fails
        try:
            connection.request("POST", '/exist/atom/edit' + collection,
                               atomXMLString, headers)
            logging.debug("Get response...")
            response = connection.getresponse()
            logging.debug("Response retrieved")

            if response.status == 204:
                logging.info("No content returned => new feed set up")
            elif response.status == 201:
                logging.info("Successfully posted atom to eXist feed")
            elif response.status == 401 and response.reason.find('+already+exists') > -1:
                # the 'already exists' case is reported with a 401 status, so
                # tell it apart from a genuine failure via the reason text
                logging.info("Atom already exists")
            else:
                logging.debug("Response error:\n\r %s" % response.read())
                raise FeedClientError("Error encountered: '%s'" % response.reason)
        finally:
            connection.close()


    def setupBasicFeeds(self):
        '''
        Set up the standard feeds expected of the eXist DB
        - nb, provider feeds are set up when creating new atoms - when
        their providers don't already have feeds available
        '''
        self.createAtomFeed(dc.ATOM_COLLECTION_PATH,
                            self.TOPLEVEL_ATOM_FEED_TITLE)
        self.VTD = VTD()
        self.deName = VTD.TERM_DATA[VTD.DE_TERM].title

        # entity level feeds - (collection path, feed title) - in the order
        # they are created beneath each of the published collections
        entityFeeds = [
            (dc.DE_COLLECTION_PATH,
             self.ENTITYLEVEL_ATOM_FEED_TITLE % self.deName),
            (dc.DEPLOYMENT_COLLECTION_PATH,
             self.ENTITYLEVEL_ATOM_FEED_TITLE % VTD.TERM_DATA[VTD.DEPLOYMENT_TERM].title),
            (dc.DEPLOYMENTS_COLLECTION_PATH,
             self.DEPLOYMENTS_ATOM_FEED_TITLE),
            (dc.GRANULE_COLLECTION_PATH,
             self.ENTITYLEVEL_ATOM_FEED_TITLE % VTD.TERM_DATA[VTD.GRANULE_TERM].title),
        ]
        for publishedPath in [dc.PUBLISHED_COLLECTION_PATH,
                              dc.SMALL_P_PUBLISHED_COLLECTION_PATH]:
            for entityPath, title in entityFeeds:
                self.createAtomFeed(dc.ATOM_COLLECTION_PATH + publishedPath + entityPath,
                                    title)

        self.createAtomFeed(dc.ATOM_COLLECTION_PATH + dc.PUBLISHED_COLLECTION_PATH,
                            self.TOPLEVEL_PUBLISHED_FEED_TITLE)

        self.createAtomFeed(dc.DIF_COLLECTION_PATH,
                            self.TOPLEVEL_DIF_FEED_TITLE)

        self.createAtomFeed(dc.CORRECTIONS_COLLECTION_PATH,
                            self.CORRECTIONS_FEED_TITLE)

        self.createAtomFeed(dc.CITATIONS_COLLECTION_PATH,
                            self.CITATIONS_FEED_TITLE)

        self.createAtomFeed(dc.DISCOVERY_FEED_PATH,
                            self.DISCOVERY_FEED_TITLE)


    def addAtomToFeeds(self, atom):
        '''
        Add an entry to the different feeds associated with the specified atom
        @param atom: an Atom object which should be linked to as an entry in the
        eXist feeds
        '''
        # add to top level feed
        self.createAtomFeedEntry(dc.ATOM_COLLECTION_PATH, atom)

        # add to entity level feed
        self.createAtomFeedEntry(atom.getDefaultEntityCollectionPath(), atom)

        # add to Published top level feed, if appropriate
        if atom.state == AtomState.PUBLISHED_STATE:
            self.createAtomFeedEntry(dc.ATOM_COLLECTION_PATH +
                                     dc.PUBLISHED_COLLECTION_PATH, atom)

        # add to provider level feed
        self.createAtomFeedEntry(dc.PROVIDER_FEED_PATH + atom.ME.providerID + '/', atom)

        # add to DIF feeds - top level and provider level - if appropriate
        if atom.isDE():
            self.createAtomFeedEntry(dc.DIF_COLLECTION_PATH, atom, isDIFRecord=True)
            self.createAtomFeedEntry(dc.DIF_COLLECTION_PATH + atom.ME.providerID + '/',
                                     atom, isDIFRecord=True)


    def createAtomFeed(self, collection, title):
        '''
        Set the specified collection up as an atom feed
        @param collection: eXist collection to set up as an atom feed
        @param title: title to give the feed - as plain text; XML special
        characters are escaped before being added to the feed doc
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        feedXML = self.FEED_STRING % escape(title)
        logging.info("Setting up %s as an atom feed" % collection)
        self.__postEditAtomFeed(collection, feedXML)
        logging.info("%s successfully set up as an atom feed" % collection)


    def getAtomFeed(self, collection, proxyServer = None):
        '''
        Retrieve the contents of the feed for the specified eXist collection
        - in a feed parser object
        @param collection: eXist collection to retrieve the feed from
        @keyword proxyServer: proxy server to use, if required
        @return feed, xml: feed in a feedparser object and xml original data
        @raise FeedClientError: if the retrieved data contains 'HTTP ERROR' or
        cannot be parsed cleanly by feedparser
        '''
        logging.info("Retrieving content of atom feed for collection, '%s'" % collection)

        uri = 'http://%s/exist/atom/content/%s' % (self.hostAndPort, collection)

        proxy = {}
        if proxyServer:
            proxy = {'http': proxyServer}
        xml = utils.openURLWithProxy(uri, proxy)

        # NB, feedparser is not fussy when it comes to parsing XML docs - so won't
        # detect if there's been an error here - so check for this in the XML
        # (FeedClientError logs its own message - so no need to log it here too)
        if xml.find('HTTP ERROR') > -1:
            raise FeedClientError("Error returned when retrieving feed: '%s'" % xml)

        feed = feedparser.parse(xml)

        # check we've got something valid back
        if feed.bozo:
            raise FeedClientError("%s: '%s'" % (self.FEED_ERROR_MSG,
                                                feed.bozo_exception))

        return feed, xml


    def createAtomFeedEntry(self, collection, atom, isDIFRecord=False):
        '''
        Add an entry to the specified collection (which should already have a
        feed set up on it) - pointing to the specified atom link
        @param collection: eXist collection with the atom feed to add the entry to
        @param atom: Atom to be referenced in the new entry
        @keyword isDIFRecord: if True, the entry contents are adjusted to
        point to the DIF doc rather than the atom doc.  Default False.
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        titleString = '%s Atom' % atom.atomTypeName
        browseURL = atom.atomBrowseURL
        if isDIFRecord:
            titleString = self.DIF_ENTRY_TITLE
            browseURL = browseURL.replace('__%s__' % ndgObject.ATOM_DOC_TYPE,
                                          '__%s__' % ndgObject.BROWSE_DIF_DOC_TYPE)

        # all values are treated as plain text and escaped - the URL sits in a
        # double quoted attribute, so any quotes in it must be escaped as well
        # NOTE(review): assumes atomBrowseURL is not already XML escaped - confirm
        entryXML = ('<?xml version="1.0" ?>'
                    '<entry xmlns="http://www.w3.org/2005/Atom">'
                    '<title>%s [%s]</title>'
                    '<summary>%s</summary>'
                    '<content src="%s" type="application/atom+xml"/>'
                    '</entry>') % (escape(titleString),
                                   escape(atom.title),
                                   escape(atom.summary),
                                   escape(browseURL, {'"': '&quot;'}))
        logging.info("Adding link to atom (%s) as entry in atom feed, %s" \
                     % (browseURL, collection))
        self.__postEditAtomFeed(collection, entryXML)
        logging.info("Successfully added new feed entry")


    def createGenericFeedEntry(self, authorName, title, summary, path):
        '''
        Add a free text entry, with author details, to the feed of the specified
        collection (which should already have a feed set up on it)
        - NB, the log messages refer to corrections data, but the entry is
        built only from the inputs given here
        @param authorName: Name of author of the feed entry
        @param title: title to give the feed entry
        @param summary: text to add to the content element of the entry
        @param path: path to collection for the feed
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        entryXML = ('<?xml version="1.0" ?>'
                    '<entry xmlns="http://www.w3.org/2005/Atom">'
                    '<author><name>%s</name></author>'
                    '<title>%s</title>'
                    '<content>%s</content>'
                    '</entry>') % (escape(authorName), escape(title), escape(summary))
        logging.info("Adding corrections data to atom feed")
        self.__postEditAtomFeed(path, entryXML)
        logging.info("Successfully added new feed entry")
Note: See TracBrowser for help on using the repository browser.