source: ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/feedclient.py @ 5215

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/feedclient.py@5215
Revision 5215, 10.8 KB checked in by cbyrom, 13 years ago (diff)

Remove Summary property and just use the simpler 'summary' single
string field.

Line 
1'''
2 Class supporting set up and communication with eXist DB
3 for the purposes of creating and updating atom feeds
4 
5 @author: C Byrom - Tessella 09
6'''
7import logging, httplib
8import feedparser
9from xml.sax.saxutils import escape
10from ndg.common.src.models.vocabtermdata import VocabTermData as VTD
11from ndg.common.src.models.ndgObject import ndgObject
12import ndg.common.src.lib.utilities as utils
13import ndg.common.src.clients.xmldb.eXist.dbconstants as dc
14from ndg.common.src.models import AtomState
15
16
class FeedClientError(Exception):
    """
    Raised when a problem is found with an eXist hosted atom feed.

    Constructing the exception writes the message to the error log as a
    side effect, before the message is handed on to the Exception base class.
    """
    def __init__(self, msg):
        # record the problem in the log at the point the error is created
        logging.error(msg)
        super(FeedClientError, self).__init__(msg)
24
class FeedClient(object):
    '''
    Client for the atom feed services of an eXist DB.

    Feeds and feed entries are created by POSTing atom documents to the eXist
    '/exist/atom/edit' interface; feed contents are read back from the
    '/exist/atom/content' interface and parsed with feedparser.
    '''

    # Titles given to the feeds that are set up on the eXist collections
    TOPLEVEL_ATOM_FEED_TITLE = 'Moles Atom Data'
    TOPLEVEL_DIF_FEED_TITLE = 'DIF Data'
    TOPLEVEL_PUBLISHED_FEED_TITLE = 'All Published Data'
    ENTITYLEVEL_ATOM_FEED_TITLE = 'Moles %s Atom Data'
    DEPLOYMENTS_ATOM_FEED_TITLE = 'Deployments Data (activities/observation stations/data production tools)'
    PROVIDER_SUFFIX = ' - for provider, %s'
    PROVIDERLEVEL_ATOM_FEED_TITLE = TOPLEVEL_ATOM_FEED_TITLE + PROVIDER_SUFFIX
    PROVIDERLEVEL_DIF_FEED_TITLE = TOPLEVEL_DIF_FEED_TITLE + PROVIDER_SUFFIX
    CITATIONS_FEED_TITLE = 'Citations data received by Trackback mechanism'
    CORRECTIONS_FEED_TITLE = 'Corrections data submitted by atom editor users'
    DIF_ENTRY_TITLE = 'DIF Record'

    REST_BASE_URL = '/exist/rest'
    FEED_ERROR_MSG = 'Failed to retrieve valid feed - error'
    # Skeleton atom feed document - the %s placeholder takes the feed title
    FEED_STRING = '<?xml version="1.0" ?><feed xmlns="http://www.w3.org/2005/Atom"> \
        <title>%s</title></feed>'

    def __init__(self, auth, dbHostName = None, eXistPortNo = '8080'):
        '''
        Initialise a client to the eXist atom feed services
        @param auth: Basic base64 encoded authentication details for the DB
        @keyword dbHostName: name of eXist DB to use
        @keyword eXistPortNo: Port number that the eXist DB is exposed by - defaults
        to '8080' if not set
        '''
        logging.info("Initialising eXist feed client")
        self.auth = auth
        # NB, no validation is done here - the host and port are simply joined
        # together and used, as is, when connecting to eXist
        self.hostAndPort = '%s:%s' %(dbHostName, eXistPortNo)
        logging.info("eXist feed client initialised")


    def __postEditAtomFeed(self, collection, atomXMLString):
        '''
        Post an atom to the eXist atom feed 'edit' interface for the
        specified collection
        - if the atom is a feed, this will set up the feed, if it doesn't already
        exist
        - if the atom is an entry, this will add an entry to the feed for the
        collection, if it exists
        @param collection: eXist collection to associate the atom doc with
        @param atomXMLString: a string containing an atom doc in XML format
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        logging.debug("Setting up POST Request to eXist")
        connection = httplib.HTTPConnection(self.hostAndPort)

        # NB, authorisation is required for accessing eXist via this interface
        headers = {
            'Host': self.hostAndPort,
            'Content-Type': 'application/atom+xml',
            'Authorization': 'Basic %s' %self.auth
        }

        # the connection is always closed - whatever the outcome of the post -
        # so that sockets are not leaked when many atoms are posted
        try:
            connection.request("POST", '/exist/atom/edit' + collection,
                               atomXMLString, headers)
            logging.debug("Get response...")
            response = connection.getresponse()
            logging.debug("Response retrieved")

            if response.status == 204:
                logging.info("No content returned => new feed set up")
            elif response.status == 201:
                logging.info("Successfully posted atom to eXist feed")
            elif response.status == 401 and '+already+exists' in response.reason:
                # an 'already exists' rejection is not treated as an error
                logging.info("Atom already exists")
            else:
                # the response body must be read before the connection is closed
                logging.debug("Response error:\n\r %s" %response.read())
                raise FeedClientError("Error encountered: '%s'" %response.reason)
        finally:
            connection.close()


    def setupBasicFeeds(self):
        '''
        Set up the standard feeds expected of the eXist DB
        - nb, provider feeds are set up when creating new atoms - when
        their providers don't already have feeds available
        @raise FeedClientError: if any of the feeds cannot be set up
        '''
        self.createAtomFeed(dc.ATOM_COLLECTION_PATH,
                            self.TOPLEVEL_ATOM_FEED_TITLE)
        self.VTD = VTD()
        self.deName = VTD.TERM_DATA[VTD.DE_TERM].title

        # entity level feeds - one set for each of the published collections;
        # the (collection path, feed title) pairs are the same for each set
        entityFeeds = [
            (dc.DE_COLLECTION_PATH,
             self.ENTITYLEVEL_ATOM_FEED_TITLE %self.deName),
            (dc.DEPLOYMENT_COLLECTION_PATH,
             self.ENTITYLEVEL_ATOM_FEED_TITLE %VTD.TERM_DATA[VTD.DEPLOYMENT_TERM].title),
            (dc.DEPLOYMENTS_COLLECTION_PATH,
             self.DEPLOYMENTS_ATOM_FEED_TITLE),
            (dc.GRANULE_COLLECTION_PATH,
             self.ENTITYLEVEL_ATOM_FEED_TITLE %VTD.TERM_DATA[VTD.GRANULE_TERM].title),
        ]
        for publishedPath in [dc.PUBLISHED_COLLECTION_PATH,
                              dc.SMALL_P_PUBLISHED_COLLECTION_PATH]:
            for entityPath, feedTitle in entityFeeds:
                self.createAtomFeed(dc.ATOM_COLLECTION_PATH + publishedPath + entityPath,
                                    feedTitle)

        self.createAtomFeed(dc.ATOM_COLLECTION_PATH + dc.PUBLISHED_COLLECTION_PATH,
                            self.TOPLEVEL_PUBLISHED_FEED_TITLE)

        self.createAtomFeed(dc.DIF_COLLECTION_PATH,
                            self.TOPLEVEL_DIF_FEED_TITLE)

        self.createAtomFeed(dc.CORRECTIONS_COLLECTION_PATH,
                            self.CORRECTIONS_FEED_TITLE)

        self.createAtomFeed(dc.CITATIONS_COLLECTION_PATH,
                            self.CITATIONS_FEED_TITLE)


    def addAtomToFeeds(self, atom):
        '''
        Add an entry to the different feeds associated with the specified atom
        @param atom: an Atom object which should be linked to as an entry in the
        eXist feeds
        @raise FeedClientError: if an entry cannot be posted to one of the feeds
        '''
        # add to top level feed
        self.createAtomFeedEntry(dc.ATOM_COLLECTION_PATH, atom)

        # add to entity level feed
        self.createAtomFeedEntry(atom.getDefaultEntityCollectionPath(), atom)

        # add to Published top level feed, if appropriate
        if atom.state == AtomState.PUBLISHED_STATE:
            self.createAtomFeedEntry(dc.ATOM_COLLECTION_PATH +
                                     dc.PUBLISHED_COLLECTION_PATH, atom)

        # add to provider level feed
        providerID = atom.ME.providerID
        self.createAtomFeedEntry(dc.PROVIDER_FEED_PATH + providerID + '/', atom)

        # add to DIF feeds - top level and provider level - if appropriate
        if atom.isDE():
            self.createAtomFeedEntry(dc.DIF_COLLECTION_PATH, atom, isDIFRecord=True)
            self.createAtomFeedEntry(dc.DIF_COLLECTION_PATH + providerID + '/',
                                     atom, isDIFRecord=True)


    def createAtomFeed(self, collection, title):
        '''
        Set the specified collection up as an atom feed
        @param collection: eXist collection to set up as an atom feed
        @param title: title to give the feed - this is XML escaped before being
        added to the feed document
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        # escape the title so that characters such as '&' and '<' cannot
        # produce a malformed feed document
        feedXML = self.FEED_STRING %escape(title)
        logging.info("Setting up %s as an atom feed" %collection)
        self.__postEditAtomFeed(collection, feedXML)
        logging.info("%s successfully set up as an atom feed" %collection)


    def getAtomFeed(self, collection, proxyServer = None):
        '''
        Retrieve the contents of the feed for the specified eXist collection
        - in a feed parser object
        @param collection: eXist collection to retrieve the feed from
        @keyword proxyServer: proxy server to use, if required
        @return feed, xml: feed in a feedparser object and xml original data
        @raise FeedClientError: if an error page is returned or if the feed
        cannot be parsed cleanly
        '''
        logging.info("Retrieving content of atom feed for collection, '%s'" %collection)

        uri = 'http://%s/exist/atom/content/%s' %(self.hostAndPort, collection)

        proxy = {}
        if proxyServer:
            proxy = {'http': proxyServer}
        xml = utils.openURLWithProxy(uri, proxy)

        # NB, feedparser is not fussy when it comes to parsing XML docs - so won't
        # detect if there's been an error here - so check for this in the XML
        # (FeedClientError logs the message itself, so it is not logged here too)
        if 'HTTP ERROR' in xml:
            raise FeedClientError("Error returned when retrieving feed: '%s'" %xml)

        feed = feedparser.parse(xml)

        # check we've got something valid back
        if feed.bozo:
            raise FeedClientError("%s: '%s'" %(self.FEED_ERROR_MSG,
                                               feed.bozo_exception))

        return feed, xml


    def createAtomFeedEntry(self, collection, atom, isDIFRecord=False):
        '''
        Add an entry to the specified collection (which should already have a
        feed set up on it) - pointing to the specified atom link
        @param collection: eXist collection with the atom feed to add the entry to
        @param atom: Atom to be referenced in the new entry
        @keyword isDIFRecord: if True, the entry contents are adjusted to
        point to the DIF doc rather than the atom doc.  Default False.
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        titleString = '%s Atom' %atom.atomTypeName
        browseURL = atom.atomBrowseURL
        if isDIFRecord:
            titleString = self.DIF_ENTRY_TITLE
            browseURL = browseURL.replace('__%s__' %ndgObject.ATOM_DOC_TYPE,
                                          '__%s__' %ndgObject.BROWSE_DIF_DOC_TYPE)

        # every value is escaped before being added to the doc; the URL sits
        # inside a double quoted attribute, so quotes are escaped there as well
        entryXML = '<?xml version="1.0" ?>\
<entry xmlns="http://www.w3.org/2005/Atom">\
<title>%s [%s]</title>\
<summary>%s</summary>\
<content src="%s" type="application/atom+xml"/>\
</entry>' %(escape(titleString), escape(atom.title), escape(atom.summary),
            escape(browseURL, {'"': '&quot;'}))
        logging.info("Adding link to atom (%s) as entry in atom feed, %s" \
                     %(browseURL, collection))
        self.__postEditAtomFeed(collection, entryXML)
        logging.info("Successfully added new feed entry")


    def createGenericFeedEntry(self, title, summary, path):
        '''
        Add an entry to the specified collection (which should already have a
        feed set up on it) - with details of the corrections data submitted by
        users of the atom editor
        @param title: title to give the feed entry
        @param summary: text to add to the content element of the entry
        @param path: path to collection for the feed
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        entryXML = '<?xml version="1.0" ?>\
<entry xmlns="http://www.w3.org/2005/Atom">\
<title>%s</title>\
<content>%s</content>\
</entry>' %(escape(title), escape(summary))
        logging.info("Adding corrections data to atom feed")
        self.__postEditAtomFeed(path, entryXML)
        logging.info("Successfully added new feed entry")
Note: See TracBrowser for help on using the repository browser.