source: ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/feedclient.py @ 7736

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/feedclient.py
Revision 7736, 11.1 KB checked in by sdonegan, 10 years ago (diff)

Update to allow feedclient to work with iso recs

Line 
1'''
2 Class supporting set up and communication with eXist DB
3 for the purposes of creating and updating atom feeds
4 
5 @author: C Byrom - Tessella 09
6'''
7import logging, httplib
8import feedparser
9from xml.sax.saxutils import escape
10from ndg.common.src.models.vocabtermdata import VocabTermData as VTD
11from ndg.common.src.models.ndgObject import ndgObject
12import ndg.common.src.lib.utilities as utils
13import ndg.common.src.clients.xmldb.eXist.dbconstants as dc
14from ndg.common.src.models import AtomState
15
16
class FeedClientError(Exception):
    """
    Raised when an operation against an eXist hosted atom feed fails.

    The message is written to the error log as the exception is constructed,
    so the problem is recorded even if a caller later swallows the exception.
    """
    def __init__(self, msg):
        '''
        @param msg: description of the problem encountered
        '''
        logging.error(msg)
        super(FeedClientError, self).__init__(msg)
24
class FeedClient(object):
    '''
    Client for the atom feed services exposed by an eXist DB.

    Feeds and feed entries are created by POSTing atom documents to the eXist
    '/exist/atom/edit' interface; feed contents are read back from the
    '/exist/atom/content' interface and parsed with feedparser.
    '''

    TOPLEVEL_ATOM_FEED_TITLE = 'Moles Atom Data'
    TOPLEVEL_DIF_FEED_TITLE = 'DIF Data'
    TOPLEVEL_PUBLISHED_FEED_TITLE = 'All Published Data'
    ENTITYLEVEL_ATOM_FEED_TITLE = 'Moles %s Atom Data'
    DEPLOYMENTS_ATOM_FEED_TITLE = 'Deployments Data (activities/observation stations/data production tools)'
    PROVIDER_SUFFIX = ' - for provider, %s'
    PROVIDERLEVEL_ATOM_FEED_TITLE = TOPLEVEL_ATOM_FEED_TITLE + PROVIDER_SUFFIX
    PROVIDERLEVEL_DIF_FEED_TITLE = TOPLEVEL_DIF_FEED_TITLE + PROVIDER_SUFFIX
    CITATIONS_FEED_TITLE = 'Citations data received by Trackback mechanism'
    CORRECTIONS_FEED_TITLE = 'Corrections data submitted by atom editor users'
    DISCOVERY_FEED_TITLE = 'Discovery service information'
    DIF_ENTRY_TITLE = 'DIF Record'
    DMS_ENTRY_TITLE = 'NERC ISO DMS record'

    REST_BASE_URL = '/exist/rest'
    FEED_ERROR_MSG = 'Failed to retrieve valid feed - error'
    # Template for an empty feed document; %s is replaced by the feed title
    FEED_STRING = '<?xml version="1.0" ?><feed xmlns="http://www.w3.org/2005/Atom"> \
        <title>%s</title></feed>'

    def __init__(self, auth, dbHostName = None, eXistPortNo = '8080'):
        '''
        Initialise a client to the eXist atom feed services
        @param auth: Basic base64 encoded authentication details for the DB
        @keyword dbHostName: name of eXist DB to use
        @keyword eXistPortNo: Port number that the eXist DB is exposed by - defaults
        to '8080' if not set
        '''
        logging.info("Initialising eXist feed client")
        self.auth = auth
        self.hostAndPort = '%s:%s' %(dbHostName, eXistPortNo)
        logging.info("eXist feed client initialised")


    def __postEditAtomFeed(self, collection, atomXMLString):
        '''
        Post an atom to the eXist atom feed 'edit' interface for the
        specified collection
        - if the atom is a feed, this will set up the feed, if it doesn't already
        exist
        - if the atom is an entry, this will add an entry to the feed for the
        collection, if it exists
        @param collection: eXist collection to associate the atom doc with
        @param atomXMLString: a string containing an atom doc in XML format
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        logging.debug("Setting up POST Request to eXist")
        connection = httplib.HTTPConnection(self.hostAndPort)

        # NB, authorisation is required for accessing eXist via this interface
        headers = {
            'Host': self.hostAndPort,
            'Content-Type': 'application/atom+xml',
            'Authorization': 'Basic %s' %self.auth,
        }

        try:
            connection.request("POST", '/exist/atom/edit' + collection,
                               atomXMLString, headers)
            logging.debug("Get response...")
            response = connection.getresponse()
            logging.debug("Response retrieved")

            if response.status == 204:
                logging.info("No content returned => new feed set up")
            elif response.status == 201:
                logging.info("Successfully posted atom to eXist feed")
            elif response.status == 401 and '+already+exists' in response.reason:
                # a 401 whose reason mentions 'already exists' is treated
                # as success rather than as an authorisation failure
                logging.info("Atom already exists")
            else:
                logging.debug("Response error:\n\r %s" %response.read())
                raise FeedClientError("Error encountered: '%s'" %response.reason)
        finally:
            # always release the socket - whether the post worked or not
            connection.close()


    def setupBasicFeeds(self):
        '''
        Set up the standard feeds expected of the eXist DB
        - nb, provider feeds are set up when creating new atoms - when
        their providers don't already have feeds available
        @raise FeedClientError: if any of the feeds cannot be set up
        '''
        self.createAtomFeed(dc.ATOM_COLLECTION_PATH,
                            self.TOPLEVEL_ATOM_FEED_TITLE)
        # NB, keep the instantiation before the TERM_DATA lookup - as in the
        # original code (presumably TERM_DATA relies on this - TODO confirm)
        self.VTD = VTD()
        self.deName = VTD.TERM_DATA[VTD.DE_TERM].title

        # set up the entity level feeds for each of the 'published' collections
        for publishedPath in [dc.PUBLISHED_COLLECTION_PATH,
                              dc.SMALL_P_PUBLISHED_COLLECTION_PATH]:
            self.createAtomFeed(dc.ATOM_COLLECTION_PATH + publishedPath + dc.DE_COLLECTION_PATH,
                                self.ENTITYLEVEL_ATOM_FEED_TITLE %VTD.TERM_DATA[VTD.DE_TERM].title)
            self.createAtomFeed(dc.ATOM_COLLECTION_PATH + publishedPath + dc.DEPLOYMENT_COLLECTION_PATH,
                                self.ENTITYLEVEL_ATOM_FEED_TITLE %VTD.TERM_DATA[VTD.DEPLOYMENT_TERM].title)
            self.createAtomFeed(dc.ATOM_COLLECTION_PATH + publishedPath + dc.DEPLOYMENTS_COLLECTION_PATH,
                                self.DEPLOYMENTS_ATOM_FEED_TITLE)
            self.createAtomFeed(dc.ATOM_COLLECTION_PATH + publishedPath + dc.GRANULE_COLLECTION_PATH,
                                self.ENTITYLEVEL_ATOM_FEED_TITLE %VTD.TERM_DATA[VTD.GRANULE_TERM].title)

        self.createAtomFeed(dc.ATOM_COLLECTION_PATH + dc.PUBLISHED_COLLECTION_PATH,
                            self.TOPLEVEL_PUBLISHED_FEED_TITLE)

        self.createAtomFeed(dc.DIF_COLLECTION_PATH,
                            self.TOPLEVEL_DIF_FEED_TITLE)

        self.createAtomFeed(dc.CORRECTIONS_COLLECTION_PATH,
                            self.CORRECTIONS_FEED_TITLE)

        self.createAtomFeed(dc.CITATIONS_COLLECTION_PATH,
                            self.CITATIONS_FEED_TITLE)

        self.createAtomFeed(dc.DISCOVERY_FEED_PATH,
                            self.DISCOVERY_FEED_TITLE)


    def addAtomToFeeds(self, atom):
        '''
        Add an entry to the different feeds associated with the specified atom
        @param atom: an Atom object which should be linked to as an entry in the
        eXist feeds
        @raise FeedClientError: if any of the entries cannot be posted
        '''
        # add to top level feed
        self.createAtomFeedEntry(dc.ATOM_COLLECTION_PATH, atom)

        # add to entity level feed
        self.createAtomFeedEntry(atom.getDefaultEntityCollectionPath(), atom)

        # add to Published top level feed, if appropriate
        if atom.state == AtomState.PUBLISHED_STATE:
            self.createAtomFeedEntry(dc.ATOM_COLLECTION_PATH +
                                     dc.PUBLISHED_COLLECTION_PATH, atom)

        # add to provider level feed
        self.createAtomFeedEntry(dc.PROVIDER_FEED_PATH + atom.ME.providerID + '/', atom)

        # add to DIF feeds - if appropriate
        if atom.isDE():
            self.createAtomFeedEntry(dc.DIF_COLLECTION_PATH, atom, isDIFRecord=True)
            self.createAtomFeedEntry(dc.DIF_COLLECTION_PATH + atom.ME.providerID + '/',
                                     atom, isDIFRecord=True)


    def createAtomFeed(self, collection, title):
        '''
        Set the specified collection up as an atom feed
        @param collection: eXist collection to set up as an atom feed
        @param title: title to give the feed - as plain text; any XML special
        characters are escaped before being added to the feed doc
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        feedXML = self.FEED_STRING %escape(title)
        logging.info("Setting up %s as an atom feed", collection)
        self.__postEditAtomFeed(collection, feedXML)
        logging.info("%s successfully set up as an atom feed", collection)


    def getAtomFeed(self, collection, proxyServer = None):
        '''
        Retrieve the contents of the feed for the specified eXist collection
        - in a feed parser object
        @param collection: eXist collection to retrieve the feed from
        @keyword proxyServer: proxy server to use, if required
        @return feed, xml: feed in a feedparser object and xml original data
        @raise FeedClientError: if an HTTP error page is returned or if the
        returned data cannot be parsed as a valid feed
        '''
        logging.info("Retrieving content of atom feed for collection, '%s'", collection)

        uri = 'http://%s/exist/atom/content/%s' %(self.hostAndPort, collection)

        proxy = {}
        if proxyServer:
            proxy = {'http': proxyServer}
        xml = utils.openURLWithProxy(uri, proxy)

        # NB, feedparser is not fussy when it comes to parsing XML docs - so won't
        # detect if there's been an error here - so check for this in the XML
        # NB, FeedClientError logs its own message - so no need to log it here too
        if 'HTTP ERROR' in xml:
            raise FeedClientError("Error returned when retrieving feed: '%s'" %xml)

        feed = feedparser.parse(xml)

        # check we've got something valid back
        if feed.bozo:
            raise FeedClientError("%s: '%s'" %(self.FEED_ERROR_MSG, feed.bozo_exception))

        return feed, xml


    def createAtomFeedEntry(self, collection, atom, isDIFRecord=False):
        '''
        Add an entry to the specified collection (which should already have a
        feed set up on it) - pointing to the specified atom link
        @param collection: eXist collection with the atom feed to add the entry to
        @param atom: Atom to be referenced in the new entry
        @keyword isDIFRecord: if True, the entry contents are adjusted to
        point to the DIF doc rather than the atom doc.  Default False.
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        titleString = '%s Atom' %atom.atomTypeName
        browseURL = atom.atomBrowseURL
        if isDIFRecord:
            titleString = self.DIF_ENTRY_TITLE
            browseURL = browseURL.replace('__%s__' %ndgObject.ATOM_DOC_TYPE,
                                          '__%s__' %ndgObject.BROWSE_DIF_DOC_TYPE)

        # NB, all values are escaped before being added to the XML - including
        # the URL, since an unescaped '&' in a query string (or a '"') would
        # produce a malformed 'src' attribute
        entryXML = ('<?xml version="1.0" ?>'
                    '<entry xmlns="http://www.w3.org/2005/Atom">'
                    '<title>%s [%s]</title>'
                    '<summary>%s</summary>'
                    '<content src="%s" type="application/atom+xml"/>'
                    '</entry>') %(escape(titleString), escape(atom.title),
                                  escape(atom.summary),
                                  escape(browseURL, {'"': '&quot;'}))
        logging.info("Adding link to atom (%s) as entry in atom feed, %s",
                     browseURL, collection)
        self.__postEditAtomFeed(collection, entryXML)
        logging.info("Successfully added new feed entry")


    def createGenericFeedEntry(self, authorName, title, summary, path):
        '''
        Add a generic entry to the feed on the specified collection path (which
        should already have a feed set up on it) - e.g. with details of the
        corrections data submitted by users of the atom editor
        @param authorName: Name of author of the feed entry
        @param title: title to give the feed entry
        @param summary: text to add as the content element of the entry
        @param path: path to collection for the feed
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        entryXML = ('<?xml version="1.0" ?>'
                    '<entry xmlns="http://www.w3.org/2005/Atom">'
                    '<author><name>%s</name></author>'
                    '<title>%s</title>'
                    '<content>%s</content>'
                    '</entry>') %(escape(authorName), escape(title), escape(summary))
        logging.info("Adding corrections data to atom feed")
        self.__postEditAtomFeed(path, entryXML)
        logging.info("Successfully added new feed entry")
Note: See TracBrowser for help on using the repository browser.