source: ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/feedclient.py @ 4994

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/feedclient.py@4994
Revision 4994, 10.1 KB checked in by cbyrom, 11 years ago (diff)

Add keyword to getAtomFeed to allow proxies to be set explicitly. Also
deal with the case when these have been switched off; feedparser
will automatically pick up the underlying os env variable settings
and these may not match the keyword settings.

Line 
1'''
2 Class supporting set up and communication with eXist DB
3 for the purposes of creating and updating atom feeds
4 
5 @author: C Byrom - Tessella 09
6'''
import httplib
import logging
import os
import urllib2
from xml.sax.saxutils import escape

import feedparser

import ndg.common.src.clients.xmldb.eXist.dbconstants as dc
import ndg.common.src.lib.utilities as utils
from ndg.common.src.models.ndgObject import ndgObject
from ndg.common.src.models.vocabtermdata import VocabTermData as VTD
13
14
class FeedClientError(Exception):
    '''
    Raised when an operation on an eXist hosted atom feed fails.

    The message is written to the error log as a side effect of creating
    the exception, so callers need not log it again.
    '''
    def __init__(self, msg):
        '''
        @param msg: description of the problem; logged at error level and
        stored as the exception message
        '''
        super(FeedClientError, self).__init__(msg)
        logging.error(msg)
22
class FeedClient(object):
    '''
    Client for creating, populating and reading the atom feeds hosted by an
    eXist DB instance.

    Feeds and entries are created by POSTing atom documents to the eXist
    '/exist/atom/edit' interface; feed contents are read back from
    '/exist/atom/content' using feedparser.
    '''

    # Feed titles - those containing '%s' are templates to be filled in with
    # an entity type name or a provider ID
    TOPLEVEL_ATOM_FEED_TITLE = 'Moles Atom Data'
    TOPLEVEL_DIF_FEED_TITLE = 'DIF Data'
    ENTITYLEVEL_ATOM_FEED_TITLE = 'Moles %s Atom Data'
    DEPLOYMENTS_ATOM_FEED_TITLE = 'Deployments Data (activities/observation stations/data production tools)'
    PROVIDER_SUFFIX = ' - for provider, %s'
    PROVIDERLEVEL_ATOM_FEED_TITLE = TOPLEVEL_ATOM_FEED_TITLE + PROVIDER_SUFFIX
    PROVIDERLEVEL_DIF_FEED_TITLE = TOPLEVEL_DIF_FEED_TITLE + PROVIDER_SUFFIX
    CORRECTIONS_FEED_TITLE = 'Corrections data submitted by atom editor users'
    DIF_ENTRY_TITLE = 'DIF Record'

    REST_BASE_URL = '/exist/rest'
    FEED_ERROR_MSG = 'Failed to retrieve valid feed - error'
    # Minimal atom feed document; '%s' is replaced by the feed title
    FEED_STRING = ('<?xml version="1.0" ?><feed xmlns="http://www.w3.org/2005/Atom"> '
                   '        <title>%s</title></feed>')

    def __init__(self, auth, dbHostName = None, eXistPortNo = '8080'):
        '''
        Initialise a client to the eXist atom feed services
        @param auth: Basic base64 encoded authentication details for the DB
        @keyword dbHostName: name of eXist DB to use.  NB, no validation is
        done here: if left as None the client is still created, but with the
        unusable address 'None:<port>'
        @keyword eXistPortNo: Port number that the eXist DB is exposed by - defaults
        to '8080' if not set
        '''
        logging.info("Initialising eXist feed client")
        self.auth = auth
        self.hostAndPort = '%s:%s' %(dbHostName, eXistPortNo)
        logging.info("eXist feed client initialised")


    @staticmethod
    def _xmlEscape(value):
        '''
        Make a value safe for inclusion in an XML document, as element text
        or inside a double quoted attribute value
        @param value: value to escape; formatted with '%s' first, so non
        string values are accepted
        @return: string with &, <, > and " replaced by their XML entities
        '''
        return escape('%s' %(value,), {'"': '&quot;'})


    def __postEditAtomFeed(self, collection, atomXMLString):
        '''
        Post an atom to the eXist atom feed 'edit' interface for the
        specified collection
        - if the atom is a feed, this will set up the feed, if it doesn't already
        exist
        - if the atom is an entry, this will add an entry to the feed for the
        collection, if it exists
        @param collection: eXist collection to associate the atom doc with
        @param atomXMLString: a string containing an atom doc in XML format
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        logging.debug("Setting up POST Request to eXist")
        connection = httplib.HTTPConnection(self.hostAndPort)

        # NB, authorisation is required for accessing eXist via this interface
        headers = {
            'Host': self.hostAndPort,
            'Content-Type': 'application/atom+xml',
            'Authorization': 'Basic %s' %self.auth
                   }

        # always release the socket - whether the post succeeds, is rejected
        # or fails outright
        try:
            connection.request("POST", '/exist/atom/edit' + collection,
                               atomXMLString, headers)
            logging.debug("Get response...")
            response = connection.getresponse()
            logging.debug("Response retrieved")

            if response.status == 204:
                logging.info("No content returned => new feed set up")
            elif response.status == 201:
                logging.info("Successfully posted atom to eXist feed")
            elif response.status == 401 and '+already+exists' in response.reason:
                # the 'already exists' case is reported with a 401 status;
                # it is not treated as an error
                logging.info("Atom already exists")
            else:
                logging.debug("Response error:\n\r %s" %response.read())
                raise FeedClientError("Error encountered: '%s'" %response.reason)
        finally:
            connection.close()


    def setupBasicFeeds(self):
        '''
        Set up the standard feeds expected of the eXist DB
        - nb, provider feeds are set up when creating new atoms - when
        their providers don't already have feeds available
        @raise FeedClientError: if any of the feeds cannot be set up
        '''
        self.createAtomFeed(dc.ATOM_COLLECTION_PATH,
                            self.TOPLEVEL_ATOM_FEED_TITLE)
        self.VTD = VTD()
        self.deName = VTD.TERM_DATA[VTD.DE_TERM].title

        # (collection suffix, feed title) for each entity level feed; the same
        # set is created under each of the published state collections
        entityFeeds = [
            (dc.DE_COLLECTION_PATH,
             self.ENTITYLEVEL_ATOM_FEED_TITLE %VTD.TERM_DATA[VTD.DE_TERM].title),
            (dc.DEPLOYMENT_COLLECTION_PATH,
             self.ENTITYLEVEL_ATOM_FEED_TITLE %VTD.TERM_DATA[VTD.DEPLOYMENT_TERM].title),
            (dc.DEPLOYMENTS_COLLECTION_PATH,
             self.DEPLOYMENTS_ATOM_FEED_TITLE),
            (dc.GRANULE_COLLECTION_PATH,
             self.ENTITYLEVEL_ATOM_FEED_TITLE %VTD.TERM_DATA[VTD.GRANULE_TERM].title),
        ]
        for statePath in [dc.PUBLISHED_COLLECTION_PATH, dc.SMALL_P_PUBLISHED_COLLECTION_PATH]:
            for entityPath, feedTitle in entityFeeds:
                self.createAtomFeed(dc.ATOM_COLLECTION_PATH + statePath + entityPath,
                                    feedTitle)

        self.createAtomFeed(dc.DIF_COLLECTION_PATH,
                            self.TOPLEVEL_DIF_FEED_TITLE)

        self.createAtomFeed(dc.CORRECTIONS_COLLECTION_PATH,
                            self.CORRECTIONS_FEED_TITLE)


    def addAtomToFeeds(self, atom):
        '''
        Add an entry to the different feeds associated with the specified atom
        @param atom: an Atom object which should be linked to as an entry in the
        eXist feeds
        @raise FeedClientError: if any of the entries cannot be posted
        '''
        # add to top level feed
        self.createAtomFeedEntry(dc.ATOM_COLLECTION_PATH, atom)
        # add to entity level feed
        self.createAtomFeedEntry(atom.getDefaultEntityCollectionPath(), atom)

        # add to provider level feed
        self.createAtomFeedEntry(dc.PROVIDER_FEED_PATH + atom.ME.providerID + '/', atom)

        # add to DIF feeds - if appropriate
        if atom.isDE():
            self.createAtomFeedEntry(dc.DIF_COLLECTION_PATH, atom, isDIFRecord=True)
            self.createAtomFeedEntry(dc.DIF_COLLECTION_PATH + atom.ME.providerID + '/',
                                     atom, isDIFRecord=True)


    def createAtomFeed(self, collection, title):
        '''
        Set the specified collection up as an atom feed
        @param collection: eXist collection to set up as an atom feed
        @param title: title to give the feed; XML special characters are
        escaped before the title is added to the feed document
        @raise FeedClientError: if the feed cannot be set up
        '''
        feedXML = self.FEED_STRING %self._xmlEscape(title)
        logging.info("Setting up %s as an atom feed" %collection)
        self.__postEditAtomFeed(collection, feedXML)
        logging.info("%s successfully set up as an atom feed" %collection)


    def getAtomFeed(self, collection, proxyServer = None):
        '''
        Retrieve the contents of the feed for the specified eXist collection
        - in a feed parser object
        @param collection: eXist collection to retrieve the feed from
        @keyword proxyServer: proxy server to use, if required
        @return feed: feed in a feedparser object
        @raise FeedClientError: if feedparser flags the retrieved feed as
        invalid (i.e. sets its 'bozo' bit)
        '''
        logging.info("Retrieving content of atom feed for collection, '%s'" %collection)

        # NB, there is slightly unhelpful default behaviour here: if there is a
        # proxy specified in the environment variables, this will be used regardless
        # of whether it is specified here - so temporarily remove this if no proxy
        # is specified
        handlers = []
        oldProxy = None
        if proxyServer:
            handlers.append(urllib2.ProxyHandler( {"http": proxyServer} ))
        elif utils.PROXY_KEY in os.environ:
            oldProxy = os.environ[utils.PROXY_KEY]
            del os.environ[utils.PROXY_KEY]

        try:
            feed = feedparser.parse('http://%s/exist/atom/content/%s'
                                    %(self.hostAndPort, collection),
                                    handlers = handlers)
        finally:
            # put the environment back as it was found - even if the
            # retrieval blew up, or the original setting was an empty string
            if oldProxy is not None:
                os.environ[utils.PROXY_KEY] = oldProxy

        # check we've got something valid back
        if feed.bozo:
            errorMessage = "%s: '%s'" %(self.FEED_ERROR_MSG, feed.bozo_exception)
            raise FeedClientError(errorMessage)

        return feed


    def createAtomFeedEntry(self, collection, atom, isDIFRecord=False):
        '''
        Add an entry to the specified collection (which should already have a
        feed set up on it) - pointing to the specified atom link
        @param collection: eXist collection with the atom feed to add the entry to
        @param atom: Atom to be referenced in the new entry; its title, summary
        and browse URL are XML escaped before being added to the entry
        @keyword isDIFRecord: if True, the entry contents are adjusted to
        point to the DIF doc rather than the atom doc.  Default False.
        @raise FeedClientError: if the entry cannot be posted
        '''
        titleString = '%s Atom' %atom.atomTypeName
        browseURL = atom.atomBrowseURL
        if isDIFRecord:
            titleString = self.DIF_ENTRY_TITLE
            browseURL = browseURL.replace('__%s__' %ndgObject.ATOM_DOC_TYPE,
                                          '__%s__' %ndgObject.BROWSE_DIF_DOC_TYPE)

        entryXML = ('<?xml version="1.0" ?>'
                    '<entry xmlns="http://www.w3.org/2005/Atom">'
                    '<title>%s [%s]</title>'
                    '<summary>%s</summary>'
                    '<content src="%s" type="application/atom+xml"/>'
                    '</entry>') %(self._xmlEscape(titleString),
                                  self._xmlEscape(atom.title),
                                  self._xmlEscape(atom.Summary),
                                  self._xmlEscape(browseURL))
        logging.info("Adding link to atom (%s) as entry in atom feed, %s" \
                     %(browseURL, collection))
        self.__postEditAtomFeed(collection, entryXML)
        logging.info("Successfully added new feed entry")


    def createCorrectionsFeedEntry(self, title, summary):
        '''
        Add an entry to the corrections collection (which should already have a
        feed set up on it) - with details of the corrections data submitted by
        users of the atom editor
        @param title: title to give the feed entry
        @param summary: text to add to the content element of the entry
        NB, both values are XML escaped before being added to the entry, since
        they originate from user input
        @raise FeedClientError: if the entry cannot be posted
        '''
        entryXML = ('<?xml version="1.0" ?>'
                    '<entry xmlns="http://www.w3.org/2005/Atom">'
                    '<title>%s</title>'
                    '<content>%s</content>'
                    '</entry>') %(self._xmlEscape(title),
                                  self._xmlEscape(summary))
        logging.info("Adding corrections data to atom feed")
        self.__postEditAtomFeed(dc.CORRECTIONS_COLLECTION_PATH, entryXML)
        logging.info("Successfully added new feed entry")
Note: See TracBrowser for help on using the repository browser.