source: ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/feedclient.py @ 5018

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/feedclient.py@5018
Revision 5018, 10.3 KB checked in by cbyrom, 13 years ago (diff)

Generalise the method for adding new entries to atom feeds - to allow
re-use between the new feeds for citations and comments + add the new
set-up structure and contents needed to handle the new feed that collects
trackback citation info.

Line 
1'''
2 Class supporting set up and communication with eXist DB
3 for the purposes of creating and updating atom feeds
4 
5 @author: C Byrom - Tessella 09
6'''
7import logging, httplib, urllib2, os
8import feedparser
9from ndg.common.src.models.vocabtermdata import VocabTermData as VTD
10from ndg.common.src.models.ndgObject import ndgObject
11import ndg.common.src.lib.utilities as utils
12import ndg.common.src.clients.xmldb.eXist.dbconstants as dc
13
14
class FeedClientError(Exception):
    """
    Raised when a problem is found with an eXist hosted atom feed

    NB, constructing the exception also writes the message to the error log
    """
    def __init__(self, msg):
        # log first, so the problem is recorded even if the exception is
        # subsequently swallowed by a caller
        logging.error(msg)
        super(FeedClientError, self).__init__(msg)
22
class FeedClient(object):
    '''
    Client for creating and updating atom feeds hosted by an eXist DB

    Feeds and feed entries are created by POSTing atom documents to the
    eXist atom 'edit' interface; feed contents are read back using feedparser
    '''

    TOPLEVEL_ATOM_FEED_TITLE = 'Moles Atom Data'
    TOPLEVEL_DIF_FEED_TITLE = 'DIF Data'
    ENTITYLEVEL_ATOM_FEED_TITLE = 'Moles %s Atom Data'
    DEPLOYMENTS_ATOM_FEED_TITLE = 'Deployments Data (activities/observation stations/data production tools)'
    PROVIDER_SUFFIX = ' - for provider, %s'
    PROVIDERLEVEL_ATOM_FEED_TITLE = TOPLEVEL_ATOM_FEED_TITLE + PROVIDER_SUFFIX
    PROVIDERLEVEL_DIF_FEED_TITLE = TOPLEVEL_DIF_FEED_TITLE + PROVIDER_SUFFIX
    CITATIONS_FEED_TITLE = 'Citations data received by Trackback mechanism'
    CORRECTIONS_FEED_TITLE = 'Corrections data submitted by atom editor users'
    DIF_ENTRY_TITLE = 'DIF Record'

    REST_BASE_URL = '/exist/rest'
    FEED_ERROR_MSG = 'Failed to retrieve valid feed - error'
    # NB, the line continuation sits inside the string literal, so the leading
    # whitespace of the second line is part of the posted document
    FEED_STRING = '<?xml version="1.0" ?><feed xmlns="http://www.w3.org/2005/Atom"> \
        <title>%s</title></feed>'

    def __init__(self, auth, dbHostName = None, eXistPortNo = '8080'):
        '''
        Initialise a client to the eXist atom feed services
        @param auth: Basic base64 encoded authentication details for the DB
        @keyword dbHostName: name of eXist DB to use
        @keyword eXistPortNo: Port number that the eXist DB is exposed by - defaults
        to '8080' if not set
        '''
        logging.info("Initialising eXist feed client")
        self.auth = auth
        # NB, no validation is done here - if dbHostName is left as None the
        # resulting 'None:<port>' address will only fail when first used
        self.hostAndPort = '%s:%s' %(dbHostName, eXistPortNo)
        logging.info("eXist feed client initialised")


    @staticmethod
    def _escapeXML(value, isAttribute = False):
        '''
        Escape a value so it can be safely embedded in an XML document
        @param value: value to escape; non string values are first formatted
        using '%s' - so, e.g. None is output as 'None'
        @keyword isAttribute: if True, double quotes are also escaped so that
        the result can be placed inside a double quoted attribute.  Default False.
        @return: the escaped string
        '''
        # local import - so no change is needed to the module level imports
        from xml.sax.saxutils import escape

        if not hasattr(value, 'replace'):
            value = '%s' %(value,)

        if isAttribute:
            return escape(value, {'"': '&quot;'})
        return escape(value)


    def __postEditAtomFeed(self, collection, atomXMLString):
        '''
        Post an atom to the eXist atom feed 'edit' interface for the
        specified collection
        - if the atom is a feed, this will set up the feed, if it doesn't already
        exist
        - if the atom is an entry, this will add an entry to the feed for the
        collection, if it exists
        @param collection: eXist collection to associate the atom doc with
        @param atomXMLString: a string containing an atom doc in XML format
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        logging.debug("Setting up POST Request to eXist")
        connection = httplib.HTTPConnection(self.hostAndPort)

        # NB, authorisation is required for accessing eXist via this interface
        headers = {
            'Host': self.hostAndPort,
            'Content-Type': 'application/atom+xml',
            'Authorization': 'Basic %s' %self.auth
                   }

        # ensure the connection is always released - whatever the outcome
        try:
            connection.request("POST", '/exist/atom/edit' + collection,
                               atomXMLString, headers)
            logging.debug("Get response...")
            response = connection.getresponse()
            logging.debug("Response retrieved")

            status = response.status
            reason = response.reason
            if status == 204:
                logging.info("No content returned => new feed set up")
            elif status == 201:
                logging.info("Successfully posted atom to eXist feed")
            elif status == 401 and '+already+exists' in reason:
                # a 401 whose reason flags an existing atom is treated as
                # success - so that set up can be safely repeated
                logging.info("Atom already exists")
            else:
                logging.debug("Response error:\n\r %s", response.read())
                raise FeedClientError("Error encountered: '%s'" %reason)
        finally:
            connection.close()


    def setupBasicFeeds(self):
        '''
        Set up the standard feeds expected of the eXist DB
        - nb, provider feeds are set up when creating new atoms - when
        their providers don't already have feeds available
        '''
        self.createAtomFeed(dc.ATOM_COLLECTION_PATH,
                            self.TOPLEVEL_ATOM_FEED_TITLE)
        self.VTD = VTD()
        self.deName = VTD.TERM_DATA[VTD.DE_TERM].title

        # (collection path, feed title) for each entity level feed - these are
        # set up under both of the published collections
        entityFeeds = [
            (dc.DE_COLLECTION_PATH,
             self.ENTITYLEVEL_ATOM_FEED_TITLE %VTD.TERM_DATA[VTD.DE_TERM].title),
            (dc.DEPLOYMENT_COLLECTION_PATH,
             self.ENTITYLEVEL_ATOM_FEED_TITLE %VTD.TERM_DATA[VTD.DEPLOYMENT_TERM].title),
            (dc.DEPLOYMENTS_COLLECTION_PATH,
             self.DEPLOYMENTS_ATOM_FEED_TITLE),
            (dc.GRANULE_COLLECTION_PATH,
             self.ENTITYLEVEL_ATOM_FEED_TITLE %VTD.TERM_DATA[VTD.GRANULE_TERM].title),
        ]
        for publishedPath in [dc.PUBLISHED_COLLECTION_PATH,
                              dc.SMALL_P_PUBLISHED_COLLECTION_PATH]:
            for entityPath, feedTitle in entityFeeds:
                self.createAtomFeed(dc.ATOM_COLLECTION_PATH + publishedPath + entityPath,
                                    feedTitle)

        self.createAtomFeed(dc.DIF_COLLECTION_PATH,
                            self.TOPLEVEL_DIF_FEED_TITLE)

        self.createAtomFeed(dc.CORRECTIONS_COLLECTION_PATH,
                            self.CORRECTIONS_FEED_TITLE)

        self.createAtomFeed(dc.CITATIONS_COLLECTION_PATH,
                            self.CITATIONS_FEED_TITLE)


    def addAtomToFeeds(self, atom):
        '''
        Add an entry to the different feeds associated with the specified atom
        @param atom: an Atom object which should be linked to as an entry in the
        eXist feeds
        '''
        # add to top level feed
        self.createAtomFeedEntry(dc.ATOM_COLLECTION_PATH, atom)
        # add to entity level feed
        self.createAtomFeedEntry(atom.getDefaultEntityCollectionPath(), atom)

        # add to provider level feed
        providerID = atom.ME.providerID
        self.createAtomFeedEntry(dc.PROVIDER_FEED_PATH + providerID + '/', atom)

        # add to DIF feeds - if appropriate
        if atom.isDE():
            self.createAtomFeedEntry(dc.DIF_COLLECTION_PATH, atom, isDIFRecord=True)
            self.createAtomFeedEntry(dc.DIF_COLLECTION_PATH + providerID + '/',
                                     atom, isDIFRecord=True)


    def createAtomFeed(self, collection, title):
        '''
        Set the specified collection up as an atom feed
        @param collection: eXist collection to set up as an atom feed
        @param title: title to give the feed - as plain (unescaped) text
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        # escape the title so that characters such as '&' cannot produce a
        # malformed feed document
        feedXML = self.FEED_STRING %self._escapeXML(title)
        logging.info("Setting up %s as an atom feed", collection)
        self.__postEditAtomFeed(collection, feedXML)
        logging.info("%s successfully set up as an atom feed", collection)


    def getAtomFeed(self, collection, proxyServer = None):
        '''
        Retrieve the contents of the feed for the specified eXist collection
        - in a feed parser object
        @param collection: eXist collection to retrieve the feed from
        @keyword proxyServer: proxy server to use, if required
        @return feed: feed in a feedparser object
        @raise FeedClientError: if feedparser flags the retrieved feed as invalid
        '''
        logging.info("Retrieving content of atom feed for collection, '%s'", collection)

        # only pass a handler to feedparser when there is a proxy to use
        handlers = []
        # NB, there is slightly unhelpful default behaviour here: if there is a
        # proxy specified in the environment variables, this will be used regardless
        # of whether it is specified here - to temporarily remove this if no proxy
        # is specified
        oldProxy = None
        if proxyServer:
            handlers.append(urllib2.ProxyHandler( {"http": proxyServer} ))
        elif utils.PROXY_KEY in os.environ:
            oldProxy = os.environ[utils.PROXY_KEY]
            del os.environ[utils.PROXY_KEY]

        try:
            feed = feedparser.parse('http://%s/exist/atom/content/%s'
                                    %(self.hostAndPort, collection),
                                    handlers = handlers)
        finally:
            # always reinstate the environment - even if the parse blows up;
            # compare with None so that an empty setting is also restored
            if oldProxy is not None:
                os.environ[utils.PROXY_KEY] = oldProxy

        # check we've got something valid back
        if feed.bozo:
            errorMessage = "%s: '%s'" %(self.FEED_ERROR_MSG, feed.bozo_exception)
            raise FeedClientError(errorMessage)

        return feed


    def createAtomFeedEntry(self, collection, atom, isDIFRecord=False):
        '''
        Add an entry to the specified collection (which should already have a
        feed set up on it) - pointing to the specified atom link
        NB, the atom title, summary and browse URL are XML escaped before
        being added to the entry
        @param collection: eXist collection with the atom feed to add the entry to
        @param atom: Atom to be referenced in the new entry
        @keyword isDIFRecord: if True, the entry contents are adjusted to
        point to the DIF doc rather than the atom doc.  Default False.
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        titleString = '%s Atom' %atom.atomTypeName
        browseURL = atom.atomBrowseURL
        if isDIFRecord:
            titleString = self.DIF_ENTRY_TITLE
            browseURL = browseURL.replace('__%s__' %ndgObject.ATOM_DOC_TYPE,
                                          '__%s__' %ndgObject.BROWSE_DIF_DOC_TYPE)

        entryXML = ('<?xml version="1.0" ?>'
                    '<entry xmlns="http://www.w3.org/2005/Atom">'
                    '<title>%s [%s]</title>'
                    '<summary>%s</summary>'
                    '<content src="%s" type="application/atom+xml"/>'
                    '</entry>') %(self._escapeXML(titleString),
                                  self._escapeXML(atom.title),
                                  self._escapeXML(atom.Summary),
                                  self._escapeXML(browseURL, isAttribute=True))
        logging.info("Adding link to atom (%s) as entry in atom feed, %s",
                     browseURL, collection)
        self.__postEditAtomFeed(collection, entryXML)
        logging.info("Successfully added new feed entry")


    def createGenericFeedEntry(self, title, summary, path):
        '''
        Add an entry to the feed of the specified collection (which should
        already have a feed set up on it) - e.g. corrections data submitted by
        users of the atom editor, or citation data
        NB, the title and summary are XML escaped before being added to the
        entry - so should be passed in as plain text
        @param title: title to give the feed entry
        @param summary: text to add to the content element of the entry
        @param path: eXist collection with the atom feed to add the entry to
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        entryXML = ('<?xml version="1.0" ?>'
                    '<entry xmlns="http://www.w3.org/2005/Atom">'
                    '<title>%s</title>'
                    '<content>%s</content>'
                    '</entry>') %(self._escapeXML(title),
                                  self._escapeXML(summary))
        logging.info("Adding entry to atom feed, %s", path)
        self.__postEditAtomFeed(path, entryXML)
        logging.info("Successfully added new feed entry")
Note: See TracBrowser for help on using the repository browser.