source: ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/feedclient.py @ 5192

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/feedclient.py@5192
Revision 5192, 10.7 KB checked in by cbyrom, 12 years ago (diff)

Escape/unescape special XML characters on export/import of data from
the Category and Link data models, to simplify processing + ensure
things are escaped when running xml validation + simplify handling
of lists/single values when setting atom attributes + fix handling
of chunked query execution + tidy up various imports and update
tests.

Line 
1'''
2 Class supporting set up and communication with eXist DB
3 for the purposes of creating and updating atom feeds
4 
5 @author: C Byrom - Tessella 09
6'''
7import logging, httplib
8import feedparser
9from ndg.common.src.models.vocabtermdata import VocabTermData as VTD
10from ndg.common.src.models.ndgObject import ndgObject
11import ndg.common.src.lib.utilities as utils
12import ndg.common.src.clients.xmldb.eXist.dbconstants as dc
13from ndg.common.src.models import AtomState
14
15
class FeedClientError(Exception):
    '''
    Raised when a problem is found with an eXist hosted atom feed.

    The message is written to the error log as a side effect of constructing
    the exception, so the problem is recorded even if a caller swallows it.
    '''
    def __init__(self, msg):
        '''
        @param msg: description of the problem; logged at error level and
        stored as the exception message
        '''
        logging.error(msg)
        super(FeedClientError, self).__init__(msg)
23
class FeedClient(object):
    '''
    Client for the atom feed services of an eXist DB.

    Feeds and feed entries are created by POSTing small atom documents to the
    eXist atom 'edit' interface; feeds are read back via the atom 'content'
    interface and parsed with feedparser.
    '''

    # Titles used for the standard feeds
    TOPLEVEL_ATOM_FEED_TITLE = 'Moles Atom Data'
    TOPLEVEL_DIF_FEED_TITLE = 'DIF Data'
    TOPLEVEL_PUBLISHED_FEED_TITLE = 'All Published Data'
    ENTITYLEVEL_ATOM_FEED_TITLE = 'Moles %s Atom Data'
    DEPLOYMENTS_ATOM_FEED_TITLE = 'Deployments Data (activities/observation stations/data production tools)'
    PROVIDER_SUFFIX = ' - for provider, %s'
    PROVIDERLEVEL_ATOM_FEED_TITLE = TOPLEVEL_ATOM_FEED_TITLE + PROVIDER_SUFFIX
    PROVIDERLEVEL_DIF_FEED_TITLE = TOPLEVEL_DIF_FEED_TITLE + PROVIDER_SUFFIX
    CITATIONS_FEED_TITLE = 'Citations data received by Trackback mechanism'
    CORRECTIONS_FEED_TITLE = 'Corrections data submitted by atom editor users'
    DIF_ENTRY_TITLE = 'DIF Record'

    REST_BASE_URL = '/exist/rest'
    FEED_ERROR_MSG = 'Failed to retrieve valid feed - error'
    # Template for an (empty) feed document; %s is the feed title
    FEED_STRING = ('<?xml version="1.0" ?><feed xmlns="http://www.w3.org/2005/Atom"> '
                   '        <title>%s</title></feed>')

    def __init__(self, auth, dbHostName = None, eXistPortNo = '8080'):
        '''
        Initialise a client to the eXist atom feed services
        @param auth: Basic base64 encoded authentication details for the DB
        @keyword dbHostName: name of eXist DB to use - NB, no validation is
        done here, so leaving this unset gives a host of 'None'
        @keyword eXistPortNo: Port number that the eXist DB is exposed by - defaults
        to '8080' if not set
        '''
        logging.info("Initialising eXist feed client")
        self.auth = auth
        self.hostAndPort = '%s:%s' %(dbHostName, eXistPortNo)
        logging.info("eXist feed client initialised")


    @staticmethod
    def _escapeXML(value, isAttribute = False):
        '''
        Make a value safe for inclusion in an XML document
        @param value: value to escape - formatted with '%s' first, so that
        non-string values are rendered exactly as string formatting would
        @keyword isAttribute: if True, double quotes are escaped too, so the
        result can be placed inside a double quoted attribute.  Default False.
        @return: text with the XML special characters escaped
        '''
        # imported here so this class does not rely on a module level import
        from xml.sax.saxutils import escape

        text = '%s' %(value,)
        if isAttribute:
            return escape(text, {'"': '&quot;'})
        return escape(text)


    def __postEditAtomFeed(self, collection, atomXMLString):
        '''
        Post an atom to the eXist atom feed 'edit' interface for the
        specified collection
        - if the atom is a feed, this will set up the feed, if it doesn't already
        exist
        - if the atom is an entry, this will add an entry to the feed for the
        collection, if it exists
        @param collection: eXist collection to associate the atom doc with
        @param atomXMLString: a string containing an atom doc in XML format
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        logging.debug("Setting up POST Request to eXist")
        connection = httplib.HTTPConnection(self.hostAndPort)

        # NB, authorisation is required for accessing eXist via this interface
        headers = {
            'Host': self.hostAndPort,
            'Content-Type': 'application/atom+xml',
            'Authorization': 'Basic %s' %self.auth
        }

        try:
            connection.request("POST", '/exist/atom/edit' + collection,
                               atomXMLString, headers)
            logging.debug("Get response...")
            response = connection.getresponse()
            logging.debug("Response retrieved")

            if response.status == 204:
                logging.info("No content returned => new feed set up")
            elif response.status == 201:
                logging.info("Successfully posted atom to eXist feed")
            elif response.status == 401 and '+already+exists' in response.reason:
                # a 401 carrying this reason is treated as success, not as
                # an authorisation failure
                logging.info("Atom already exists")
            else:
                logging.debug("Response error:\n\r %s" %response.read())
                raise FeedClientError("Error encountered: '%s'" %response.reason)
        finally:
            # always release the socket - including when an error is raised
            connection.close()


    def setupBasicFeeds(self):
        '''
        Set up the standard feeds expected of the eXist DB
        - nb, provider feeds are set up when creating new atoms - when
        their providers don't already have feeds available
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        self.createAtomFeed(dc.ATOM_COLLECTION_PATH,
                            self.TOPLEVEL_ATOM_FEED_TITLE)
        self.VTD = VTD()
        self.deName = VTD.TERM_DATA[VTD.DE_TERM].title

        # entity level feeds - (collection path, feed title) - in the order
        # they are to be created under each of the publication state paths
        entityFeeds = [
            (dc.DE_COLLECTION_PATH,
             self.ENTITYLEVEL_ATOM_FEED_TITLE %self.deName),
            (dc.DEPLOYMENT_COLLECTION_PATH,
             self.ENTITYLEVEL_ATOM_FEED_TITLE %VTD.TERM_DATA[VTD.DEPLOYMENT_TERM].title),
            (dc.DEPLOYMENTS_COLLECTION_PATH,
             self.DEPLOYMENTS_ATOM_FEED_TITLE),
            (dc.GRANULE_COLLECTION_PATH,
             self.ENTITYLEVEL_ATOM_FEED_TITLE %VTD.TERM_DATA[VTD.GRANULE_TERM].title),
        ]
        for statePath in (dc.PUBLISHED_COLLECTION_PATH,
                          dc.SMALL_P_PUBLISHED_COLLECTION_PATH):
            for entityPath, feedTitle in entityFeeds:
                self.createAtomFeed(dc.ATOM_COLLECTION_PATH + statePath + entityPath,
                                    feedTitle)

        self.createAtomFeed(dc.ATOM_COLLECTION_PATH + dc.PUBLISHED_COLLECTION_PATH,
                            self.TOPLEVEL_PUBLISHED_FEED_TITLE)

        self.createAtomFeed(dc.DIF_COLLECTION_PATH,
                            self.TOPLEVEL_DIF_FEED_TITLE)

        self.createAtomFeed(dc.CORRECTIONS_COLLECTION_PATH,
                            self.CORRECTIONS_FEED_TITLE)

        self.createAtomFeed(dc.CITATIONS_COLLECTION_PATH,
                            self.CITATIONS_FEED_TITLE)


    def addAtomToFeeds(self, atom):
        '''
        Add an entry to the different feeds associated with the specified atom
        @param atom: an Atom object which should be linked to as an entry in the
        eXist feeds
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        providerID = atom.ME.providerID

        # add to top level feed
        self.createAtomFeedEntry(dc.ATOM_COLLECTION_PATH, atom)

        # add to entity level feed
        self.createAtomFeedEntry(atom.getDefaultEntityCollectionPath(), atom)

        # add to Published top level feed, if appropriate
        if atom.state == AtomState.PUBLISHED_STATE:
            self.createAtomFeedEntry(dc.ATOM_COLLECTION_PATH +
                                     dc.PUBLISHED_COLLECTION_PATH, atom)

        # add to provider level feed
        self.createAtomFeedEntry(dc.PROVIDER_FEED_PATH + providerID + '/', atom)

        # add to DIF feeds - if appropriate
        if atom.isDE():
            self.createAtomFeedEntry(dc.DIF_COLLECTION_PATH, atom, isDIFRecord=True)
            self.createAtomFeedEntry(dc.DIF_COLLECTION_PATH + providerID + '/',
                                     atom, isDIFRecord=True)


    def createAtomFeed(self, collection, title):
        '''
        Set the specified collection up as an atom feed
        @param collection: eXist collection to set up as an atom feed
        @param title: title to give the feed - as plain, unescaped text; XML
        special characters are escaped here
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        feedXML = self.FEED_STRING %self._escapeXML(title)
        logging.info("Setting up %s as an atom feed" %collection)
        self.__postEditAtomFeed(collection, feedXML)
        logging.info("%s successfully set up as an atom feed" %collection)


    def getAtomFeed(self, collection, proxyServer = None):
        '''
        Retrieve the contents of the feed for the specified eXist collection
        - in a feed parser object
        @param collection: eXist collection to retrieve the feed from
        @keyword proxyServer: proxy server to use, if required
        @return feed, xml: feed in a feedparser object and xml original data
        @raise FeedClientError: if the data returned contains an HTTP error
        or cannot be parsed as a valid feed
        '''
        logging.info("Retrieving content of atom feed for collection, '%s'" %collection)

        uri = 'http://%s/exist/atom/content/%s' %(self.hostAndPort, collection)

        proxy = {}
        if proxyServer:
            proxy = {'http': proxyServer}
        xml = utils.openURLWithProxy(uri, proxy)

        # NB, feedparser is not fussy when it comes to parsing XML docs - so won't
        # detect if there's been an error here - so check for this in the XML
        # NB, FeedClientError logs its message - so no need to log it here too
        if 'HTTP ERROR' in xml:
            raise FeedClientError("Error returned when retrieving feed: '%s'" %xml)

        feed = feedparser.parse(xml)

        # check we've got something valid back
        if feed.bozo:
            raise FeedClientError("%s: '%s'" %(self.FEED_ERROR_MSG,
                                               feed.bozo_exception))

        return feed, xml


    def createAtomFeedEntry(self, collection, atom, isDIFRecord=False):
        '''
        Add an entry to the specified collection (which should already have a
        feed set up on it) - pointing to the specified atom link
        NB, the atom title, summary and browse URL are taken to be plain,
        unescaped values - XML special characters in them are escaped here
        @param collection: eXist collection with the atom feed to add the entry to
        @param atom: Atom to be referenced in the new entry
        @keyword isDIFRecord: if True, the entry contents are adjusted to
        point to the DIF doc rather than the atom doc.  Default False.
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        titleString = '%s Atom' %atom.atomTypeName
        browseURL = atom.atomBrowseURL
        if isDIFRecord:
            titleString = self.DIF_ENTRY_TITLE
            browseURL = browseURL.replace('__%s__' %ndgObject.ATOM_DOC_TYPE,
                                          '__%s__' %ndgObject.BROWSE_DIF_DOC_TYPE)

        entryXML = ('<?xml version="1.0" ?>'
                    '<entry xmlns="http://www.w3.org/2005/Atom">'
                    '<title>%s [%s]</title>'
                    '<summary>%s</summary>'
                    '<content src="%s" type="application/atom+xml"/>'
                    '</entry>') %(self._escapeXML(titleString),
                                  self._escapeXML(atom.title),
                                  self._escapeXML(atom.Summary),
                                  self._escapeXML(browseURL, isAttribute = True))
        logging.info("Adding link to atom (%s) as entry in atom feed, %s" \
                     %(browseURL, collection))
        self.__postEditAtomFeed(collection, entryXML)
        logging.info("Successfully added new feed entry")


    def createGenericFeedEntry(self, title, summary, path):
        '''
        Add an entry to the specified collection (which should already have a
        feed set up on it) - with details of the corrections data submitted by
        users of the atom editor
        NB, the content element has no type, so is plain text as far as atom
        is concerned - XML special characters in the inputs are escaped here
        @param title: title to give the feed entry
        @param summary: text to add to the content element of the entry
        @param path: path to collection for the feed
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        entryXML = ('<?xml version="1.0" ?>'
                    '<entry xmlns="http://www.w3.org/2005/Atom">'
                    '<title>%s</title>'
                    '<content>%s</content>'
                    '</entry>') %(self._escapeXML(title),
                                  self._escapeXML(summary))
        logging.info("Adding corrections data to atom feed")
        self.__postEditAtomFeed(path, entryXML)
        logging.info("Successfully added new feed entry")
Note: See TracBrowser for help on using the repository browser.