source: ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/feedclient.py @ 4976

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/feedclient.py@4976
Revision 4976, 9.4 KB checked in by cbyrom, 11 years ago (diff)

Implement an eXist client that is based on the RESTful DB interface and implements the interfacexmldbclient interface. Also add some small
code tidy-ups and fix a couple of bugs.

Line 
1'''
2 Class supporting set up and communication with eXist DB
3 for the purposes of creating and updating atom feeds
4 
5 @author: C Byrom - Tessella 09
6'''
7import logging, httplib
8import feedparser
9from ndg.common.src.models.vocabtermdata import VocabTermData as VTD
10from ndg.common.src.models.ndgObject import ndgObject
11import ndg.common.src.clients.xmldb.eXist.dbconstants as dc
12
13
class FeedClientError(Exception):
    """
    Raised when a problem is found with an eXist hosted atom feed.

    NB, constructing the exception also writes the message to the error log,
    so callers do not need to log it themselves before raising.
    """
    def __init__(self, msg):
        # set up the standard exception state, then record the problem
        super(FeedClientError, self).__init__(msg)
        logging.error(msg)
21
class FeedClient(object):
    '''
    Client for setting up, adding entries to and reading atom feeds hosted
    by an eXist DB - using the eXist atom 'edit' and 'content' HTTP interfaces
    - together with a simple GET helper for the eXist REST interface
    '''

    TOPLEVEL_ATOM_FEED_TITLE = 'Moles Atom Data'
    TOPLEVEL_DIF_FEED_TITLE = 'DIF Data'
    ENTITYLEVEL_ATOM_FEED_TITLE = 'Moles %s Atom Data'
    DEPLOYMENTS_ATOM_FEED_TITLE = 'Deployments Data (activities/observation stations/data production tools)'
    PROVIDER_SUFFIX = ' - for provider, %s'
    PROVIDERLEVEL_ATOM_FEED_TITLE = TOPLEVEL_ATOM_FEED_TITLE + PROVIDER_SUFFIX
    PROVIDERLEVEL_DIF_FEED_TITLE = TOPLEVEL_DIF_FEED_TITLE + PROVIDER_SUFFIX
    DIF_ENTRY_TITLE = 'DIF Record'

    REST_BASE_URL = '/exist/rest'
    FEED_ERROR_MSG = 'Failed to retrieve valid feed - error'
    # NB, the line continuation sits inside the string literal, so the
    # indentation of the second line is part of the template (harmless
    # whitespace between the XML elements)
    FEED_STRING = '<?xml version="1.0" ?><feed xmlns="http://www.w3.org/2005/Atom"> \
        <title>%s</title></feed>'

    def __init__(self, auth, dbHostName = None, eXistPortNo = '8080'):
        '''
        Initialise a client to the eXist atom feed services
        - NB, no connection is made to the DB at this point
        @param auth: Basic base64 encoded authentication details for the DB
        @keyword dbHostName: name of eXist DB to use - NB, if left as None the
        host string becomes 'None:<port>' and later requests will not connect
        @keyword eXistPortNo: Port number that the eXist DB is exposed by - defaults
        to '8080' if not set
        '''
        logging.info("Initialising eXist feed client")
        self.auth = auth
        self.hostAndPort = '%s:%s' % (dbHostName, eXistPortNo)
        logging.info("eXist feed client initialised")


    @staticmethod
    def _escapeXML(value):
        '''
        Make a value safe for inclusion in XML element text or in a double
        quoted XML attribute
        @param value: value to escape - formatted with '%s' first, so non-string
        values (including None) are handled as they would be by a plain
        string substitution
        @return: string with &, <, > and " replaced by their XML entities
        '''
        # imported locally so that the module level imports are unaffected
        from xml.sax.saxutils import escape
        return escape('%s' % (value,), {'"': '&quot;'})


    def __postEditAtomFeed(self, collection, atomXMLString):
        '''
        Post an atom to the eXist atom feed 'edit' interface for the
        specified collection
        - if the atom is a feed, this will set up the feed, if it doesn't already
        exist
        - if the atom is an entry, this will add an entry to the feed for the
        collection, if it exists
        @param collection: eXist collection to associate the atom doc with
        - appended directly to '/exist/atom/edit'
        @param atomXMLString: a string containing an atom doc in XML format
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        logging.debug("Setting up POST Request to eXist")

        # NB, authorisation is required for accessing eXist via this interface
        headers = {
            'Host': self.hostAndPort,
            'Content-Type': 'application/atom+xml',
            'Authorization': 'Basic %s' % self.auth
        }

        connection = httplib.HTTPConnection(self.hostAndPort)
        try:
            connection.request("POST", '/exist/atom/edit' + collection,
                               atomXMLString, headers)
            logging.debug("Get response...")
            response = connection.getresponse()
            logging.debug("Response retrieved")
            status = response.status
            reason = response.reason
            # read the body before closing, so it is available for error reports
            responseBody = response.read()
        finally:
            # always release the socket - whether or not the request worked
            connection.close()

        if status == 204:
            logging.info("No content returned => new feed set up")
        elif status == 201:
            logging.info("Successfully posted atom to eXist feed")
        elif status == 401 and '+already+exists' in reason:
            # a 401 with this reason text is treated as a duplicate, not a failure
            logging.info("Atom already exists")
        else:
            logging.debug("Response error:\n\r %s", responseBody)
            raise FeedClientError("Error encountered: '%s'" % reason)


    def setupBasicFeeds(self):
        '''
        Set up the standard feeds expected of the eXist DB
        - nb, provider feeds are set up when creating new atoms - when
        their providers don't already have feeds available
        @raise FeedClientError: if any of the feeds cannot be set up
        '''
        self.createAtomFeed(dc.ATOM_COLLECTION_PATH,
                            self.TOPLEVEL_ATOM_FEED_TITLE)
        self.VTD = VTD()
        self.deName = VTD.TERM_DATA[VTD.DE_TERM].title

        # (entity collection, feed title) pairs - each is set up under both
        # of the published collections, in the order listed
        entityFeeds = [
            (dc.DE_COLLECTION_PATH,
             self.ENTITYLEVEL_ATOM_FEED_TITLE % self.deName),
            (dc.DEPLOYMENT_COLLECTION_PATH,
             self.ENTITYLEVEL_ATOM_FEED_TITLE % VTD.TERM_DATA[VTD.DEPLOYMENT_TERM].title),
            (dc.DEPLOYMENTS_COLLECTION_PATH,
             self.DEPLOYMENTS_ATOM_FEED_TITLE),
            (dc.GRANULE_COLLECTION_PATH,
             self.ENTITYLEVEL_ATOM_FEED_TITLE % VTD.TERM_DATA[VTD.GRANULE_TERM].title),
        ]
        for publishedPath in (dc.PUBLISHED_COLLECTION_PATH,
                              dc.SMALL_P_PUBLISHED_COLLECTION_PATH):
            for entityPath, feedTitle in entityFeeds:
                self.createAtomFeed(dc.ATOM_COLLECTION_PATH + publishedPath + entityPath,
                                    feedTitle)

        self.createAtomFeed(dc.DIF_COLLECTION_PATH,
                            self.TOPLEVEL_DIF_FEED_TITLE)


    def addAtomToFeeds(self, atom):
        '''
        Add an entry to the different feeds associated with the specified atom
        - i.e. the top level, entity level and provider level atom feeds and,
        if atom.isDE() is true, the top level and provider level DIF feeds
        @param atom: an Atom object which should be linked to as an entry in the
        eXist feeds
        @raise FeedClientError: if any of the entries cannot be posted
        '''
        providerID = atom.ME.providerID

        # add to top level feed
        self.createAtomFeedEntry(dc.ATOM_COLLECTION_PATH, atom)
        # add to entity level feed
        self.createAtomFeedEntry(atom.getDefaultEntityCollectionPath(), atom)

        # add to provider level feed
        self.createAtomFeedEntry(dc.PROVIDER_FEED_PATH + providerID + '/', atom)

        # add to DIF feeds - if appropriate
        if atom.isDE():
            self.createAtomFeedEntry(dc.DIF_COLLECTION_PATH, atom, isDIFRecord=True)
            self.createAtomFeedEntry(dc.DIF_COLLECTION_PATH + providerID + '/',
                                     atom, isDIFRecord=True)


    def createAtomFeed(self, collection, title):
        '''
        Set the specified collection up as an atom feed
        @param collection: eXist collection to set up as an atom feed
        @param title: title to give the feed - XML special characters are
        escaped before the title is added to the feed doc
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        feedXML = self.FEED_STRING % self._escapeXML(title)
        logging.info("Setting up %s as an atom feed", collection)
        self.__postEditAtomFeed(collection, feedXML)
        logging.info("%s successfully set up as an atom feed", collection)


    def getAtomFeed(self, collection):
        '''
        Retrieve the contents of the feed for the specified eXist collection
        - in a feed parser object
        @param collection: eXist collection to retrieve the feed from
        @return feed: feed in a feedparser object
        @raise FeedClientError: if feedparser flags the retrieved feed as
        invalid (i.e. sets its 'bozo' flag)
        '''
        logging.info("Retrieving content of atom feed for collection, '%s'", collection)
        feedURL = 'http://%s/exist/atom/content/%s' % (self.hostAndPort, collection)
        feed = feedparser.parse(feedURL)

        # check we've got something valid back
        if feed.bozo:
            errorMessage = "%s: '%s'" % (self.FEED_ERROR_MSG, feed.bozo_exception)
            raise FeedClientError(errorMessage)

        return feed


    def _buildEntryXML(self, atom, isDIFRecord=False):
        '''
        Create the atom entry doc used to reference the specified atom
        @param atom: Atom to be referenced in the new entry - its atomTypeName,
        atomBrowseURL, title and Summary attributes are used
        @keyword isDIFRecord: if True, the entry contents are adjusted to
        point to the DIF doc rather than the atom doc.  Default False.
        @return: tuple of (browse URL used by the entry - unescaped, entry XML string)
        '''
        titleString = '%s Atom' % atom.atomTypeName
        browseURL = atom.atomBrowseURL
        if isDIFRecord:
            titleString = self.DIF_ENTRY_TITLE
            browseURL = browseURL.replace('__%s__' % ndgObject.ATOM_DOC_TYPE,
                                          '__%s__' % ndgObject.BROWSE_DIF_DOC_TYPE)

        # NB, all the values are escaped, otherwise a stray '&', '<' or '"' in
        # the atom data leads to a malformed doc being posted to eXist.
        # NOTE(review): assumes the atom fields hold plain text - values that
        # are already XML escaped would be escaped twice; confirm with callers
        entryXML = ('<?xml version="1.0" ?>'
                    '<entry xmlns="http://www.w3.org/2005/Atom">'
                    '<title>%s [%s]</title>'
                    '<summary>%s</summary>'
                    '<content src="%s" type="application/atom+xml"/>'
                    '</entry>') % (self._escapeXML(titleString),
                                   self._escapeXML(atom.title),
                                   self._escapeXML(atom.Summary),
                                   self._escapeXML(browseURL))
        return browseURL, entryXML


    def createAtomFeedEntry(self, collection, atom, isDIFRecord=False):
        '''
        Add an entry to the specified collection (which should already have a
        feed set up on it) - pointing to the specified atom link
        @param collection: eXist collection with the atom feed to add the entry to
        @param atom: Atom to be referenced in the new entry
        @keyword isDIFRecord: if True, the entry contents are adjusted to
        point to the DIF doc rather than the atom doc.  Default False.
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        browseURL, entryXML = self._buildEntryXML(atom, isDIFRecord=isDIFRecord)
        logging.info("Adding link to atom (%s) as entry in atom feed, %s",
                     browseURL, collection)
        self.__postEditAtomFeed(collection, entryXML)
        logging.info("Successfully added new feed entry")


    def getRESTfulData(self, docPath, suppressPrefix = False):
        '''
        Retrieve data using the REST interface
        @param docPath: path to the data to retrieve - NB, this should equal
        the DB path to the document in question
        @keyword suppressPrefix: if True, the base, REST url, is not added to
        the specified docPath
        @return document, if found (i.e. a 200 response is received), None, otherwise
        '''
        logging.info("Retrieving document at, %s, using RESTful interface", docPath)
        url = docPath
        if not suppressPrefix:
            url = self.REST_BASE_URL + docPath

        connection = httplib.HTTPConnection(self.hostAndPort)
        try:
            connection.request("GET", '%s' % (url))
            response = connection.getresponse()
            if response.status == 200:
                logging.info("Document successfully retrieved - returning this now")
                # NB, the body must be read before the connection is closed
                return response.read()
        finally:
            # always release the socket - whatever the outcome
            connection.close()

        logging.info("- no document found at path, '%s'", docPath)
        return None
Note: See TracBrowser for help on using the repository browser.