source: ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/existdbfeedclient.py @ 4828

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/clients/xmldb/eXist/existdbfeedclient.py@4828
Revision 4828, 9.4 KB checked in by cbyrom, 11 years ago (diff)

Improve exist client checking of results + extend logging + add support
for producing DIF feeds + improve error handling.

Line 
'''
Class supporting set up and communication with eXist DB
for the purposes of creating and updating atom feeds

@author: C Byrom - Tessella 09
'''
7import logging, httplib
8import feedparser
9from ndg.common.src.models.vocabtermdata import VocabTermData as VTD
10from ndg.common.src.models.ndgObject import ndgObject
11from ndg.common.src.clients.xmldb.eXist.eXistConnector import eXistConnector as ec
12
13
class FeedClientError(Exception):
    """
    Exception raised when a problem is found with an eXist hosted atom feed.

    The message is written to the error log at construction time, so the
    failure is recorded even if a caller later catches and discards the
    exception.

    @param msg: description of the problem; logged and stored as the
    exception message
    """
    def __init__(self, msg):
        # Log first - the record must exist regardless of what the caller
        # subsequently does with the exception
        logging.error(msg)
        Exception.__init__(self, msg)
21
class eXistDBFeedClient:
    '''
    Client for the atom feed ('/exist/atom/...') and REST ('/exist/rest')
    interfaces of an eXist DB.  Used to set collections up as atom feeds,
    to add entries to those feeds and to read feeds/documents back.
    '''

    # Titles used for the feeds and feed entries created by this client
    TOPLEVEL_ATOM_FEED_TITLE = 'Moles Atom Data'
    TOPLEVEL_DIF_FEED_TITLE = 'DIF Data'
    ENTITYLEVEL_ATOM_FEED_TITLE = 'Moles %s Atom Data'
    DEPLOYMENTS_ATOM_FEED_TITLE = 'Deployments Data (activities/observation stations/data production tools)'
    PROVIDERLEVEL_ATOM_FEED_TITLE = 'Moles Atom Data - for provider, %s'
    # NOTE(review): identical to PROVIDERLEVEL_ATOM_FEED_TITLE - this looks
    # like a copy/paste slip; confirm the intended DIF wording before changing
    PROVIDERLEVEL_DIF_FEED_TITLE = 'Moles Atom Data - for provider, %s'
    DIF_ENTRY_TITLE = 'DIF Record'

    # Path prefix added to document paths by getRESTfulData
    REST_BASE_URL = '/exist/rest'
    FEED_ERROR_MSG = 'Failed to retrieve valid feed - error'
    # Minimal atom feed document; %s is replaced by the feed title
    FEED_STRING = '<?xml version="1.0" ?><feed xmlns="http://www.w3.org/2005/Atom"> \
        <title>%s</title></feed>'

    def __init__(self, auth, eXistDBHostname = None, eXistPortNo = '8080'):
        '''
        Initialise a client to the eXist atom feed services
        @param auth: Basic base64 encoded authentication details for the DB
        @keyword eXistDBHostname: name of eXist DB to use
        @keyword eXistPortNo: Port number that the eXist DB is exposed by - defaults
        to '8080' if not set
        '''
        logging.info("Initialising eXist feed client")
        self.auth = auth
        # NB, no connection is made here - the host/port string is only used
        # when a request is issued.  If eXistDBHostname is left as None this
        # becomes 'None:8080', so later requests will target that literal host
        self.hostAndPort = '%s:%s' % (eXistDBHostname, eXistPortNo)
        logging.info("eXist feed client initialised")


    def __postEditAtomFeed(self, collection, atomXMLString):
        '''
        Post an atom to the eXist atom feed 'edit' interface for the
        specified collection
        - if the atom is a feed, this will set up the feed, if it doesn't already
        exist
        - if the atom is an entry, this will add an entry to the feed for the
        collection, if it exists
        @param collection: eXist collection to associate the atom doc with
        @param atomXMLString: a string containing an atom doc in XML format
        @raise FeedClientError: if problems encountered when posting to eXist
        '''
        logging.debug("Setting up POST Request to eXist")
        connection = httplib.HTTPConnection(self.hostAndPort)

        # NB, authorisation is required for accessing eXist via this interface
        headers = {
            'Host': self.hostAndPort,
            'Content-Type': 'application/atom+xml',
            'Authorization': 'Basic %s' % self.auth,
        }

        # The connection is always closed on the way out - success, handled
        # status or error - so sockets are not leaked across repeated posts
        try:
            connection.request("POST", '/exist/atom/edit' + collection,
                               atomXMLString, headers)
            logging.debug("Get response...")
            response = connection.getresponse()
            logging.debug("Response retrieved")

            if response.status == 204:
                logging.info("No content returned => new feed set up")
            elif response.status == 201:
                logging.info("Successfully posted atom to eXist feed")
            elif response.status == 401 and '+already+exists' in response.reason:
                # a 401 whose reason mentions '+already+exists' is treated as
                # a harmless duplicate rather than an authorisation failure
                logging.info("Atom already exists")
            else:
                # the body must be read before the connection is closed
                logging.debug("Response error:\n\r %s" % response.read())
                raise FeedClientError("Error encountered: '%s'" % response.reason)
        finally:
            connection.close()


    def setupBasicFeeds(self):
        '''
        Set up the standard feeds expected of the eXist DB
        - nb, provider feeds are set up when creating new atoms - when
        their providers don't already have feeds available
        @raise FeedClientError: if any of the feeds cannot be posted to eXist
        '''
        self.createAtomFeed(ec.BASE_COLLECTION_PATH,
                            self.TOPLEVEL_ATOM_FEED_TITLE)
        self.VTD = VTD()
        self.deName = VTD.TERM_DATA[VTD.DE_TERM].title

        # the same four entity level feeds are created under each of the
        # two published collections
        for publishedPath in [ec.PUBLISHED_COLLECTION_PATH,
                              ec.SMALL_P_PUBLISHED_COLLECTION_PATH]:
            basePath = ec.BASE_COLLECTION_PATH + publishedPath
            self.createAtomFeed(basePath + ec.DE_COLLECTION_PATH,
                                self.ENTITYLEVEL_ATOM_FEED_TITLE % VTD.TERM_DATA[VTD.DE_TERM].title)
            self.createAtomFeed(basePath + ec.DEPLOYMENT_COLLECTION_PATH,
                                self.ENTITYLEVEL_ATOM_FEED_TITLE % VTD.TERM_DATA[VTD.DEPLOYMENT_TERM].title)
            self.createAtomFeed(basePath + ec.DEPLOYMENTS_COLLECTION_PATH,
                                self.DEPLOYMENTS_ATOM_FEED_TITLE)
            self.createAtomFeed(basePath + ec.GRANULE_COLLECTION_PATH,
                                self.ENTITYLEVEL_ATOM_FEED_TITLE % VTD.TERM_DATA[VTD.GRANULE_TERM].title)

        self.createAtomFeed(ec.DIF_COLLECTION_PATH,
                            self.TOPLEVEL_DIF_FEED_TITLE)


    def addAtomToFeeds(self, atom):
        '''
        Add an entry to the different feeds associated with the specified atom
        @param atom: an Atom object which should be linked to as an entry in the
        eXist feeds
        @raise FeedClientError: if any of the entries cannot be posted to eXist
        '''
        providerID = atom.ME.providerID

        # add to top level feed
        self.createAtomFeedEntry(ec.BASE_COLLECTION_PATH, atom)
        # add to entity level feed
        self.createAtomFeedEntry(atom.getDefaultEntityCollectionPath(), atom)

        # add to provider level feed
        self.createAtomFeedEntry(ec.PROVIDER_FEED_PATH + providerID + '/', atom)

        # add to DIF feeds - if appropriate
        if atom.isDE():
            self.createAtomFeedEntry(ec.DIF_COLLECTION_PATH, atom, isDIFRecord=True)
            self.createAtomFeedEntry(ec.DIF_COLLECTION_PATH + providerID + '/',
                                     atom, isDIFRecord=True)


    def createAtomFeed(self, collection, title):
        '''
        Set the specified collection up as an atom feed
        @param collection: eXist collection to set up as an atom feed
        @param title: title to give the feed
        @raise FeedClientError: if the feed cannot be posted to eXist
        '''
        feedXML = self.FEED_STRING % (title)
        logging.info("Setting up %s as an atom feed" % collection)
        self.__postEditAtomFeed(collection, feedXML)
        logging.info("%s successfully set up as an atom feed" % collection)


    def getAtomFeed(self, collection):
        '''
        Retrieve the contents of the feed for the specified eXist collection
        - in a feed parser object
        @param collection: eXist collection to retrieve the feed from
        @return feed: feed in a feedparser object
        @raise FeedClientError: if feedparser flags the retrieved feed as
        'bozo' (i.e. it could not be parsed cleanly)
        '''
        logging.info("Retrieving content of atom feed for collection, '%s'" % collection)
        feedURL = 'http://%s/exist/atom/content/%s' % (self.hostAndPort, collection)
        feed = feedparser.parse(feedURL)

        # check we've got something valid back
        if feed.bozo:
            errorMessage = "%s: '%s'" % (self.FEED_ERROR_MSG, feed.bozo_exception)
            raise FeedClientError(errorMessage)

        return feed


    def createAtomFeedEntry(self, collection, atom, isDIFRecord=False):
        '''
        Add an entry to the specified collection (which should already have a
        feed set up on it) - pointing to the specified atom link
        @param collection: eXist collection with the atom feed to add the entry to
        @param atom: Atom to be referenced in the new entry
        @keyword isDIFRecord: if True, the entry contents are adjusted to
        point to the DIF doc rather than the atom doc.  Default False.
        @raise FeedClientError: if the entry cannot be posted to eXist
        '''
        titleString = '%s Atom' % atom.atomTypeName
        browseURL = atom.atomBrowseURL
        if isDIFRecord:
            titleString = self.DIF_ENTRY_TITLE
            # swap the doc type marker in the URL so it targets the DIF doc
            browseURL = browseURL.replace('__%s__' % ndgObject.ATOM_DOC_TYPE,
                                          '__%s__' % ndgObject.BROWSE_DIF_DOC_TYPE)

        # NOTE(review): title, summary and URL are inserted verbatim - if they
        # can contain '&', '<' or '"' the entry will not be well formed XML.
        # Left unescaped here as it is not visible whether the atom values
        # arrive already escaped - confirm against the Atom model
        entryXML = ('<?xml version="1.0" ?>'
                    '<entry xmlns="http://www.w3.org/2005/Atom">'
                    '<title>%s [%s]</title>'
                    '<summary>%s</summary>'
                    '<content src="%s" type="application/atom+xml"/>'
                    '</entry>') % (titleString, atom.title, atom.Summary, browseURL)
        logging.info("Adding link to atom (%s) as entry in atom feed, %s" \
                     % (browseURL, collection))
        self.__postEditAtomFeed(collection, entryXML)
        logging.info("Successfully added new feed entry")


    def getRESTfulData(self, docPath, suppressPrefix = False):
        '''
        Retrieve data using the REST interface
        @param docPath: path to the data to retrieve - NB, this should equal
        the DB path to the document in question
        @keyword suppressPrefix: if True, the base, REST url, is not added to
        the specified docPath
        @return document (the response body), if a 200 status is returned,
        None, otherwise
        '''
        logging.info("Retrieving document at, %s, using RESTful interface" % docPath)
        connection = httplib.HTTPConnection(self.hostAndPort)
        url = docPath
        if not suppressPrefix:
            url = self.REST_BASE_URL + docPath

        # always release the connection - the body is read in full before
        # the finally clause closes it
        try:
            connection.request("GET", '%s' % (url))
            response = connection.getresponse()

            if response.status == 200:
                logging.info("Document successfully retrieved - returning this now")
                return response.read()
        finally:
            connection.close()

        logging.info("- no document found at path, '%s'" % docPath)
        return None
218       
Note: See TracBrowser for help on using the repository browser.