source: exist/trunk/python/ndgUtils/eXistConnector.py @ 4782

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/python/ndgUtils/eXistConnector.py@5371
Revision 4782, 6.2 KB checked in by cbyrom, 11 years ago (diff)

Add code to allow publishing of data to multiple feeds - to offer
different levels of granularity to subscribe to (with appropriate feed
titles) + add code to
allow feed publication to be done asynchronously + extend to
run atom to DIF transforms at point of publish for atom docs
in the published state (i.e. with state of 'published' or 'Published').
Add code to ingest required organisation data when setting up
the eXist DB - and provide cache of this data to avoid unnecessary
lookups. Also extend the eXist DB setup code to initialise the
various new feeds added.
Add utility methods to check if an eXist collection is new and to
establish the publication state of an atom doc - and to change
the state of this doc (NB, need to do a doc delete and doc create
for this - can't see a doc 'move' function in eXist). Also add
code to do the atom to DIF transform direct in the eXist DB.
Improve error handling when publishing feed data.

Line 
1# Code inspired by example on eXist website.
2import urllib2, base64, xmlrpclib, logging
3
class InstanceObject(object):
    '''Simple holder exposing its constructor keyword arguments as attributes.

    The keyword arguments are kept in the ``dict`` instance attribute and
    any attribute that is not found by the normal lookup is resolved
    against that dictionary, i.e. ``InstanceObject(a=1).a == 1``.

    Looking up a name that was not supplied raises AttributeError, as the
    attribute protocol requires, so ``getattr(obj, name, default)`` and
    ``hasattr`` behave as expected.
    '''
    def __init__(self, **kw):
        self.dict = {}
        self.dict.update(kw)

    def __getattr__(self, arg):
        # __getattr__ is only invoked once the normal lookup has failed.
        # Reach the storage through __dict__ rather than ``self.dict`` so
        # that an instance created without __init__ (no 'dict' attribute
        # yet) reports a missing attribute instead of recursing forever.
        try:
            return self.__dict__['dict'][arg]
        except KeyError:
            raise AttributeError("object has no attribute '%s'" % arg)

    def __str__(self):
        # Wrap in a tuple so the mapping is always formatted as one value.
        return 'InstanceObject: %s ' % (self.dict,)
12       
class edict(dict):
    '''An extended dictionary which allows one to set and get values
    as attributes (kudos Joe Gregorio's 1812)
    The extended part allows you to get and set values as attributes.
    That is,
       d.fred
    is the same as
       d['fred']

    Names starting with an underscore are treated as ordinary instance
    attributes: they are stored on the instance, never in the dictionary,
    and dictionary keys starting with an underscore are not reachable as
    attributes.
    '''
    def __init__(self, *args, **kw):
        # Accept the same arguments as dict(); route every item through
        # self[...] so the entries are stored exactly as item assignment
        # would store them.
        for key, value in dict(*args, **kw).items():
            self[key] = value

    def __getattr__(self, key):
        # Only called when the normal lookup (including the instance
        # __dict__) has already failed, so just consult the mapping.
        # Underscore names are deliberately not looked up as keys.
        if not key.startswith('_'):
            try:
                return self[key]
            except KeyError:
                pass
        raise AttributeError("object has no attribute '%s'" % key)

    def __setattr__(self, key, value):
        if key.startswith('_'):
            # private names live on the instance, not in the mapping
            self.__dict__[key] = value
        else:
            return self.__setitem__(key, value)
40
41
class eXistConnector(object):
    """Access class for eXist.

    Holds an authenticated urllib2 opener (``self.opener``), the matching
    basic-auth headers (``self.http_headers``) and an xmlrpclib server
    proxy (``self.xmlrpc``); the methods are thin wrappers around calls
    on that proxy.
    """

    # default collections for the various file types in eXist
    BASE_COLLECTION_PATH = "/db/atoms/"
    OLD_COLLECTION_PATH = "old/"
    PUBLISHED_COLLECTION_PATH = "Published/"
    SMALL_P_PUBLISHED_COLLECTION_PATH = "published/"
    WORKING_COLLECTION_PATH = "working/"
    BACKUP_COLLECTION_PATH = "/db/atoms_backup/"
    GRANULE_COLLECTION_PATH = "data_granules/"
    DEPLOYMENT_COLLECTION_PATH = "deployment_data/"
    DEPLOYMENTS_COLLECTION_PATH = "deployments/"
    DE_COLLECTION_PATH = "data_entities/"
    NDG_A_COLLECTION_PATH = "/db/ndg_A_metadata/"
    NDG_A_COLLECTION_PATH_BACKUP = "/db/ndg_A_metadata_backup/"
    DIF_COLLECTION_PATH = "/db/DIF/"
    PROVIDER_FEED_PATH = BASE_COLLECTION_PATH + "providerFeeds/"

    def __init__(self, constants=None):
        ''' Instantiates the eXist connector using supplied constants

        constants must provide the attributes host, userid, password,
        port (formatted with %d) and xmlrpc_base_path.

        Raises ValueError('NoExistConstants') if constants is None.
        '''
        logging.debug("Setting up xmlrpc connection to eXist")
        if constants is None:
            # String exceptions are not raisable on Python >= 2.6, so use
            # a real exception carrying the original text.
            raise ValueError('NoExistConstants')
        logging.debug("Host: '%s', User: '%s'",
                      constants.host, constants.userid)

        authinfo = urllib2.HTTPPasswordMgrWithDefaultRealm()
        authinfo.add_password(None,
                              constants.host,
                              constants.userid,
                              constants.password)
        authHandler = urllib2.HTTPBasicAuthHandler(authinfo)
        opener = urllib2.build_opener(authHandler)

        # b64encode never inserts line breaks, unlike encodestring which
        # wraps its output and would corrupt the header for long
        # credentials.
        credentials = constants.userid + ':' + constants.password
        self.auth = base64.b64encode(credentials)
        opener.addheaders.append(('Authorization', 'Basic %s' % self.auth))
        self.http_headers = {'Authorization': 'Basic %s' % self.auth}
        self.opener = opener

        # also create an xmlrpc Server object
        # NOTE(review): userid/password are placed in the URI unescaped;
        # presumably they never contain ':', '@' or '/' - confirm.
        xmlrpc_uri = 'http://%s:%s@%s:%d%s' % (constants.userid,
                                               constants.password,
                                               constants.host,
                                               constants.port,
                                               constants.xmlrpc_base_path)
        self.xmlrpc = xmlrpclib.Server(xmlrpc_uri)
        logging.debug("xmlrpc connection set up")

    def executeQuery(self, xquery, params=None):
        '''Execute an xquery string, return session and summary information

        Returns the tuple (id, summary) where id is the value returned by
        the server's executeQuery and summary the result of querySummary
        for that id.  params defaults to an empty dictionary.
        '''
        if params is None:
            params = {}
        logging.debug("Executing xquery on eXist:\n%s", xquery)
        payload = xmlrpclib.Binary(str(xquery))
        queryId = self.xmlrpc.executeQuery(payload, params)
        summary = self.xmlrpc.querySummary(queryId)
        logging.debug("XQuery executed")
        return queryId, summary

    def release(self, id):
        ''' Release an executeQuery session '''
        self.xmlrpc.releaseQueryResult(id)

    def retrieve(self, id, pos, params=None):
        ''' Retrieve a specific document from an executeQuery result set

        Returns the ``data`` attribute of the object sent back by the
        server.  params defaults to an empty dictionary.
        '''
        if params is None:
            params = {}
        logging.debug("Retrieving document from eXist...")
        xml = self.xmlrpc.retrieve(id, pos, params).data
        logging.debug("Document retrieved.")
        return xml

    def executeChunkedQuery(self, xquery, start, number, params=None):
        ''' Execute a query, return a specific part of the result set, and
        dump the session automagically

        Note the server call takes number before start.  params defaults
        to an empty dictionary.
        '''
        if params is None:
            params = {}
        # str() for consistency with executeQuery
        payload = xmlrpclib.Binary(str(xquery))
        return self.xmlrpc.query(payload, number, start, params)

    def querySummary(self, id):
        ''' Returns a summary of query results for the result-set
        referenced by id (which was returned by a previous query) '''
        return self.xmlrpc.querySummary(id)

    def getHits(self, id):
        ''' Return the number of hits associated with the query that created session id '''
        return self.xmlrpc.getHits(id)

    def getDoc(self, collectionName, documentName):
        '''
        Lightweight interface to the getDocument method

        Returns the result of the server's getDocumentAsString for the
        document path built from the two arguments.
        '''
        # atoms have a more structured collection directory - as a result
        # AtomList.xq returns the full path to the listed docs - so avoid
        # concat'ing the collectionName for these
        if documentName.startswith(collectionName):
            name = documentName
        else:
            name = '%s/%s' % (collectionName, documentName)
        return self.xmlrpc.getDocumentAsString(name, {})

    def removeCollection(self, collectionPath):
        ''' Remove a collection in the eXist database '''
        return self.xmlrpc.removeCollection(collectionPath)

    def removeDoc(self, docPath):
        ''' Remove a document from the eXist database '''
        return self.xmlrpc.remove(docPath)

    def createCollection(self, collectionPath):
        ''' Create a collection in the eXist database at collectionPath '''
        logging.info("Creating collection: '%s'", collectionPath)
        result = self.xmlrpc.createCollection(collectionPath)
        logging.info("Collection created")
        return result

    def storeXML(self, xml, path, overwrite=0):
        ''' Store some XML into the database at path

        Forwards xml, path and overwrite to the server's parse call and
        returns its result.
        '''
        return self.xmlrpc.parse(xml, path, overwrite)
161       
Note: See TracBrowser for help on using the repository browser.