source: ndgCommon/trunk/ndg/common/src/dal/ndgRetrieve.py @ 4988

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/dal/ndgRetrieve.py@4988
Revision 4988, 9.2 KB checked in by cbyrom, 11 years ago (diff)

Refactor ndgRetrieve making much more structured + turning into an
object to allow simple re-use + improve documentation and logging +
update unit tests.

Line 
1'''
2 Class allowing the retrieval of ndg documents via their ndgObject, with
3 security in place.
4 
5 @author: B Lawrence?, C Byrom, Tessella Feb 09
6'''
7import cgi,time, logging
8from ndg.common.src.clients.xmldb.eXist.searchclient import SearchClient
9from ndg.common.src.clients.http.httpsearchclient import HTTPSearchClient
10import ndg.common.src.clients.xmldb.eXist.dbconstants as dc
11from ndg.common.src.clients.ws.discoveryserviceclient import DiscoveryServiceClient
12from ndg.common.src.models.ndgObject import ndgObject as no
13
14
class NDGRetrieveError(Exception):
    """
    Error raised when an NDG document cannot be retrieved.

    The message is written to the error log as a side effect of
    constructing the exception, so every raised instance is logged.
    """
    def __init__(self, msg):
        # initialise the base exception first, then record the failure
        super(NDGRetrieveError, self).__init__(msg)
        logging.error(msg)
22
23
class NDGRetrieve(object):
    '''
    Retrieves NDG documents identified by an ndgObject, either from an
    eXist DB (local or remote) or via the discovery web service.
    '''

    def __init__(self, config, useRemoteEXist = False, useDiscovery = True):
        '''
        Constructor for NDGRetrieve object
        @param config: config object to use with object; must provide a
        get(section, option) method
        @keyword useRemoteEXist: if True, set default behaviour to use a remote eXist
        DB when retrieving data - via a (secured) restful http call.  Default = False
        @keyword useDiscovery: if True, AND the schema is one of the discovery
        schemas, use the discovery ws to retrieve ndg docs.  Default = True
        NB, cannot have both the useRemoteEXist and useDiscovery = True
        - also, note, these are just the default values; they can be overridden on
        calls to retrieveDoc()
        '''
        logging.info("Instantiating NDGRetrieve")
        self.config = config
        self.__getEXistConfigData()
        self.useRemoteEXist = useRemoteEXist
        self.useDiscovery = useDiscovery
        logging.info("NDGRetrieve instantiated")


    def __getClient(self, ndgObject, useRemoteEXist, useDiscovery):
        '''
        Set up the client used to retrieve the ndg doc - NB, this can be
        a webservice client or an eXist client; both are used via their
        getNDGDoc method
        @param ndgObject: ndgObject representing doc to retrieve
        @param useRemoteEXist: if True, allow use of a remote eXist
        DB when retrieving data - via a (secured) restful http call.
        @param useDiscovery: if True, use the discovery ws to retrieve ndg docs
        @raise ValueError: if both useRemoteEXist and useDiscovery are set
        @raise NDGRetrieveError: if no suitable client can be set up
        @return: client to retrieve the doc with
        '''
        logging.debug("Getting approprate connection to retrieve document")
        if useRemoteEXist and useDiscovery:
            raise ValueError('Invalid options to NDGRetrieve (useRemoteEXist AND useDiscovery impossible)')

        if useDiscovery:
            return self.__getDiscoveryClient(ndgObject)

        return self.__getEXistClient(ndgObject, useRemoteEXist)


    def __getDiscoveryClient(self, ndgObject):
        '''
        Return discovery client, if appropriate, for retrieving ndgObject
        @param ndgObject: ndgObject associated with doc to retrieve
        @raise NDGRetrieveError: if object not of appropriate schema to use
        the discovery service for
        @return: DiscoveryServiceClient
        '''
        logging.debug("Getting Discovery service client")
        if ndgObject.schema not in no.DISCOVERY_SCHEMES:
            # NB, adjacent literals are used (rather than a backslash inside
            # the string) so that no source indentation leaks into the message
            raise NDGRetrieveError('Invalid schema (%s) with useDiscovery '
                                   'option in NDGRetrieve - should be one of %s'
                                   % (ndgObject.schema, no.DISCOVERY_SCHEMES))

        client = DiscoveryServiceClient()
        logging.debug("- returning discovery service client")
        return client


    def __getEXistClient(self, ndgObject, useRemoteEXist):
        '''
        Return EXist client, if appropriate, for retrieving ndgObject
        @param ndgObject: ndgObject associated with doc to retrieve
        @param useRemoteEXist: if True, allow use of a remote eXist
        DB when retrieving data - via a (secured) restful http call.
        @raise NDGRetrieveError: if configuration not set correctly, if the
        local client cannot be created, or if the doc is not held locally and
        remote lookups are not allowed / not configured
        @return: client with getNDGDoc method available to retrieve ndgObject
        from an eXist DB
        '''
        logging.debug("Getting Browse service client")

        # lookup the repository associated with the ndgObject data - and compare
        try:
            browseRepository = self.config.get('NDG_B_SERVICE', ndgObject.repository)
        except Exception:
            raise NDGRetrieveError("Config file not available or incomplete "
                                   "- cannot find data for NDG_B_SERVICE, %s"
                                   % ndgObject.repository)

        logging.debug("NDG_B_SERVICE: '%s', default repository: '%s'",
                      browseRepository, self.localRepository)

        # firstly check we are using a locally available repository
        if browseRepository == self.localRepository:
            logging.debug("Data available locally - in eXist repository, '%s'",
                          ndgObject.repository)
            # only the config lookup is reported as a config problem ...
            try:
                eXistDB = self.config.get('NDG_EXIST', ndgObject.repository)
            except Exception:
                raise NDGRetrieveError('Config file not available or has '
                                       'no [NDG_EXIST] setting')

            logging.debug("Retrieving data from eXist DB, '%s'", eXistDB)
            # ... whereas a failure to create the client is reported as such,
            # keeping the same exception type callers already handle
            try:
                client = SearchClient(dbHostName = eXistDB, configFileName = self.pwf)
            except Exception as e:
                raise NDGRetrieveError("Could not create client to local eXist DB, '%s': %s"
                                       % (eXistDB, e))
            logging.debug("Returning client to local eXist DB")
            return client

        # if not local, we need to be able to use remote lookups
        if not useRemoteEXist:
            raise NDGRetrieveError('The uri [%s] is not available on local repository [%s]'
                                   % (ndgObject, self.localRepository))

        logging.debug("Using remote eXist DB to retrieve data")
        if not self.proxyServer:
            raise NDGRetrieveError('No proxyServer settings available in config; these '
                                   'are required for looking up remote data')
        client = HTTPSearchClient(browseRepository, proxyServer = self.proxyServer)
        logging.debug("Returning client to remote eXist DB")
        return client


    def __getEXistConfigData(self):
        '''
        Lookup standard eXist configuration data required to retrieve the docs
        - sets self.localRepository, self.pwf and self.proxyServer
        '''
        self.localRepository = self.config.get('DEFAULT', 'repository')
        self.pwf = self.config.get('NDG_EXIST', 'passwordFile')
        self.proxyServer = self.config.get('DEFAULT', 'proxyServer')


    def __getTargetCollection(self, schema):
        '''
        Determine the collection path to retrieve a doc of the specified
        schema from
        @param schema: schema of the doc to retrieve
        @return: collection path, with a single trailing slash removed if
        present; schemas without a dedicated collection use the MOLES one
        '''
        # NB, compared with == in order, to keep the original matching rules
        collections = (
            (no.NDGA0_DOC_TYPE, dc.NDG_A_COLLECTION_PATH),
            (no.NUMSIM_DOC_TYPE, dc.NUMSIM_COLLECTION_PATH),
            (no.ATOM_DOC_TYPE, dc.ATOM_COLLECTION_PATH),
            (no.ATOM_BACKUP_DOC_TYPE, dc.BACKUP_COLLECTION_PATH),
            (no.BROWSE_DIF_DOC_TYPE, dc.DIF_COLLECTION_PATH),
        )
        target = dc.MOLES_COLLECTION_PATH
        for docType, collectionPath in collections:
            if schema == docType:
                target = collectionPath
                break

        # strip trailing slash, if required
        if target.endswith('/'):
            target = target[0:-1]
        return target


    def retrieveDoc(self, ndgObject, outputFormat = None,
                    useRemoteEXist = None, useDiscovery = None):
        '''
        Retrieve the doc represented by the specified ndgObject
        @param ndgObject: ndgObject representing doc to retrieve
        @keyword outputFormat: string specifying output format to return doc in.
        NB, this is accepted for interface compatibility but is not currently
        used by this method
        @keyword useRemoteEXist: if not None, overrides, for this call only, the
        useRemoteEXist value set on the constructor
        @keyword useDiscovery: if not None, overrides, for this call only, the
        useDiscovery value set on the constructor
        NB, cannot have both the useRemoteEXist and useDiscovery = True
        @raise ValueError: if both useRemoteEXist and useDiscovery end up True
        @raise NDGRetrieveError: if a client to retrieve the doc cannot be set up
        @return: status, doc: status = 0 if the retrieve fails, or 1 if it
        succeeds. doc = retrieved doc on success, or an (HTML escaped) error
        message on failure
        '''
        logging.info("Retrieving NDG doc with URI, %s", ndgObject)
        # allow users to adjust retrieval settings on a per retrieval basis
        remote = self.useRemoteEXist if useRemoteEXist is None else useRemoteEXist
        discovery = self.useDiscovery if useDiscovery is None else useDiscovery

        client = self.__getClient(ndgObject, remote, discovery)
        target = self.__getTargetCollection(ndgObject.schema)

        # do the actual retrieve:
        try:
            startTime = time.time()
            doc = client.getNDGDoc(ndgObject.repository, ndgObject.schema,
                                   ndgObject.localID, targetCollection=target)
            elapsed = time.time() - startTime
            logging.info('Document retrieve [%s] took [%s]', ndgObject, elapsed)
        except Exception as e:
            error = str(e)
            logging.error('Document retrieve [%s] failed [%s]', ndgObject, error)
            # cgi.escape no longer exists on python 3.8+; html.escape is the
            # replacement.  Quote escaping is switched off explicitly, to match
            # the cgi.escape default
            try:
                from html import escape
            except ImportError:
                from cgi import escape
            return 0, 'Document retrieve [%s] failed [%s]' % (ndgObject, escape(error, False))

        return 1, doc
212       
Note: See TracBrowser for help on using the repository browser.