source: ndgCommon/trunk/ndg/common/src/dal/ndgRetrieve.py @ 5747

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/dal/ndgRetrieve.py
Revision 5747, 10.2 KB checked in by sdonegan, 11 years ago (diff)

Takes the discoveryService API URL as an optional argument, replacing the previously hardcoded (and P.I.A.) discoveryURL.

Line 
1'''
2 Class allowing the retrieval of ndg documents via their ndgObject, with
3 security in place.
4 
5 @author: B Lawrence?, C Byrom, Tessella Feb 09
6'''
7import cgi,time, logging
8from ndg.common.src.clients.xmldb.eXist.searchclient import SearchClient
9from ndg.common.src.clients.http.httpsearchclient import HTTPSearchClient
10import ndg.common.src.clients.xmldb.eXist.dbconstants as dc
11from ndg.common.src.clients.ws.discovery.discoveryserviceclient import DiscoveryServiceClient
12from ndg.common.src.models.ndgObject import ndgObject as no
13
14
class NDGRetrieveError(Exception):
    """
    Error raised by the NDG Retrieve code.

    Constructing the exception also writes the message to the error log,
    so every failure is recorded even if a caller swallows the exception.
    """
    def __init__(self, msg):
        # Initialise the base exception so str(e) and e.args carry the message
        super(NDGRetrieveError, self).__init__(msg)
        logging.error(msg)
22
23
class NDGRetrieve(object):
    '''
    Retrieves NDG documents identified by an ndgObject.

    A client is chosen per retrieval: the discovery web service client, a
    client onto the local eXist DB, or an http client onto a remote eXist
    repository.  The document is then fetched via the client's getNDGDoc
    method.
    '''

    def __init__(self, config, useRemoteEXist = False, useDiscovery = True):
        '''
        Constructor for NDGRetrieve object
        @param config: config object to use with object; it is queried with
        get(section, option) calls
        @keyword useRemoteEXist: if True, set default behaviour to use a remote eXist
        DB when retrieving data - via a (secured) restful http call.  Default = False
        @keyword useDiscovery: if True, AND the schema is one of [DIF,MDIP,or ISO]
        use the discovery ws to retrieve ndg docs
        NB, cannot have both the useRemoteEXist and useDiscovery = True; this is
        not checked here but when a client is requested during retrieveDoc()
        - also, note, these are just the default values; they can be overridden on
        calls to retrieveDoc()
        '''
        logging.info("Instantiating NDGRetrieve")

        self.config = config
        # reads the repository, password file and proxy settings from config
        self.__getEXistConfigData()
        self.useRemoteEXist = useRemoteEXist
        self.useDiscovery = useDiscovery

        logging.info("NDGRetrieve instantiated")


    def __getClient(self, ndgObject, useRemoteEXist, useDiscovery, discoveryServiceURL=None):
        '''
        Set up the client to the retrieve the ndg doc - NB, this can be
        via a webservice client or an eXist client - which implement the
        interfacesearchclient
        @param ndgObject: ndgObject representing doc to retrieve
        @param useRemoteEXist: if True, allow use of a remote eXist
        DB when retrieving data - via a (secured) restful http call.
        @param useDiscovery: if True, AND the schema is one of [DIF,MDIP,or ISO]
        use the discovery ws to retrieve ndg docs
        @param discoveryServiceURL: optional param for discoveryService API URL
        @raise ValueError: if both useRemoteEXist and useDiscovery are set
        @return client to use to retrieve the doc
        '''
        logging.debug("Getting approprate connection to retrieve document")

        if useRemoteEXist and useDiscovery:
            raise ValueError('Invalid options to NDGRetrieve (useRemoteEXist AND useDiscovery impossible)')

        if useDiscovery:
            return self.__getDiscoveryClient(ndgObject, discoveryServiceURL)

        return self.__getEXistClient(ndgObject, useRemoteEXist)


    def __getDiscoveryClient(self, ndgObject, discoveryServiceURL=None):
        '''
        Return discovery client, if appropriate, for retrieving ndgObject
        @param ndgObject: ndgObject associated with doc to retrieve
        @param discoveryServiceURL: optional parameter stating the API to use;
        it is passed straight through to DiscoveryServiceClient
        @raise NDGRetrieveError if object not of appropriate schema to use
        the discovery service for
        @return DiscoveryServiceClient
        '''
        logging.debug("Getting Discovery service client")
        if ndgObject.schema not in no.DISCOVERY_SCHEMES:
            # adjacent literals are joined at compile time, so the message no
            # longer carries the source indentation that a backslash
            # continuation inside the string used to embed
            raise NDGRetrieveError(
                'Invalid schema (%s) with useDiscovery option in NDGRetrieve '
                '- should be one of %s'
                % (ndgObject.schema, no.DISCOVERY_SCHEMES))

        client = DiscoveryServiceClient(None, discoveryServiceURL)
        logging.debug("- returning discovery service client")
        return client


    def __getEXistClient(self, ndgObject, useRemoteEXist):
        '''
        Return EXist client, if appropriate, for retrieving ndgObject
        @param ndgObject: ndgObject associated with doc to retrieve
        @param useRemoteEXist: if True, fall back to a remote eXist DB - via a
        (secured) restful http call - when the doc's repository is not the
        local one.
        @raise NDGRetrieveError if configuration not set correctly, if the local
        client cannot be created, or if the doc is not held locally and remote
        lookups are not allowed
        @return client with getNDGDoc method available to retrieve ndgObject
        from an eXist DB
        '''
        logging.debug("Getting Browse service client")

        # lookup the repository associated with the ndgObject data - and compare
        try:
            browseRepository = self.config.get('NDG_B_SERVICE', ndgObject.repository)
        except Exception:
            # 'except Exception' rather than a bare except, so that
            # KeyboardInterrupt/SystemExit are not turned into config errors
            raise NDGRetrieveError("Config file not available or incomplete " + \
                "- cannot find data for NDG_B_SERVICE, %s" %ndgObject.repository)

        logging.debug("NDG_B_SERVICE: '%s', default repository: '%s'",
                      browseRepository, self.localRepository)

        # firstly check we are using a locally available repository
        if browseRepository == self.localRepository:
            try:
                logging.debug("Data available locally - in eXist repository, '%s'",
                              ndgObject.repository)
                eXistDB = self.config.get('NDG_EXIST', ndgObject.repository)

                logging.debug("Retrieving data from eXist DB, '%s'", eXistDB)
                client = SearchClient(dbHostName = eXistDB, configFileName = self.pwf)
                logging.debug("Returning client to local eXist DB")
                return client
            except Exception as e:
                # this block also covers creation of the SearchClient, so the
                # underlying cause is appended rather than discarded
                raise NDGRetrieveError(
                    'Config file not available or has no [NDG_EXIST] setting (%s)'
                    % e)

        # if not local, we need to be able to use remote lookups
        if not useRemoteEXist:
            raise NDGRetrieveError('The uri [%s] is not available on local repository [%s]'
                                   %(ndgObject, self.localRepository))

        logging.debug("Using remote eXist DB to retrieve data")
        client = HTTPSearchClient(browseRepository, proxyServer = self.proxyServer)
        logging.debug("Returning client to remote eXist DB")
        return client


    def __getEXistConfigData(self):
        '''
        Lookup standard eXist configuration data required to retrieve the docs.
        Sets self.localRepository, self.pwf (the password file handed to the
        local eXist client) and self.proxyServer (handed to the remote client).
        '''
        self.localRepository = self.config.get('DEFAULT', 'repository')
        self.pwf = self.config.get('NDG_EXIST', 'passwordFile')
        self.proxyServer = self.config.get('DEFAULT', 'proxyServer')


    def retrieveDoc(self, ndgObject, outputFormat = None,
                    useRemoteEXist = None, useDiscovery = None, filterAtoms = False, discoveryServiceURL = None):
        '''
        Retrieve the doc represented by the specified ndgURI in the specified format
        - using security when set up.
        @param ndgObject: ndgObject representing doc to retrieve
        @keyword outputFormat: string specifying output format to return doc in.
        NB, this value is not currently used by this method
        @keyword useRemoteEXist: if not None, overrides, for this call only, the
        default set on the constructor
        @keyword useDiscovery: if not None, overrides, for this call only, the
        default set on the constructor
        NB, cannot have both the useRemoteEXist and useDiscovery = True
        @keyword filterAtoms: if True, remove any atoms in the Working state - NB, these
        should not be visible in 'browse' mode
        @keyword discoveryServiceURL: optional discoveryService API URL, passed on
        to the discovery client when one is used
        @raise ValueError: if the resulting useRemoteEXist and useDiscovery values
        are both True
        @raise NDGRetrieveError: if no suitable client can be set up
        @return: status, doc: status = 0 if the retrieve fails (or the doc is a
        filtered out atom), in which case doc is a message string, or 1 if it
        succeeds, in which case doc = retrieved doc
        '''
        logging.info("Retrieving NDG doc with URI, %s", ndgObject)

        # allow users to adjust retrieval settings on a per retrieval basis
        remote = self.useRemoteEXist
        if useRemoteEXist is not None:
            remote = useRemoteEXist

        discovery = self.useDiscovery
        if useDiscovery is not None:
            discovery = useDiscovery

        # NB, errors setting up the client are raised to the caller, not
        # reported via the status value
        client = self.__getClient(ndgObject, remote, discovery, discoveryServiceURL)

        if ndgObject.schema == no.NDGA0_DOC_TYPE:
            target = dc.NDG_A_COLLECTION_PATH
        elif ndgObject.schema == no.NUMSIM_DOC_TYPE:
            target = dc.NUMSIM_COLLECTION_PATH
        elif ndgObject.schema == no.ATOM_DOC_TYPE:
            target = dc.ATOM_COLLECTION_PATH
        elif ndgObject.schema == no.ATOM_BACKUP_DOC_TYPE:
            target = dc.BACKUP_COLLECTION_PATH + dc.ATOM_COLLECTION_PATH
        elif ndgObject.schema == no.BROWSE_DIF_DOC_TYPE:
            target = dc.DIF_COLLECTION_PATH
        else:
            target = dc.MOLES_COLLECTION_PATH

        # strip a single trailing slash, if present
        if target.endswith('/'):
            target = target[0:-1]

        # do the actual retrieve:
        try:
            time1 = time.time()
            doc = client.getNDGDoc(ndgObject.repository, ndgObject.schema,
                                   ndgObject.localID, targetCollection=target)
            time2 = time.time() - time1
            logging.info('Document retrieve [%s] took [%s]', ndgObject, time2)

            if doc and filterAtoms:
                # NOTE(review): relies on the client exposing resources.xq and
                # client.runQuery - presumably only the eXist clients do;
                # confirm before combining filterAtoms with useDiscovery
                xq = client.resources.xq['atomFullPath']

                # set the query up with the correct target collection
                xq = xq.replace('TargetCollection', target)
                xq = xq.replace('RepositoryID', ndgObject.repository)
                xq = xq.replace('LocalID', ndgObject.localID)
                docPath = client.client.runQuery(xq)
                if docPath[0].find('/db/atoms/working') > -1:
                    return 0, "Document unavailable for view"

        except Exception as e:
            # any failure during the retrieve is reported via the status value;
            # the error text is escaped before being placed in the message
            error = str(e)
            logging.error('Document retrieve [%s] failed [%s]', ndgObject, error)
            return 0, 'Document retrieve [%s] failed [%s]'%(ndgObject, cgi.escape(error))

        return 1, doc
234       
Note: See TracBrowser for help on using the repository browser.