source: ndgCommon/trunk/ndg/common/src/dal/ndgRetrieve.py @ 5184

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ndgCommon/trunk/ndg/common/src/dal/ndgRetrieve.py@5184
Revision 5184, 9.8 KB checked in by cbyrom, 11 years ago (diff)

Simplify backups structure in eXist - putting all backups under a
single toplevel structure.

Line 
1'''
2 Class allowing the retrieval of ndg documents via their ndgObject, with
3 security in place.
4 
5 @author: B Lawrence?, C Byrom, Tessella Feb 09
6'''
7import cgi,time, logging
8from ndg.common.src.clients.xmldb.eXist.searchclient import SearchClient
9from ndg.common.src.clients.http.httpsearchclient import HTTPSearchClient
10import ndg.common.src.clients.xmldb.eXist.dbconstants as dc
11from ndg.common.src.clients.ws.discovery.discoveryserviceclient import DiscoveryServiceClient
12from ndg.common.src.models.ndgObject import ndgObject as no
13
14
class NDGRetrieveError(Exception):
    '''
    Raised when an NDG document cannot be retrieved.

    Constructing the exception also writes the message to the root logger
    at ERROR level, so every failure is recorded even when a caller
    swallows the exception.
    '''

    def __init__(self, msg):
        '''
        @param msg: description of the failure; logged and used as the
        exception message
        '''
        super(NDGRetrieveError, self).__init__(msg)
        logging.error(msg)
22
23
class NDGRetrieve(object):
    '''
    Retrieves NDG documents identified by an ndgObject.

    Depending on the options in force, the document is fetched through the
    discovery web service, a local eXist DB, or a remote eXist DB reached
    over http.
    '''

    def __init__(self, config, useRemoteEXist = False, useDiscovery = True):
        '''
        Constructor for NDGRetrieve object
        @param config: config object to use with object; must offer
        get(section, option)
        @keyword useRemoteEXist: if True, set default behaviour to use a remote eXist
        DB when retrieving data - via a (secured) restful http call.  Default = False
        @keyword useDiscovery: if True, AND the schema is one of [DIF,MDIP,or ISO]
        use the discovery ws to retrieve ndg docs
        NB, cannot have both the useRemoteEXist and useDiscovery = True
        - also, note, these are just the default values; they can be overridden on
        calls to retrieveDoc().  The combination is only checked when a
        document is actually retrieved, not here.
        '''
        logging.info("Instantiating NDGRetrieve")
        self.config = config
        self.__getEXistConfigData()
        self.useRemoteEXist = useRemoteEXist
        self.useDiscovery = useDiscovery
        logging.info("NDGRetrieve instantiated")


    def __getClient(self, ndgObject, useRemoteEXist, useDiscovery):
        '''
        Set up the client to retrieve the ndg doc - NB, this can be
        via a webservice client or an eXist client - which implement the
        interfacesearchclient
        @param ndgObject: ndgObject representing doc to retrieve
        @param useRemoteEXist: if True, allow a remote eXist DB to be used
        - via a (secured) restful http call.
        @param useDiscovery: if True, AND the schema is one of [DIF,MDIP,or ISO]
        use the discovery ws to retrieve ndg docs
        @raise ValueError: if both useRemoteEXist and useDiscovery are True
        @return: client to use for the retrieval
        '''
        logging.debug("Getting approprate connection to retrieve document")
        if useRemoteEXist and useDiscovery:
            raise ValueError('Invalid options to NDGRetrieve (useRemoteEXist AND useDiscovery impossible)')

        if useDiscovery:
            return self.__getDiscoveryClient(ndgObject)

        return self.__getEXistClient(ndgObject, useRemoteEXist)


    def __getDiscoveryClient(self, ndgObject):
        '''
        Return discovery client, if appropriate, for retrieving ndgObject
        @param ndgObject: ndgObject associated with doc to retrieve
        @raise NDGRetrieveError: if object not of appropriate schema to use
        the discovery service for
        @return: DiscoveryServiceClient
        '''
        logging.debug("Getting Discovery service client")
        if ndgObject.schema not in no.DISCOVERY_SCHEMES:
            # adjacent literals (not a backslash inside the string) keep the
            # source indentation out of the message
            raise NDGRetrieveError('Invalid schema (%s) with useDiscovery '
                                   'option in NDGRetrieve - should be one of %s'
                                   %(ndgObject.schema, no.DISCOVERY_SCHEMES))

        client = DiscoveryServiceClient()
        logging.debug("- returning discovery service client")
        return client


    def __getEXistClient(self, ndgObject, useRemoteEXist):
        '''
        Return EXist client, if appropriate, for retrieving ndgObject
        @param ndgObject: ndgObject associated with doc to retrieve
        @param useRemoteEXist: if True, allow a remote eXist DB to be used
        - via a (secured) restful http call - when the doc is not held locally.
        @raise NDGRetrieveError: if configuration not set correctly, or if the
        doc is not held locally and remote lookups are not allowed
        @return: client with getNDGDoc method available to retrieve ndgObject
        from an eXist DB
        '''
        logging.debug("Getting Browse service client")

        # lookup the repository associated with the ndgObject data - and compare
        try:
            browseRepository = self.config.get('NDG_B_SERVICE', ndgObject.repository)
        except Exception:
            # 'except Exception' rather than a bare except, so that
            # KeyboardInterrupt/SystemExit are not converted
            raise NDGRetrieveError("Config file not available or incomplete "
                "- cannot find data for NDG_B_SERVICE, %s" %ndgObject.repository)

        logging.debug("NDG_B_SERVICE: '%s', default repository: '%s'",
                      browseRepository, self.localRepository)

        # firstly check we are using a locally available repository
        if browseRepository == self.localRepository:
            try:
                logging.debug("Data available locally - in eXist repository, '%s'",
                              ndgObject.repository)
                eXistDB = self.config.get('NDG_EXIST', ndgObject.repository)

                logging.debug("Retrieving data from eXist DB, '%s'", eXistDB)
                client = SearchClient(dbHostName = eXistDB, configFileName = self.pwf)
                logging.debug("Returning client to local eXist DB")
                return client
            except Exception as e:
                # the try also covers client construction, so report the
                # underlying cause alongside the config hint
                raise NDGRetrieveError('Config file not available or has '
                                       'no [NDG_EXIST] setting (%s)' %e)

        # if not local, we need to be able to use remote lookups
        if not useRemoteEXist:
            raise NDGRetrieveError('The uri [%s] is not available on local repository [%s]'
                                   %(ndgObject, self.localRepository))

        logging.debug("Using remote eXist DB to retrieve data")
        client = HTTPSearchClient(browseRepository, proxyServer = self.proxyServer)
        logging.debug("Returning client to remote eXist DB")
        return client


    def __getEXistConfigData(self):
        '''
        Lookup standard eXist configuration data required to retrieve the docs
        and store it on the instance (localRepository, pwf, proxyServer).
        Any error raised by config.get() is propagated unchanged.
        '''
        self.localRepository = self.config.get('DEFAULT', 'repository')
        self.pwf = self.config.get('NDG_EXIST', 'passwordFile')
        self.proxyServer = self.config.get('DEFAULT', 'proxyServer')


    def __getTargetCollection(self, ndgObject):
        '''
        Work out the collection path to search for the doc, from its schema
        @param ndgObject: ndgObject representing doc to retrieve
        @return: collection path, with a single trailing slash removed if present
        '''
        schema = ndgObject.schema
        if schema == no.NDGA0_DOC_TYPE:
            target = dc.NDG_A_COLLECTION_PATH
        elif schema == no.NUMSIM_DOC_TYPE:
            target = dc.NUMSIM_COLLECTION_PATH
        elif schema == no.ATOM_DOC_TYPE:
            target = dc.ATOM_COLLECTION_PATH
        elif schema == no.ATOM_BACKUP_DOC_TYPE:
            target = dc.BACKUP_COLLECTION_PATH + dc.ATOM_COLLECTION_PATH
        elif schema == no.BROWSE_DIF_DOC_TYPE:
            target = dc.DIF_COLLECTION_PATH
        else:
            # every other schema is looked up in the moles collection
            target = dc.MOLES_COLLECTION_PATH

        # strip trailing slash, if required
        if target.endswith('/'):
            target = target[:-1]
        return target


    @staticmethod
    def __escapeMarkup(text):
        '''
        Escape &, < and > in text so it can be embedded in markup
        @param text: string to escape
        @return: escaped string
        '''
        # cgi.escape was removed in Python 3.8; html.escape with quote=False
        # performs the same three replacements.  Fall back to cgi where the
        # html module does not exist (Python 2).
        try:
            from html import escape
        except ImportError:
            from cgi import escape
        return escape(text, quote=False)


    def retrieveDoc(self, ndgObject, outputFormat = None,
                    useRemoteEXist = None, useDiscovery = None, filterAtoms = False):
        '''
        Retrieve the doc represented by the specified ndgURI
        - using security when set up.
        @param ndgObject: ndgObject representing doc to retrieve
        @keyword outputFormat: string specifying output format to return doc in.
        NB, this value is accepted but not currently used by this method
        @keyword useRemoteEXist: if not None, overrides, for this call only, the
        useRemoteEXist value given to the constructor
        @keyword useDiscovery: if not None, overrides, for this call only, the
        useDiscovery value given to the constructor
        NB, cannot have both the useRemoteEXist and useDiscovery = True
        @keyword filterAtoms: if True, remove any atoms in the Working state - NB, these
        should not be visible in 'browse' mode
        @raise ValueError: if both useRemoteEXist and useDiscovery end up True
        @raise NDGRetrieveError: if no suitable client can be set up
        @return: status, doc: status = 0 if fails, or 1 if succeeds. doc = retrieved
        doc, or a message describing the failure when status = 0
        '''
        logging.info("Retrieving NDG doc with URI, %s", ndgObject)

        # allow users to adjust retrieval settings on a per retrieval basis
        remote = self.useRemoteEXist if useRemoteEXist is None else useRemoteEXist
        discovery = self.useDiscovery if useDiscovery is None else useDiscovery

        # NB, errors from client set up are deliberately not caught here
        client = self.__getClient(ndgObject, remote, discovery)
        target = self.__getTargetCollection(ndgObject)

        # do the actual retrieve:
        try:
            startTime = time.time()
            doc = client.getNDGDoc(ndgObject.repository, ndgObject.schema,
                                   ndgObject.localID, targetCollection=target)
            elapsed = time.time() - startTime
            logging.info('Document retrieve [%s] took [%s]', ndgObject, elapsed)

            if doc and filterAtoms:
                # set the query up with the correct target collection
                xq = client.resources.xq['atomFullPath']
                xq = xq.replace('TargetCollection', target)
                xq = xq.replace('RepositoryID', ndgObject.repository)
                xq = xq.replace('LocalID', ndgObject.localID)
                docPath = client.client.runQuery(xq)
                # NB, an empty result raises IndexError here, which is caught
                # below and reported as a failed retrieve
                if docPath[0].find('/db/atoms/working') > -1:
                    return 0, "Document unavailable for view"

        except Exception as e:
            error = str(e)
            logging.error('Document retrieve [%s] failed [%s]', ndgObject, error)
            return 0, 'Document retrieve [%s] failed [%s]' \
                %(ndgObject, self.__escapeMarkup(error))

        return 1, doc
223       
Note: See TracBrowser for help on using the repository browser.