source: MILK/trunk/milk_server/milk_server/lib/ndgInterface.py @ 5745

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/MILK/trunk/milk_server/milk_server/lib/ndgInterface.py@5745
Revision 5745, 9.1 KB checked in by sdonegan, 11 years ago (diff)

Adjusted so as to provide the discovery service API url to the methods called in ndgCommon - it is taken from the milk.config file, which finally gets rid of that damned elusive hardwired url.

Line 
1# Copyright (C) 2007 STFC & NERC (Science and Technology Facilities Council).
2# This software may be distributed under the terms of the
3# Q Public License, version 1.0 or later.
4# http://ndg.nerc.ac.uk/public_docs/QPublic_license.txt
5"""
6Manages interface to NDG documents and data, including caching
7"""
8
9from cache import Cache
10from csml_cache import CSMLExtractCache
11import os, logging
12from ndg.common.src.dal.ndgRetrieve import NDGRetrieve
13from ndg.common.src.models.ndgObject import ndgObject
14from xml.etree import ElementTree as ET
15from pylons import request, session
16
17from pylons import g # for handle to access control PEP interface
18try:
19    from ndg.security.common.authz.pdp import PDPError
20    from ndg.security.common.authz.pep import PEPError
21except ImportError, e:
22    from warnings import warn
23    warn(__name__ + ": access control is disabled: %s" % e, RuntimeWarning)
24
class ndgInterface:
    '''
    Manages the interface to NDG documents and data, including caching.

    Three caches are held on the instance (see SetupCaches); they start out
    as None and are created on first use by GetXML if SetupCaches has not
    been called explicitly.
    '''

    def __init__(self):
        '''
        Constructor to set up object - caches are left unset until
        SetupCaches is called
        '''
        self.CSMLDataCache = None
        self.CSMLDocCache = None
        self.XMLHCache = None

    def SetupCaches(self, csmlDataCache = None, csmlDocCache = None,
                    xmlHCache = None):
        '''
        Set up the required three level cache:
                - a file cache
                - a xmlhandler object cache, and
                - a parsed CSML object cache.
        @keyword csmlDataCache: an alternative csmlDataCache to use
        @keyword csmlDocCache: an alternative csmlDocCache to use
        @keyword xmlHCache: an alternative xmlHCache to use
        '''
        logging.info("Setting up ndgInterface caches")

        # Compare against None rather than relying on truthiness: a supplied
        # cache that happens to be empty (and so may evaluate as False) must
        # still be used instead of being silently replaced by a default one.
        if csmlDataCache is not None:
            self.CSMLDataCache = csmlDataCache
        else:
            self.CSMLDataCache = CSMLExtractCache(
                request.environ['paste.config']['app_conf']['tmp_dir'],
                max_size=10)

        if csmlDocCache is not None:
            self.CSMLDocCache = csmlDocCache
        else:
            self.CSMLDocCache = Cache(max_size=10)

        if xmlHCache is not None:
            self.XMLHCache = xmlHCache
        else:
            self.XMLHCache = Cache(max_size=10)
        logging.info("Finished setting up caches")

    def __runRetrieveAndHandleErrors(self, ndgObject, outputFormat = None,
                                     filterAtoms = False, discoveryServiceURL = None):
        '''
        Retrieve doc represented by ndgObject; if problem occurs during retrieval
        catch this and pass back error in a tidy way
        @param ndgObject: ndgObject representing doc to retrieve
        @keyword outputFormat: format to return doc in
        @keyword filterAtoms: if True, remove any atoms in the Working state - NB, these
        should not be visible in 'browse' mode
        @keyword discoveryServiceURL: url of the discovery service; passed
        straight through to NDGRetrieve.retrieveDoc
        @return: status, doc where status = 1 => success, 0 => failure
        NB, when status = 0, doc will contain any associated error messages
        '''
        retrieveClient = NDGRetrieve(request.environ['ndgConfig'],
                                     useDiscovery = g.standalone)

        try:
            status, doc = retrieveClient.retrieveDoc(
                ndgObject,
                outputFormat = outputFormat,
                filterAtoms = filterAtoms,
                discoveryServiceURL = discoveryServiceURL)
        except Exception as e:
            # Deliberately broad: any retrieval failure is reported back to the
            # caller as (0, message) rather than propagated.
            logging.error("Problem occurred whilst retrieving doc, '%s'" % (ndgObject,))
            logging.error("- error message: %s" % (e,))
            status = 0
            doc = str(e)

        return status, doc

    def GetXML(self, uri, outputSchema = None, useCache = True, filterAtoms = False):
        '''
        This method provides a secure interface to the server
        document cache and a remote NDG exist. It is assumed that
        the local filesystem is protected in that you can't get to
        files except via the CSML api
        @param uri: ndg format uri to locate doc from; if it cannot be parsed
        as an ndg identifier it is treated as the name of a local file
        @keyword outputSchema: format to return doc in. Default = None =>
        return doc in its original format
        @keyword useCache: check for data in the cache and use this, if set to
        True (the default)
        @keyword filterAtoms: if True, remove any atoms in the Working state - NB, these
        should not be visible in 'browse' mode
        @return: status, doc where status = 1 => success, 0 => failure
        NB, when status = 0, doc will contain any associated error messages
        '''
        #    Note that this method should not be used to obtain
        #unsecured discovery documents, these are called directly
        #in the retrieve controller!
        logging.info("Getting XML from uri, '%s'" %uri)
        if outputSchema:
            logging.info("- return doc in new format: '%s'" %outputSchema)

        try:
            ndgObj = ndgObject(uri)
            localFile = False
        except ValueError:
            # It's a local file not an ndg identifier
            logging.info("File appears to be local - look for it there...")
            ndgObj = uri
            localFile = True

        # if we're requesting doc to be returned in a new format, or if
        # the 'useCache' flag isn't set, do a new lookup of data
        if outputSchema or not useCache:
            # NOTE(review): unlike the cached path below, no discoveryServiceURL
            # is passed here - confirm whether that is intended.
            status, xmlh = self.__runRetrieveAndHandleErrors(
                ndgObj,
                outputFormat = outputSchema,
                filterAtoms = filterAtoms)
        else:
            # Only build the default caches when none have been set up yet; an
            # existing (possibly empty) cache must not be thrown away.
            if self.XMLHCache is None:
                self.SetupCaches()

            try:
                # NB, there doesn't appear to be a nice way to check if things
                # are in the cache - either they are or an exception is thrown...
                xmlh = self.XMLHCache[uri]
            except Exception:
                logging.info('XMLH Cache miss for [%s]'%uri)
                if localFile:
                    status, xmlh = self.__getLocal(uri)
                else:
                    logging.info("Attempt to lookup document directly")

                    localConfig = request.environ['ndgConfig']
                    discoveryServiceURL = localConfig.get('DISCOVERY',
                                                          'discoveryServiceURL')

                    # NOTE(review): filterAtoms is not forwarded on this path,
                    # so cached lookups always use the default - confirm.
                    status, xmlh = self.__runRetrieveAndHandleErrors(
                        ndgObj,
                        outputFormat = outputSchema,
                        discoveryServiceURL = discoveryServiceURL)

                if status:
                    logging.info("Document retrieved successfully - adding to cache") 
                    self.XMLHCache[uri] = xmlh
            else:
                status = 1
                logging.info('XMLH Cache hit for [%s]'%uri)

        # valid values of the return objects SHOULD BE
        #   ok:         status=1, xmlh=an xml handler instance.
        #   exceptions, status=0, xmlh='Exception(e)'
        if not status:
            return status, xmlh

        # NB, the __gatekeep access control check is not applied here at
        # present; every successfully retrieved uri is recorded in the
        # session's 'ndgCleared' list.
        if session and 'ndgCleared' in session:
            cleared = session['ndgCleared']
        else:
            cleared = None

        if cleared is None:
            session['ndgCleared'] = [uri]
        else:
            session['ndgCleared'].append(uri)
        session.save()
        return status, xmlh

    def __gatekeep(self, uri, x):
        '''
        This is the NDG gatekeeper
        @param uri: handle to the resource being requested
        @param x: the document associated with the resource
        @return: allowed, doc where allowed = True => doc is x,
        allowed = False => doc is an 'Access Denied' message
        @raise PEPError: if no access control interface is available on g but
        the standalone flag is not set
        '''
        if 'ndgSec' in session:
            securityTokens = session['ndgSec']
        else:
            securityTokens = None

        if not hasattr(g, 'pep'):
            if not g.standalone:
                raise PEPError(
                "Security is disabled but the standalone flag is set to False")

            logging.info("__gatekeep: access control is disabled - standalone "
                         "config flag is set")
            # There is no PEP to consult, so grant access rather than falling
            # through and failing on the missing g.pep attribute.
            return True, x

        try:
            # Arguments are: a handle to the resource and a handle to the users
            # security tokens
            g.pep(dict(uri=uri, doc=x), securityTokens, None)
            return True, x

        except PDPError as e:
            # Caught a known access control condition
            return False, 'Access Denied for %s %s' % (uri, e)

    def __getLocal(self, uri):
        '''
        Returns a local csml file (used for testing)
        @param uri: name of the file, without extension, relative to the
        configured csml_dir
        @return: status, doc where status = 1 => doc is the file contents,
        status = 0 => doc is an error message
        '''
        logging.info("Attempt to retrieve doc locally - from %s" %uri)
        csml_dir = request.environ['paste.config']['app_conf']['csml_dir']
        return self._readLocalFile(csml_dir, uri)

    @staticmethod
    def _readLocalFile(csml_dir, name):
        '''
        Read '<name>.csml' or, failing that, '<name>.xml' from csml_dir
        @param csml_dir: directory to look in
        @param name: file name without extension
        @return: status, doc where status = 1 => doc is the file contents,
        status = 0 => doc is an error message
        '''
        # NOTE(review): name is joined onto csml_dir unchecked; if it can come
        # from an untrusted request, consider rejecting path traversal here.
        path = os.path.join(csml_dir, name)
        for extension in ('.csml', '.xml'):
            candidate = path + extension
            if os.path.exists(candidate):
                with open(candidate) as f:
                    contents = f.read()
                logging.info("- returning file contents")
                return 1, contents

        logging.info("- cannot find file")
        return 0, '<p>Cannot find CSML file %s</p>' % name
Note: See TracBrowser for help on using the repository browser.