source: MILK/trunk/milk_server/milk_server/lib/ndgInterface.py @ 4998

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/MILK/trunk/milk_server/milk_server/lib/ndgInterface.py@4998
Revision 4998, 8.0 KB checked in by cbyrom, 12 years ago (diff)

Improve handling and reporting of errors in document retrieval code.

Line 
1# Copyright (C) 2007 STFC & NERC (Science and Technology Facilities Council).
2# This software may be distributed under the terms of the
3# Q Public License, version 1.0 or later.
4# http://ndg.nerc.ac.uk/public_docs/QPublic_license.txt
5"""
6Manages interface to NDG documents and data, including caching
7"""
8
9from cache import Cache
10from csml_cache import CSMLExtractCache
11import os, logging
12from ndg.common.src.dal.ndgRetrieve import NDGRetrieve
13from ndg.common.src.models.ndgObject import ndgObject
14from xml.etree import ElementTree as ET
15from pylons import request, session
16
17from pylons import g # for handle to access control PEP interface
18try:
19    from ndg.security.common.authz.pdp import PDPError
20    from ndg.security.common.authz.pep import PEPError
21except ImportError, e:
22    from warnings import warn
23    warn(__name__ + ": access control is disabled: %s" % e, RuntimeWarning)
24
class ndgInterface:
    '''
    Access point for NDG documents and data.  Documents are fetched via
    NDGRetrieve (or from the local csml directory), optionally cached, and
    only handed back once they have passed the access control gatekeeper.
    '''

    def __init__(self):
        '''
        Constructor to set up object.  The caches start out unset; they are
        created by SetupCaches, which GetXML calls on demand.
        '''
        self.CSMLDataCache = None
        self.CSMLDocCache = None
        self.XMLHCache = None


    def SetupCaches(self, csmlDataCache = None, csmlDocCache = None,
                    xmlHCache = None):
        '''
        Set up the required three level cache:
                - a file cache
                - a xmlhandler object cache, and
                - a parsed CSML object cache.
        A cache passed in is used as is - even if it is currently empty;
        a default cache is only created when the keyword is left as None.
        @keyword csmlDataCache: an alternative csmlDataCache to use
        @keyword csmlDocCache: an alternative csmlDocCache to use
        @keyword xmlHCache: an alternative xmlHCache to use
        '''
        logging.info("Setting up ndgInterface caches")

        # NB, compare against None rather than testing truthiness: an empty
        # container is falsy and would otherwise be silently replaced
        if csmlDataCache is not None:
            self.CSMLDataCache = csmlDataCache
        else:
            tmpDir = request.environ['paste.config']['app_conf']['tmp_dir']
            self.CSMLDataCache = CSMLExtractCache(tmpDir, max_size=10)

        if csmlDocCache is not None:
            self.CSMLDocCache = csmlDocCache
        else:
            self.CSMLDocCache = Cache(max_size=10)

        if xmlHCache is not None:
            self.XMLHCache = xmlHCache
        else:
            self.XMLHCache = Cache(max_size=10)
        logging.info("Finished setting up caches")


    def __runRetrieveAndHandleErrors(self, ndgObject, outputFormat = None):
        '''
        Retrieve doc represented by ndgObject; if problem occurs during retrieval
        catch this and pass back error in a tidy way
        @param ndgObject: ndgObject representing doc to retrieve
        @keyword outputFormat: format to return doc in
        @return: status, doc where status = 1 => success, 0 => failure
        NB, when status = 0, doc will contain any associated error messages
        '''
        # NB, the client is built outside the try block so that a broken
        # configuration is raised to the caller rather than reported as a
        # failed document lookup
        retrieveClient = NDGRetrieve(request.environ['ndgConfig'],
                                     useDiscovery = g.standalone)

        try:
            status, doc = retrieveClient.retrieveDoc(ndgObject,
                                                     outputFormat = outputFormat)
        except Exception as e:
            logging.error("Problem occurred whilst retrieving doc, '%s'" %ndgObject)
            logging.error("- error message: %s" %e)
            status = 0
            doc = str(e)

        return status, doc


    def GetXML(self, uri, outputSchema = None, useCache = True):
        '''
        This method provides a secure interface to the server
        document cache and a remote NDG exist. It is assumed that
        the local filesystem is protected in that you can't get to
        files except via the CSML api
        @param uri: ndg format uri to locate doc from
        @keyword outputSchema: format to return doc in. Default = None =>
        return doc in its original format
        @keyword useCache: check for data in the cache and use this, if set to
        True (the default)
        @return: status, doc where a true status means doc is the retrieved
        document and a false status means doc is an error / access denied
        message
        '''
        #    Note that this method should not be used to obtain
        #unsecured discovery documents, these are called directly
        #in the retrieve controller!
        logging.info("Getting XML from uri, '%s'" %uri)
        if outputSchema:
            logging.info("- return doc in new format: '%s'" %outputSchema)

        try:
            ndgObj = ndgObject(uri)
            localFile = 0
        except ValueError:
            # It's a local file not an ndg identifier
            logging.info("File appears to be local - look for it there...")
            ndgObj = uri
            localFile = 1

        # if we're requesting doc to be returned in a new format, or if
        # the 'useCache' flag isn't set, do a new lookup of data
        if outputSchema or not useCache:
            status, xmlh = self.__runRetrieveAndHandleErrors(ndgObj,
                                                outputFormat = outputSchema)
        else:
            if self.XMLHCache is None:
                # keep hold of any caches that have already been configured
                self.SetupCaches(csmlDataCache = self.CSMLDataCache,
                                 csmlDocCache = self.CSMLDocCache)

            cacheHit = False
            try:
                # NB, there doesn't appear to be a nice way to check if things
                # are in the cache - either they are or an exception is thrown...
                xmlh = self.XMLHCache[uri]
                cacheHit = True
            except Exception:
                # the type of error raised on a miss is not known here, so
                # catch broadly - but let KeyboardInterrupt/SystemExit through
                pass

            if cacheHit:
                status = 1
                logging.info('XMLH Cache hit for [%s]'%uri)
            else:
                logging.info('XMLH Cache miss for [%s]'%uri)
                if localFile:
                    status, xmlh = self.__getLocal(uri)
                else:
                    logging.info("Attempt to lookup document directly")
                    status, xmlh = self.__runRetrieveAndHandleErrors(ndgObj,
                                                outputFormat = outputSchema)
                if status:
                    logging.info("Document retrieved successfully - adding to cache")
                    self.XMLHCache[uri] = xmlh

        if not status:
            return status, xmlh

        # valid values of the return objects SHOULD BE
        #   ok:         status=1, xmlh=an xml handler instance.
        #   exceptions, status=0, xmlh='Exception(e)'

        status, xmlh = self.__gatekeep(ndgObj, xmlh)
        if status:
            if session and 'ndgCleared' in session:
                cleared = session['ndgCleared']
            else:
                cleared = None

            if cleared is None:
                session['ndgCleared'] = [uri]
            elif uri not in cleared:
                # only record each uri once so that repeated requests for the
                # same doc do not grow the session indefinitely
                cleared.append(uri)
            session.save()

        return status, xmlh


    def __gatekeep(self,uri,x):
        '''
        This is the NDG gatekeeper: ask the access control PEP whether the
        current session may see the resource.
        NB, PEPError and PDPError are only available if the ndg.security
        import at the top of the module succeeded.
        @param uri: handle to the resource being requested
        @param x: the retrieved doc for the resource
        @return: (True, x) if access is granted, otherwise
        (False, access denied message)
        @raise PEPError: if no PEP is configured and the standalone flag is
        not set
        '''
        if 'ndgSec' in session:
            securityTokens = session['ndgSec']
        else:
            securityTokens = None

        if not hasattr(g, 'pep'):
            if not g.standalone:
                raise PEPError(
                "Security is disabled but the standalone flag is set to False")

            logging.info("__gatekeep: access control is disabled - standalone " + \
                     "config flag is set")
            # there is no PEP to consult, so there is nothing to enforce
            return True, x

        try:
            # Arguments are: a handle to the resource and a handle to the users
            # security tokens
            g.pep(dict(uri=uri, doc=x), securityTokens, None)
        except PDPError as e:
            # Caught a known access control condition
            return False, 'Access Denied for %s %s' % (uri, e)

        return True, x


    def __getLocal(self,uri):
        '''
        Returns a local csml file (used for testing)
        @param uri: name of the file, relative to the configured csml_dir and
        without its '.csml' / '.xml' extension
        @return: status, doc where status = 1 => doc is the file contents and
        status = 0 => doc is an error message
        '''
        logging.info("Attempt to retrieve doc locally - from %s" %uri)
        csml_dir = request.environ['paste.config']['app_conf']['csml_dir']
        notFound = '<p>Cannot find CSML file %s</p>' % uri

        # resolve the requested name and make sure it stays inside csml_dir -
        # an absolute or '..' laden name must not escape the directory
        root = os.path.abspath(csml_dir)
        path = os.path.abspath(os.path.join(root, uri))
        if not path.startswith(os.path.join(root, '')):
            logging.info("- cannot find file")
            return 0, notFound

        for extension in ('.csml', '.xml'):
            if os.path.exists(path + extension):
                fileName = path + extension
                break
        else:
            logging.info("- cannot find file")
            return 0, notFound

        with open(fileName) as f:
            r = f.read()
        logging.info("- returning file contents")
        return 1, r
Note: See TracBrowser for help on using the repository browser.