source: MILK/trunk/milk_server/milk_server/lib/ndgInterface.py @ 5178

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/MILK/trunk/milk_server/milk_server/lib/ndgInterface.py@5178
Revision 5178, 8.6 KB checked in by cbyrom, 12 years ago (diff)

Add code to filter atoms on publication state.

Line 
1# Copyright (C) 2007 STFC & NERC (Science and Technology Facilities Council).
2# This software may be distributed under the terms of the
3# Q Public License, version 1.0 or later.
4# http://ndg.nerc.ac.uk/public_docs/QPublic_license.txt
5"""
6Manages interface to NDG documents and data, including caching
7"""
8
9from cache import Cache
10from csml_cache import CSMLExtractCache
11import os, logging
12from ndg.common.src.dal.ndgRetrieve import NDGRetrieve
13from ndg.common.src.models.ndgObject import ndgObject
14from xml.etree import ElementTree as ET
15from pylons import request, session
16
17from pylons import g # for handle to access control PEP interface
18try:
19    from ndg.security.common.authz.pdp import PDPError
20    from ndg.security.common.authz.pep import PEPError
21except ImportError, e:
22    from warnings import warn
23    warn(__name__ + ": access control is disabled: %s" % e, RuntimeWarning)
24
class ndgInterface:
    '''
    Interface to NDG documents and data.

    Documents are looked up by uri, either via an NDGRetrieve client or,
    when the uri is not a valid ndg identifier, from a local csml directory.
    Successfully retrieved documents are kept in an xml handler cache; two
    further caches (CSML data and CSML docs) are created alongside it by
    SetupCaches.
    '''

    def __init__(self):
        '''
        Constructor to set up object; the caches themselves are created
        by SetupCaches
        '''
        self.CSMLDataCache = None
        self.CSMLDocCache = None
        self.XMLHCache = None

    def SetupCaches(self, csmlDataCache = None, csmlDocCache = None, \
                    xmlHCache = None):
        '''
        Set up the required three level cache:
                - a file cache
                - a xmlhandler object cache, and
                - a parsed CSML object cache.
        Any cache not supplied is created with a maximum size of 10.
        @keyword csmlDataCache: an alternative csmlDataCache to use
        @keyword csmlDocCache: an alternative csmlDocCache to use
        @keyword xmlHCache: an alternative xmlHCache to use
        '''
        logging.info("Setting up ndgInterface caches")

        # NB, compare against None rather than relying on truthiness: a
        # cache that has been supplied but is still empty may evaluate as
        # False and would otherwise be silently replaced
        if csmlDataCache is not None:
            self.CSMLDataCache = csmlDataCache
        else:
            self.CSMLDataCache = CSMLExtractCache(
                request.environ['paste.config']['app_conf']['tmp_dir'],
                max_size=10)

        if csmlDocCache is not None:
            self.CSMLDocCache = csmlDocCache
        else:
            self.CSMLDocCache = Cache(max_size=10)

        if xmlHCache is not None:
            self.XMLHCache = xmlHCache
        else:
            self.XMLHCache = Cache(max_size=10)
        logging.info("Finished setting up caches")

    def __runRetrieveAndHandleErrors(self, ndgObject, outputFormat = None,
                                     filterAtoms = False):
        '''
        Retrieve doc represented by ndgObject; if problem occurs during retrieval
        catch this and pass back error in a tidy way
        @param ndgObject: ndgObject representing doc to retrieve
        @keyword outputFormat: format to return doc in
        @keyword filterAtoms: if True, remove any atoms in the Working state - NB, these
        should not be visible in 'browse' mode
        @return: status, doc where status = 1 => success, 0 => failure
        NB, when status = 0, doc will contain any associated error messages
        '''
        retrieveClient = NDGRetrieve(request.environ['ndgConfig'],
                                     useDiscovery = g.standalone)

        try:
            status, doc = retrieveClient.retrieveDoc(ndgObject,
                                                     outputFormat = outputFormat,
                                                     filterAtoms = filterAtoms)
        except Exception as e:
            # Deliberately broad: any retrieval failure is reported to the
            # caller via the status flag rather than propagated
            logging.error("Problem occurred whilst retrieving doc, '%s'", ndgObject)
            logging.error("- error message: %s", e)
            status = 0
            doc = str(e)

        return status, doc

    def GetXML(self, uri, outputSchema = None, useCache = True, filterAtoms = False):
        '''
        This method provides a secure interface to the server
        document cache and a remote NDG exist. It is assumed that
        the local filesystem is protected in that you can't get to
        files except via the CSML api
        @param uri: ndg format uri to locate doc from
        @keyword outputSchema: format to return doc in. Default = None =>
        return doc in its original format
        @keyword useCache: check for data in the cache and use this, if set to
        True (the default)
        @keyword filterAtoms: if True, remove any atoms in the Working state - NB, these
        should not be visible in 'browse' mode
        @return: status, doc where status = 1 => success, 0 => failure
        NB, when status = 0, doc will contain any associated error messages
        '''
        #    Note that this method should not be used to obtain
        #unsecured discovery documents, these are called directly
        #in the retrieve controller!
        logging.info("Getting XML from uri, '%s'", uri)
        if outputSchema:
            logging.info("- return doc in new format: '%s'", outputSchema)

        try:
            ndgObj = ndgObject(uri)
            localFile = False
        except ValueError:
            # It's a local file not an ndg identifier
            logging.info("File appears to be local - look for it there...")
            ndgObj = uri
            localFile = True

        if session and 'ndgCleared' in session:
            cleared = session['ndgCleared']
        else:
            cleared = None

        # if we're requesting doc to be returned in a new format, or if
        # the 'useCache' flag isn't set, do a new lookup of data
        if outputSchema or not useCache:
            status, xmlh = self.__runRetrieveAndHandleErrors(ndgObj,
                                                             outputFormat = outputSchema,
                                                             filterAtoms = filterAtoms)
        else:
            # NB, test against None - an empty cache must not trigger a
            # rebuild of all the caches
            if self.XMLHCache is None:
                self.SetupCaches()

            try:
                # NB, there doesn't appear to be a nice way to check if things
                # are in the cache - either they are or an exception is thrown...
                xmlh = self.XMLHCache[uri]
            except Exception:
                logging.info('XMLH Cache miss for [%s]', uri)
                if localFile:
                    status, xmlh = self.__getLocal(uri)
                else:
                    logging.info("Attempt to lookup document directly")
                    # NOTE(review): filterAtoms is not passed on here, so it is
                    # ignored whenever the cache is in use - confirm intended
                    status, xmlh = self.__runRetrieveAndHandleErrors(ndgObj,
                                                             outputFormat = outputSchema)
                if status:
                    logging.info("Document retrieved successfully - adding to cache")
                    self.XMLHCache[uri] = xmlh
            else:
                status = 1
                logging.info('XMLH Cache hit for [%s]', uri)

        if not status:
            return status, xmlh

        # valid values of the return objects SHOULD BE
        #   ok:         status=1, xmlh=an xml handler instance.
        #   exceptions, status=0, xmlh='Exception(e)'

        # NB, the access control check (__gatekeep) is not currently invoked
        # here, so every successfully retrieved uri is recorded as cleared
        if cleared is None:
            session['ndgCleared'] = [uri]
        else:
            session['ndgCleared'].append(uri)
        session.save()
        return status, xmlh

    def __gatekeep(self,uri,x):
        '''
        This is the NDG gatekeeper
        @param uri: identifier of the resource being requested
        @param x: the doc being requested
        @return: True, x if access is granted (or access control is disabled);
        False, error message if access is denied
        @raise PEPError: if no access control handler is available but the
        standalone flag is not set
        '''
        if 'ndgSec' in session:
            securityTokens = session['ndgSec']
        else:
            securityTokens = None

        if not hasattr(g, 'pep'):
            if not g.standalone:
                raise PEPError(
                    "Security is disabled but the standalone flag is set to False")

            logging.info("__gatekeep: access control is disabled - standalone "
                         "config flag is set")
            # No handler to consult, so grant access rather than falling
            # through to a call on the missing g.pep attribute
            return True, x

        try:
            # Arguments are: a handle to the resource and a handle to the users
            # security tokens
            g.pep(dict(uri=uri, doc=x), securityTokens, None)
            return True, x

        except PDPError as e:
            # Caught a known access control condition
            return False, 'Access Denied for %s %s' % (uri, e)

    def __getLocal(self,uri):
        '''
        Returns a local csml file (used for testing)
        @param uri: name of the file, without extension, relative to the
        configured csml_dir; '.csml' is tried first, then '.xml'
        @return: status, doc where status = 1 => success and doc is the file
        contents, 0 => failure and doc is an error message
        '''
        logging.info("Attempt to retrieve doc locally - from %s", uri)
        csml_dir = request.environ['paste.config']['app_conf']['csml_dir']
        # NOTE(review): uri is joined into the path unchecked - confirm it
        # can never be user supplied, or restrict it to csml_dir
        base = os.path.join(csml_dir, uri)
        for extension in ('.csml', '.xml'):
            if os.path.exists(base + extension):
                path = base + extension
                break
        else:
            logging.info("- cannot find file")
            return 0, '<p>Cannot find CSML file %s</p>' % uri

        with open(path) as f:
            contents = f.read()
        logging.info("- returning file contents")
        return 1, contents
Note: See TracBrowser for help on using the repository browser.