source: ceda_http_fileserver/trunk/ceda_http_fileserver/ceda/server/wsgi/fileserver/app.py @ 7022

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ceda_http_fileserver/trunk/ceda_http_fileserver/ceda/server/wsgi/fileserver/app.py@7022
Revision 7022, 12.2 KB checked in by pjkersha, 10 years ago (diff)

Incomplete - task 9: Data Browser Replacement:

  • Tested correct retrieval for sub-dir trees
  • Tested for forbidden access and file not found

Next: add regular expression filtering to avoid exposing unwanted files over the HTTP interface

  • Property svn:keywords set to Id
Line 
1"""CEDA (Centre for Environmental Data Archival) File Server WSGI Application
2module
3"""
4__author__ = "P J Kershaw"
5__date__ = "11/06/10"
6__copyright__ = "(C) 2010 Science and Technology Facilities Council"
7__license__ = """http://www.apache.org/licenses/LICENSE-2.0"""
8__contact__ = "Philip.Kershaw@stfc.ac.uk"
9__revision__ = '$Id$'
10#   Copyright (c) 2006-2007 Open Source Applications Foundation
11#
12#   Licensed under the Apache License, Version 2.0 (the "License");
13#   you may not use this file except in compliance with the License.
14#   You may obtain a copy of the License at
15#
16#       http://www.apache.org/licenses/LICENSE-2.0
17#
18#   Unless required by applicable law or agreed to in writing, software
19#   distributed under the License is distributed on an "AS IS" BASIS,
20#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21#   See the License for the specific language governing permissions and
22#   limitations under the License.
23
24from urlparse import urlparse, urljoin
25import httplib
26import urllib
27import os
28import traceback
29import mimetypes
30import errno
31import logging
32
33log = logging.getLogger(__name__)
34
35
def reconstruct_url(environ):
    """Rebuild the full request URL from a WSGI environ dictionary.

    Based on the URL reconstruction recipe in the WSGI specification
    (PEP 333).

    As a side effect the result is also stored in
    ``environ['reconstructed_url']``.

    @type environ: dict
    @param environ: WSGI environment; must contain 'wsgi.url_scheme' and,
    when 'HTTP_HOST' is missing or empty, 'SERVER_NAME' and 'SERVER_PORT'
    @rtype: basestring
    @return: scheme://host[:port] + quoted SCRIPT_NAME + quoted PATH_INFO
    [+ '?' + QUERY_STRING]
    """
    # Imported locally so that this function works under both Python 2 and 3
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote

    scheme = environ['wsgi.url_scheme']
    url = scheme + '://'

    host = environ.get('HTTP_HOST')
    if host:
        url += host
    else:
        url += environ['SERVER_NAME']

        # Only include the port when it is not the default one for the scheme
        defaultPort = '443' if scheme == 'https' else '80'
        if environ['SERVER_PORT'] != defaultPort:
            url += ':' + environ['SERVER_PORT']

    url += quote(environ.get('SCRIPT_NAME', ''))

    # quote() turns ':' into '%3A', so a PATH_INFO that already starts with
    # the URL built so far shows up in quoted form; strip that copy so the
    # prefix is not duplicated.  Presumably this caters for requests whose
    # PATH_INFO carries an absolute URL - TODO confirm against the caller.
    quotedPrefix = url.replace(':', '%3A')
    url += quote(environ.get('PATH_INFO', '')).replace(quotedPrefix, '')

    if environ.get('QUERY_STRING'):
        url += '?' + environ['QUERY_STRING']

    environ['reconstructed_url'] = url
    return url
56   
57   
class FileResponse(object):
    """Iterable WSGI response body that streams a file in fixed size blocks.

    An instance is primed by calling it with an open file object and the
    file's path; it then returns itself so that it can be handed back to the
    WSGI server as the response iterable.

    @cvar DEFAULT_READ_SIZE: block size in bytes used when none is given
    @ivar fileObj: file object being streamed, None until primed or after
    close()
    @ivar fileSize: size in bytes of the file as reported by os.path.getsize
    """
    DEFAULT_READ_SIZE = 1024

    def __init__(self, readSize=DEFAULT_READ_SIZE):
        """
        @type readSize: int
        @param readSize: number of bytes to read from the file per iteration
        @raise ValueError: if readSize is not a positive integer
        """
        self.fileObj = None
        self.fileSize = None
        self.readSize = readSize

    def __call__(self, fileObj, fileName):
        """Attach the file to stream and record its size.

        @type fileObj: file
        @param fileObj: file object opened for reading
        @type fileName: basestring
        @param fileName: path of the file, used to look up its size
        @rtype: FileResponse
        @return: this object, ready to be iterated over
        """
        self.fileSize = os.path.getsize(fileName)
        self.fileObj = fileObj

        return self

    def __iter__(self):
        """Yield the file content block by block until it is exhausted.

        No empty block is yielded at end of file.
        """
        while True:
            output = self.fileObj.read(self.readSize)
            if not output:
                break
            yield output

    def close(self):
        """Release the file handle.

        WSGI servers call close() on the response iterable when the request
        is finished (PEP 333), so the file is closed even if the client
        disconnects part way through the transfer.
        """
        if self.fileObj is not None:
            self.fileObj.close()
            self.fileObj = None

    def _getReadSize(self):
        return self.__readSize

    def _setReadSize(self, value):
        # Validate before storing so that a rejected value never replaces a
        # good one.  Zero is refused too: read(0) returns no data and would
        # result in an empty response body.
        readSize = int(value)
        if readSize < 1:
            raise ValueError('Expecting positive integer value for block size '
                             'attribute')
        self.__readSize = readSize

    readSize = property(fget=_getReadSize, fset=_setReadSize,
                        doc="block size reading the file in the iterator and "
                            "returning a response")
90
91
92# Map HTTP status code to code + standard message string
93_statusCode2Msg = lambda code: "%d %s" % (code, 
94                    httplib.responses.get(code, httplib.INTERNAL_SERVER_ERROR))
95           
96           
class FileServerApp(object):
    """WSGI application serving files and directory listings from a
    directory tree on the local file system.

    GET returns the content of a file, or a simple HTML listing for a
    directory.  PUT writes the request body to a file.  Requested paths are
    always confined to the configured root directory.

    @cvar PARAM_PREFIX: default prefix for configuration items passed to
    app_factory
    @cvar DEFAULT_READ_BLK_SIZE: default block size for reading request
    bodies (PUT)
    @cvar DEFAULT_WRITE_BLK_SIZE: default block size for streaming files
    back to the client (GET)
    @cvar DEFAULT_CONTENT_TYPE: content type used when none can be guessed
    @cvar FILE_ACCESS_ERROR_MAP: map of errno values to HTTP status strings
    @cvar DEFAULT_HTTP_METHOD_MAP: map of HTTP method names to the callbacks
    of this class that handle them
    """
    PARAM_PREFIX = 'fileserver.'
    DEFAULT_READ_BLK_SIZE = 1024
    DEFAULT_WRITE_BLK_SIZE = 1024
    DEFAULT_CONTENT_TYPE = 'text/plain; charset=utf-8'

    statusCode2Msg = staticmethod(_statusCode2Msg)

    # Map HTTP status string to a given file system file access error
    FILE_ACCESS_ERROR_MAP = {
         errno.ENOENT: _statusCode2Msg(httplib.NOT_FOUND),
         errno.EACCES: _statusCode2Msg(httplib.FORBIDDEN)
    }

    __slots__ = (
        '__fileResponse',
        '__readBlkSize',
        '__httpMethodMap',
        'fileSysRoot',
        'mountPoint',
        'mimeTypes'
    )

    def __init__(self, root_path, mountPoint=None):
        """
        @type root_path: basestring
        @param root_path: directory to serve; '~' is expanded and the path
        is made absolute
        @type mountPoint: basestring / None
        @param mountPoint: portion of the URL path preceding the part that
        maps onto the file system; None or empty means the whole URL path
        is used
        """
        # Holds the GET block size setting; a fresh FileResponse is created
        # from it for every request
        self.__fileResponse = FileResponse()
        self.__readBlkSize = None
        self.__httpMethodMap = None

        self.fileSysRoot = os.path.abspath(os.path.expanduser(root_path))
        self.mountPoint = mountPoint

        # Set from property methods to apply validation - referencing
        # self.__class__ means derived class could make an alternative setting

        # Block size for PUT or POST operations
        self.readBlkSize = self.__class__.DEFAULT_READ_BLK_SIZE

        # Block size for GET operation
        self.writeBlkSize = self.__class__.DEFAULT_WRITE_BLK_SIZE

        # Map HTTP method name to a method of this class
        self.httpMethodMap = self.__class__.DEFAULT_HTTP_METHOD_MAP

        # MIME types
        self.mimeTypes = mimetypes.MimeTypes()

    def _getReadBlkSize(self):
        return self.__readBlkSize

    def _setReadBlkSize(self, value):
        # Validate before storing; zero is refused because a request body
        # cannot be read in zero sized blocks
        readBlkSize = int(value)
        if readBlkSize < 1:
            raise ValueError('Expecting positive integer value for block size '
                             'attribute')
        self.__readBlkSize = readBlkSize

    readBlkSize = property(_getReadBlkSize, _setReadBlkSize,
                           doc="Block size in bytes for reading the request "
                               "body in PUT operations")

    def _getWriteBlkSize(self):
        return self.__fileResponse.readSize

    def _setWriteBlkSize(self, value):
        self.__fileResponse.readSize = value

    writeBlkSize = property(_getWriteBlkSize, _setWriteBlkSize,
                            doc="Block size in bytes for streaming file "
                                "content back to the client in GET "
                                "operations")

    def _getHttpMethodMap(self):
        return self.__httpMethodMap

    def _setHttpMethodMap(self, value):
        if not isinstance(value, dict):
            raise TypeError('Expecting dict type for HTTP method map '
                            'attribute; got %r' % type(value))

        for name, method in value.items():
            if not isinstance(name, basestring):
                raise TypeError('Expecting string type for HTTP method name; '
                                'got %r' % type(name))

            if not callable(method):
                raise TypeError('Expecting callable for HTTP method ; got %r' %
                                type(method))

        # Copy so that later changes to the caller's dict have no effect here
        self.__httpMethodMap = value.copy()

    httpMethodMap = property(_getHttpMethodMap, _setHttpMethodMap,
                             doc="Dictionary mapping HTTP method names to "
                                 "callables taking (app, fileSysSubDir, "
                                 "environ, start_response)")

    @classmethod
    def app_factory(cls, global_conf, prefix=PARAM_PREFIX, **app_conf):
        """Function following Paste app factory signature

        @type global_conf: dict
        @param global_conf: PasteDeploy global configuration dictionary
        @type prefix: basestring
        @param prefix: prefix for configuration items; it is removed from
        the start of any app_conf key that carries it
        @type app_conf: dict
        @param app_conf: PasteDeploy application specific configuration
        dictionary; after prefix removal the items are passed as keyword
        arguments to the class constructor
        @rtype: FileServerApp
        @return: new application instance
        """
        kw = {}
        for name, value in app_conf.items():
            if prefix and name.startswith(prefix):
                name = name[len(prefix):]
            kw[name] = value

        return cls(**kw)

    def _statusResponse(self, start_response, status, body=None,
                        extraHeaders=None):
        """Start a plain text response and return its body iterable.

        @param start_response: WSGI start_response callable
        @param status: HTTP status string, e.g. '404 Not Found'
        @param body: response text; defaults to the status string
        @param extraHeaders: optional list of additional (name, value)
        header tuples
        @return: single item list containing the body
        """
        if body is None:
            body = status

        headers = [('Content-Type', 'text/plain'),
                   ('Content-length', str(len(body)))]
        if extraHeaders:
            headers.extend(extraHeaders)

        start_response(status, headers)
        return [body]

    def _isWithinRoot(self, filePath):
        """Check that a path, once '..' components are collapsed, is the
        served root directory or lies beneath it.  Symbolic links are not
        resolved.

        @rtype: bool
        """
        root = self.fileSysRoot
        normPath = os.path.normpath(filePath)
        if normPath == root:
            return True

        if not root.endswith(os.sep):
            root += os.sep

        return normPath.startswith(root)

    @staticmethod
    def _escapeHtml(text):
        """Escape characters with a special meaning in HTML text and
        attribute values"""
        return (text.replace('&', '&amp;').replace('<', '&lt;')
                    .replace('>', '&gt;').replace('"', '&quot;'))

    def handler(self, environ, start_response):
        """Map the request URL onto a path relative to the served root and
        dispatch to the callback registered for the HTTP request method.

        @type environ: dict
        @param environ: WSGI environment
        @param start_response: WSGI start_response callable
        @return: WSGI response iterable; 404 if the URL path does not
        contain the mount point, 405 if the request method has no callback
        """
        url = urlparse(reconstruct_url(environ))
        urlPath = url[2]

        if self.mountPoint:
            split_url = urlPath.split(self.mountPoint, 1)
            if len(split_url) < 2:
                # The request is not for anything below the mount point
                return self._statusResponse(
                                start_response,
                                self.statusCode2Msg(httplib.NOT_FOUND))
            relativeURI = split_url[1]
        else:
            relativeURI = urlPath

        # Every leading '/' is removed to stop os.path.join doing a join
        # with an absolute path '/...'.  If this is done, the first argument
        # is obliterated and the result is '/' exposing the root file system
        # to the web client!!
        fileSysSubDir = urllib.url2pathname(relativeURI).lstrip('/')

        requestMethodName = environ['REQUEST_METHOD']
        requestMethod = self.httpMethodMap.get(requestMethodName)
        if requestMethod is None:
            response = ('%r HTTP request method is not supported' %
                        requestMethodName)
            status = self.statusCode2Msg(httplib.METHOD_NOT_ALLOWED)

            # Tell the client which methods it may use instead
            allowed = ', '.join(sorted(self.httpMethodMap))
            return self._statusResponse(start_response, status, response,
                                        [('Allow', allowed)])

        return requestMethod(self, fileSysSubDir, environ, start_response)

    def do_get(self, fileSysSubDir, environ, start_response):
        """HTTP GET callback

        @type fileSysSubDir: basestring
        @param fileSysSubDir: requested path relative to the served root
        @type environ: dict
        @param environ: WSGI environment; PATH_INFO is used to guess the
        content type of a file
        @param start_response: WSGI start_response callable
        @return: HTML listing for a directory, FileResponse iterable for a
        file, or a plain text error (403 for a path outside the root,
        otherwise the status mapped from the file access error)
        """
        filePath = os.path.join(self.fileSysRoot, fileSysSubDir)

        if not self._isWithinRoot(filePath):
            log.error('rejected request for path outside the root '
                      'directory: %r', filePath)
            return self._statusResponse(
                                start_response,
                                self.statusCode2Msg(httplib.FORBIDDEN))

        if os.path.isdir(filePath):
            try:
                dirContents = os.listdir(filePath)
            except OSError as e:
                status = self.mapFileAccessError2HttpStatus(e.errno)
                log.error('failed to list directory %r: %s', filePath,
                          traceback.format_exc())
                return self._statusResponse(start_response, status)

            # pathname2url percent-encodes the link target; the visible
            # name is HTML escaped as file names may contain markup
            lines = [
                '<a href="/%s">%s</a>' %
                (urllib.pathname2url(os.path.join(fileSysSubDir, filename)),
                 self._escapeHtml(filename))
                for filename in dirContents
            ]

            response = '<html>' + '<br>'.join(lines) + '</html>'
            start_response('200 OK',
                           [('Cache-Control', 'no-cache'),
                            ('Pragma', 'no-cache'),
                            ('Content-Type', 'text/html; charset=utf-8'),
                            ('Content-Length', str(len(response)))])

            return [response]

        try:
            fileObj = open(filePath, 'rb')
            log.debug('opened file %s', filePath)
        except IOError as e:
            # Map file access error to a HTTP response code
            status = self.mapFileAccessError2HttpStatus(e.errno)
            log.error('failed to open file %r: %s', filePath,
                      traceback.format_exc())
            return self._statusResponse(start_response, status)

        # A new response object per request: requests served concurrently
        # must not share the file handle and size
        response = FileResponse(readSize=self.writeBlkSize)(fileObj, filePath)
        start_response('200 OK',
                       [('Cache-Control', 'no-cache'),
                        ('Pragma', 'no-cache'),
                        ('Content-Length', str(response.fileSize)),
                        ('Content-Type',
                         self.getContentType(environ['PATH_INFO']))])
        return response

    def do_put(self, fileSysSubDir, environ, start_response):
        """HTTP PUT callback

        Writes CONTENT_LENGTH bytes of the request body to the target file,
        reading readBlkSize bytes at a time.

        @type fileSysSubDir: basestring
        @param fileSysSubDir: target path relative to the served root
        @type environ: dict
        @param environ: WSGI environment; CONTENT_LENGTH and wsgi.input are
        read
        @param start_response: WSGI start_response callable
        @return: empty body with 201 if the file was created or 204 if it
        already existed; plain text error otherwise (403 path outside the
        root, 411 no content length, 400 invalid content length, or the
        status mapped from the file access error)
        """
        filePath = os.path.join(self.fileSysRoot, fileSysSubDir)

        if not self._isWithinRoot(filePath):
            log.error('rejected request for path outside the root '
                      'directory: %r', filePath)
            return self._statusResponse(
                                start_response,
                                self.statusCode2Msg(httplib.FORBIDDEN))

        # The content length is checked before the file is opened as
        # opening for writing truncates any existing content
        contentLength = environ.get('CONTENT_LENGTH')
        if not contentLength:
            return self._statusResponse(
                                start_response,
                                self.statusCode2Msg(httplib.LENGTH_REQUIRED))
        try:
            nBytesLeft = int(contentLength)
        except ValueError:
            nBytesLeft = -1

        if nBytesLeft < 0:
            return self._statusResponse(
                                start_response,
                                self.statusCode2Msg(httplib.BAD_REQUEST))

        alreadyExists = os.path.exists(filePath)
        try:
            fileObj = open(filePath, 'wb')
            log.debug('opened file for writing %s', filePath)

        except IOError as e:
            log.error('failed to open file for writing %r', filePath)
            status = self.mapFileAccessError2HttpStatus(e.errno)
            return self._statusResponse(start_response, status)

        inputStream = environ['wsgi.input']
        try:
            while nBytesLeft > 0:
                inputBlk = inputStream.read(min(self.readBlkSize, nBytesLeft))
                if not inputBlk:
                    # Client sent less than the advertised length
                    break

                fileObj.write(inputBlk)
                nBytesLeft -= len(inputBlk)
        finally:
            fileObj.close()

        if alreadyExists:
            status = self.statusCode2Msg(httplib.NO_CONTENT)
        else:
            status = self.statusCode2Msg(httplib.CREATED)

        start_response(status,
                       [('Content-Type', 'text/plain'),
                        ('Content-length', '0')])
        return []

    @classmethod
    def mapFileAccessError2HttpStatus(cls, errno):
        """Map file access error to a standard HTTP response status message

        @type errno: int
        @param errno: error number from an IOError / OSError
        @rtype: basestring
        @return: entry from FILE_ACCESS_ERROR_MAP, or the internal server
        error status if the error number has no entry
        """
        return cls.FILE_ACCESS_ERROR_MAP.get(errno,
                            cls.statusCode2Msg(httplib.INTERNAL_SERVER_ERROR))

    def getContentType(self, path_info):
        """Make a best guess at the content type

        @type path_info: basestring
        @param path_info: path or URL whose extension is used for the guess
        @rtype: basestring
        @return: guessed MIME type, or DEFAULT_CONTENT_TYPE if none is found
        """
        contentType = self.mimeTypes.guess_type(path_info)[0]
        if contentType is not None:
            return contentType
        else:
            return self.__class__.DEFAULT_CONTENT_TYPE

    def __call__(self, environ, start_response):
        """WSGI entry point - delegates to handler"""
        return self.handler(environ, start_response)

    # Defined last as it refers to the callbacks declared above
    DEFAULT_HTTP_METHOD_MAP = {
        'GET':          do_get,
        'PUT':          do_put,
    }
Note: See TracBrowser for help on using the repository browser.