source: ceda_http_fileserver/trunk/ceda_http_fileserver/ceda/server/wsgi/fileserver/app.py @ 7021

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ceda_http_fileserver/trunk/ceda_http_fileserver/ceda/server/wsgi/fileserver/app.py@7021
Revision 7021, 12.2 KB checked in by pjkersha, 10 years ago (diff)

Incomplete - task 9: Data Browser Replacement:

  • Added URL encoding of file names for GET
  • Uses mimetypes package
  • tested returning file in variable length blocks with FileResponse.__iter__
  • Property svn:keywords set to Id
Line 
1"""CEDA (Centre for Environmental Data Archival) File Server WSGI Application
2module
3"""
4__author__ = "P J Kershaw"
5__date__ = "11/06/10"
6__copyright__ = "(C) 2010 Science and Technology Facilities Council"
7__license__ = """http://www.apache.org/licenses/LICENSE-2.0"""
8__contact__ = "Philip.Kershaw@stfc.ac.uk"
9__revision__ = '$Id$'
10#   Copyright (c) 2006-2007 Open Source Applications Foundation
11#
12#   Licensed under the Apache License, Version 2.0 (the "License");
13#   you may not use this file except in compliance with the License.
14#   You may obtain a copy of the License at
15#
16#       http://www.apache.org/licenses/LICENSE-2.0
17#
18#   Unless required by applicable law or agreed to in writing, software
19#   distributed under the License is distributed on an "AS IS" BASIS,
20#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21#   See the License for the specific language governing permissions and
22#   limitations under the License.
23
24from urlparse import urlparse, urljoin
25import httplib
26import urllib
27import os
28import traceback
29import logging
30import mimetypes
31
# Module level logger, named after this module
log = logging.getLogger(__name__)

# Mapping of file extension (without the leading dot) to the Content-Type
# header value to return for it.  The special 'fallback' key holds the type
# to use when the extension is not recognised.
#
# Content type sources taken from http://en.wikipedia.org/wiki/MIME_type
_types_map = {
    'js': 'application/x-javascript',
    'html': 'text/html; charset=utf-8',
    'fallback': 'text/plain; charset=utf-8',
    'ogg': 'application/ogg',
    'xhtml': 'text/html; charset=utf-8',
    'rm': 'audio/vnd.rn-realaudio',
    'swf': 'application/x-shockwave-flash',
    'mp3': 'audio/mpeg',
    'wma': 'audio/x-ms-wma',
    'ra': 'audio/vnd.rn-realaudio',
    'wav': 'audio/x-wav',
    'gif': 'image/gif',
    'jpeg': 'image/jpeg',
    'jpg': 'image/jpeg',
    'png': 'image/png',
    'tiff': 'image/tiff',
    'css': 'text/css; charset=utf-8',
    'mpeg': 'video/mpeg',
    'mp4': 'video/mp4',
    'qt': 'video/quicktime',
    'mov': 'video/quicktime',
    'wmv': 'video/x-ms-wmv',
    'atom': 'application/atom+xml; charset=utf-8',
    'xslt': 'application/xslt+xml',
    'svg': 'image/svg+xml',
    'mathml': 'application/mathml+xml',
    'rss': 'application/rss+xml; charset=utf-8',
    # No trailing whitespace: the value is sent verbatim as a header
    'ics': 'text/calendar; charset=utf-8',
}
64
65
def reconstruct_url(environ):
    """Rebuild the full request URL from a WSGI environ dictionary.

    Follows the URL reconstruction recipe from the WSGI specification
    (PEP 333) with one addition: if the quoted C{PATH_INFO} itself contains
    the scheme/host/script prefix built so far (with C{:} percent-encoded),
    that prefix is removed from it before it is appended.

    The result is also stored in the environ under the key
    C{reconstructed_url}.

    @type environ: dict
    @param environ: WSGI environment dictionary
    @rtype: basestring
    @return: the reconstructed URL
    """
    from urllib import quote

    scheme = environ['wsgi.url_scheme']
    result = scheme + '://'

    host = environ.get('HTTP_HOST')
    if host:
        result += host
    else:
        result += environ['SERVER_NAME']

        # Only mention the port when it isn't the default for the scheme
        default_port = '443' if scheme == 'https' else '80'
        port = environ['SERVER_PORT']
        if port != default_port:
            result += ':' + port

    result += quote(environ.get('SCRIPT_NAME', ''))

    # quote() percent-encodes ':' so the prefix to look for inside the quoted
    # PATH_INFO must have its colons encoded in the same way
    encoded_prefix = result.replace(':', '%3A')
    quoted_path = quote(environ.get('PATH_INFO', ''))
    result += quoted_path.replace(encoded_prefix, '')

    query = environ.get('QUERY_STRING')
    if query:
        result += '?' + query

    environ['reconstructed_url'] = result
    return result
85   
86   
class FileResponse(object):
    """Iterable WSGI response body which streams a file in blocks.

    An instance is primed by calling it with an open file object and the
    file's path; it can then be returned from a WSGI application.  Iterating
    over it reads the file C{readSize} bytes at a time.  The file is closed
    when iteration finishes or when C{close} is called - WSGI servers call
    C{close} on the returned iterable once the request is complete.

    @cvar DEFAULT_READ_SIZE: block size in bytes used if none is given
    @ivar fileObj: file object currently being served or None
    @ivar fileSize: size in bytes of the file being served or None
    """
    DEFAULT_READ_SIZE = 1024

    def __init__(self, readSize=DEFAULT_READ_SIZE):
        """
        @type readSize: int
        @param readSize: number of bytes to read from the file per block
        @raise ValueError: if readSize is not a positive integer
        """
        self.fileObj = None
        self.fileSize = None
        self.readSize = readSize

    def __call__(self, fileObj, fileName):
        """Set the file to be served

        @type fileObj: file
        @param fileObj: file object open for reading
        @type fileName: basestring
        @param fileName: path of the file, used to determine its size
        @rtype: FileResponse
        @return: this object so that it can be returned as a response body
        """
        self.fileSize = os.path.getsize(fileName)
        self.fileObj = fileObj

        return self

    def __iter__(self):
        """Yield the file content in blocks of at most C{readSize} bytes.

        No empty block is yielded at end of file and the file is closed once
        it is exhausted or the iteration is abandoned.
        """
        if self.fileObj is None:
            # Nothing to serve, or the file has already been consumed
            return

        try:
            while True:
                output = self.fileObj.read(self.readSize)
                if not output:
                    break
                yield output
        finally:
            self.close()

    def close(self):
        """Close the file being served.  It's safe to call this repeatedly"""
        fileObj, self.fileObj = self.fileObj, None
        if fileObj is not None:
            fileObj.close()

    def _getReadSize(self):
        return self.__readSize

    def _setReadSize(self, value):
        # Validate before assigning so that a rejected value never replaces
        # the current setting.  Zero is rejected too: read(0) returns no data
        # so nothing of the file would ever be sent
        readSize = int(value)
        if readSize <= 0:
            raise ValueError('Expecting positive integer value for block size '
                             'attribute')
        self.__readSize = readSize

    readSize = property(fget=_getReadSize, fset=_setReadSize,
                        doc="block size reading the file in the iterator and "
                            "returning a response")
119
120
class FileServerApp(object):
    """WSGI application to serve static content from a directory tree.

    GET returns the content of a file, or an HTML index if the path is a
    directory.  PUT writes the request body to a file.  All file system
    access is confined to the root directory the application was created
    with: request paths which would resolve outside of it are refused.

    @cvar PARAM_PREFIX: default prefix for configuration option names
    @cvar DEFAULT_READ_BLK_SIZE: default block size in bytes for reading the
    request body in PUT operations
    @cvar DEFAULT_WRITE_BLK_SIZE: default block size in bytes for returning
    file content in GET operations
    @cvar DEFAULT_CONTENT_TYPE: content type returned if none can be guessed
    from a file's name
    @cvar DEFAULT_HTTP_METHOD_MAP: map of HTTP method name to the function
    implementing it
    """
    PARAM_PREFIX = 'fileserver.'
    DEFAULT_READ_BLK_SIZE = 1024
    DEFAULT_WRITE_BLK_SIZE = 1024
    DEFAULT_CONTENT_TYPE = 'text/plain; charset=utf-8'

    __slots__ = (
        '__fileResponse',
        '__readBlkSize',
        '__httpMethodMap',
        'fileSysPath',
        'mountPoint',
        'mimeTypes'
    )

    def __init__(self, root_path, mountPoint=None):
        """
        @type root_path: basestring
        @param root_path: directory to serve; ~ is expanded and the path is
        made absolute
        @type mountPoint: basestring / None
        @param mountPoint: if set, the part of the request path up to and
        including this string is discarded before mapping the remainder onto
        the file system
        """
        # Holds the GET block size setting - see the writeBlkSize property
        self.__fileResponse = FileResponse()
        self.__readBlkSize = None
        self.__httpMethodMap = None

        self.fileSysPath = os.path.abspath(os.path.expanduser(root_path))
        self.mountPoint = mountPoint

        # Set from property methods to apply validation - referencing
        # self.__class__ means derived class could make an alternative setting

        # Block size for PUT or POST operations
        self.readBlkSize = self.__class__.DEFAULT_READ_BLK_SIZE

        # Block size for GET operation
        self.writeBlkSize = self.__class__.DEFAULT_WRITE_BLK_SIZE

        # Map HTTP method name to a method of this class
        self.httpMethodMap = self.__class__.DEFAULT_HTTP_METHOD_MAP

        # MIME types
        self.mimeTypes = mimetypes.MimeTypes()

    def _getReadBlkSize(self):
        return self.__readBlkSize

    def _setReadBlkSize(self, value):
        # Validate before assigning so that a rejected value never replaces
        # the current setting.  Zero is rejected: no input could be read
        readBlkSize = int(value)
        if readBlkSize <= 0:
            raise ValueError('Expecting positive integer value for block size '
                             'attribute')
        self.__readBlkSize = readBlkSize

    readBlkSize = property(_getReadBlkSize, _setReadBlkSize,
                           doc="Block size in bytes for reading the request "
                               "body in PUT operations")

    def _getWriteBlkSize(self):
        return self.__fileResponse.readSize

    def _setWriteBlkSize(self, value):
        self.__fileResponse.readSize = value

    writeBlkSize = property(_getWriteBlkSize, _setWriteBlkSize,
                            doc="Block size in bytes for returning file "
                                "content in GET operations")

    def _getHttpMethodMap(self):
        return self.__httpMethodMap

    def _setHttpMethodMap(self, value):
        if not isinstance(value, dict):
            raise TypeError('Expecting dict type for HTTP method map '
                            'attribute; got %r' % type(value))

        for name, method in value.items():
            if not isinstance(name, basestring):
                raise TypeError('Expecting string type for HTTP method name; '
                                'got %r' % type(name))

            if not callable(method):
                raise TypeError('Expecting callable for HTTP method ; got %r' %
                                type(method))

        # Copy so that changes made to the caller's dict don't affect this
        # object
        self.__httpMethodMap = value.copy()

    httpMethodMap = property(_getHttpMethodMap, _setHttpMethodMap,
                             doc="Dictionary mapping HTTP method names to "
                                 "callables taking (app, fileSysRelPath, "
                                 "environ, start_response)")

    @classmethod
    def app_factory(cls, global_conf, prefix=PARAM_PREFIX, **app_conf):
        """Function following Paste app factory signature

        @type global_conf: dict
        @param global_conf: PasteDeploy global configuration dictionary
        @type prefix: basestring
        @param prefix: prefix for configuration items.  It is removed from
        the start of any option name carrying it; names without it are passed
        through unchanged
        @type app_conf: dict
        @param app_conf: PasteDeploy application specific configuration
        dictionary
        @rtype: FileServerApp
        @return: new application object
        """
        kw = {}
        for name, val in app_conf.items():
            if prefix and name.startswith(prefix):
                name = name[len(prefix):]
            kw[name] = val

        return cls(**kw)

    @staticmethod
    def _plainTextResponse(start_response, status, response, extraHeaders=()):
        """Start a text/plain response and return its body as a WSGI
        iterable"""
        headers = [('Content-Type', 'text/plain'),
                   ('Content-length', str(len(response)))]
        headers.extend(extraHeaders)
        start_response(status, headers)
        return [response]

    def _resolvePath(self, fileSysRelPath):
        """Map a request path onto a path below the root directory

        @type fileSysRelPath: basestring
        @param fileSysRelPath: path from the request
        @rtype: basestring / None
        @return: normalised absolute file system path or None if the request
        path would resolve to a location outside the root directory
        """
        # Strip ALL leading separators: os.path.join discards the root if
        # its second argument is absolute, exposing the whole file system
        relPath = fileSysRelPath.lstrip('/' + os.sep)

        # normpath collapses any '..' segments so that the containment check
        # is made against the location which would actually be accessed.
        # Symbolic links are deliberately not resolved
        filePath = os.path.normpath(os.path.join(self.fileSysPath, relPath))

        root = self.fileSysPath
        if (filePath != root and
            not filePath.startswith(root.rstrip(os.sep) + os.sep)):
            return None

        return filePath

    def handler(self, environ, start_response):
        """Map the request URL to a file system path and dispatch to the
        function registered for the HTTP request method

        @type environ: dict
        @param environ: WSGI environment dictionary
        @type start_response: callable
        @param start_response: WSGI start response callable
        @return: WSGI response iterable
        """
        urlPath = urlparse(reconstruct_url(environ))[2]

        if self.mountPoint:
            found, relativeURI = urlPath.partition(self.mountPoint)[1:]
            if not found:
                log.error('mount point %r not found in request path %r',
                          self.mountPoint, urlPath)
                return self._plainTextResponse(start_response,
                                               '404 Not found',
                                               '404 Not Found')
        else:
            relativeURI = urlPath

        fileSysRelPath = urllib.url2pathname(relativeURI)

        requestMethodName = environ['REQUEST_METHOD']
        requestMethod = self.httpMethodMap.get(requestMethodName)
        if requestMethod is None:
            response = ('%r HTTP request method is not supported' %
                        requestMethodName)
            status = "%d %s" % (httplib.METHOD_NOT_ALLOWED,
                                httplib.responses[httplib.METHOD_NOT_ALLOWED])

            # A 405 response must tell the client which methods are allowed
            allow = ('Allow', ', '.join(sorted(self.httpMethodMap)))
            return self._plainTextResponse(start_response, status, response,
                                           [allow])

        # The map holds plain functions rather than bound methods so this
        # object is passed explicitly
        return requestMethod(self, fileSysRelPath, environ,
                             start_response)

    def do_get(self, fileSysRelPath, environ, start_response):
        """Return a file's content or, for a directory, an HTML index of it

        @type fileSysRelPath: basestring
        @param fileSysRelPath: request path relative to the root directory
        @type environ: dict
        @param environ: WSGI environment dictionary
        @type start_response: callable
        @param start_response: WSGI start response callable
        @return: WSGI response iterable
        """
        from xml.sax.saxutils import escape

        filePath = self._resolvePath(fileSysRelPath)
        if filePath is None:
            log.error('refused request for path outside root directory: %r',
                      fileSysRelPath)
            return self._plainTextResponse(start_response, '403 Forbidden',
                                           '403 Forbidden')

        if os.path.isdir(filePath):
            try:
                dirContents = sorted(os.listdir(filePath))
            except OSError:
                log.error('failed to list directory %r: %s', filePath,
                          traceback.format_exc())
                return self._plainTextResponse(start_response,
                                               '403 Forbidden',
                                               '403 Forbidden')

            # Links are absolute paths built from the request path so that
            # they are correct whether or not the request ended with a '/'
            # and wherever the application is mounted.  Quoting leaves no
            # characters which are special in an HTML attribute
            basePath = urllib.quote(environ.get('SCRIPT_NAME', '') +
                                    environ.get('PATH_INFO', ''))
            if not basePath.endswith('/'):
                basePath += '/'

            lines = [
                '<a href="%s">%s</a>' %
                (basePath + urllib.pathname2url(filename), escape(filename))
                for filename in dirContents
            ]

            response = '<html>' + '<br>'.join(lines) + '</html>'
            start_response('200 OK',
                           [('Cache-Control', 'no-cache'),
                            ('Pragma', 'no-cache'),
                            ('Content-Type', 'text/html; charset=utf-8'),
                            ('Content-Length', str(len(response)))])

            return [response]

        try:
            fileObj = open(filePath, 'rb')
        except IOError:
            log.error('failed to open file %r: %s', filePath,
                      traceback.format_exc())
            return self._plainTextResponse(start_response, '404 Not found',
                                           '404 Not Found')

        log.debug('opened file %s', filePath)

        # Use a new response object for each request: one shared between
        # requests would have its file replaced if a second request arrived
        # before the first had been fully sent
        try:
            response = FileResponse(readSize=self.writeBlkSize)(fileObj,
                                                               filePath)
        except OSError:
            fileObj.close()
            raise

        start_response('200 OK',
                       [('Cache-Control', 'no-cache'),
                        ('Pragma', 'no-cache'),
                        ('Content-Length', str(response.fileSize)),
                        ('Content-Type', self.getContentType(filePath))])
        return response

    def do_put(self, fileSysRelPath, environ, start_response):
        """Write the request body to a file

        @type fileSysRelPath: basestring
        @param fileSysRelPath: request path relative to the root directory
        @type environ: dict
        @param environ: WSGI environment dictionary
        @type start_response: callable
        @param start_response: WSGI start response callable
        @return: WSGI response iterable.  The status is 201 if the file was
        created, 204 if an existing file was replaced, 403 if the file can't
        be written, 411 if no content length was given and 400 if the
        content length is invalid or more than the input actually received
        """
        filePath = self._resolvePath(fileSysRelPath)
        if filePath is None:
            log.error('failed to open file for writing %r', fileSysRelPath)
            return self._plainTextResponse(start_response, '403 Forbidden',
                                           '403 Forbidden')

        # Check the length before opening the file: opening it for writing
        # truncates any existing content
        contentLength = environ.get('CONTENT_LENGTH')
        if not contentLength:
            return self._plainTextResponse(start_response,
                                           '411 Length Required',
                                           '411 Length Required')
        try:
            inputLength = int(contentLength)
        except ValueError:
            inputLength = -1

        if inputLength < 0:
            return self._plainTextResponse(start_response, '400 Bad Request',
                                           '400 Bad Request')

        isNew = not os.path.exists(filePath)
        try:
            # Binary mode so that the content is written exactly as received
            f = open(filePath, 'wb')
        except (IOError, OSError):
            log.error('failed to open file for writing %r', filePath)
            return self._plainTextResponse(start_response, '403 Forbidden',
                                           '403 Forbidden')

        log.debug('opened file for writing %s', filePath)

        # Never read more than the declared length: the WSGI input stream
        # may block if read past the end of the request body
        inputStream = environ['wsgi.input']
        remaining = inputLength
        try:
            while remaining > 0:
                inputBlk = inputStream.read(min(self.readBlkSize, remaining))
                if not inputBlk:
                    break

                f.write(inputBlk)
                remaining -= len(inputBlk)
        finally:
            f.close()

        if remaining > 0:
            log.error('input for %r ended %d bytes short of the content '
                      'length', filePath, remaining)
            return self._plainTextResponse(start_response, '400 Bad Request',
                                           '400 Bad Request')

        if isNew:
            return self._plainTextResponse(start_response, '201 Created',
                                           '201 Created')

        start_response('204 No Content', [])
        return []

    def getContentType(self, path_info):
        """Make a best guess at the content type

        @type path_info: basestring
        @param path_info: path or URL of the file
        @rtype: basestring
        @return: content type guessed from the file name extension or
        DEFAULT_CONTENT_TYPE if no guess could be made
        """
        contentType = self.mimeTypes.guess_type(path_info)[0]
        if contentType is None:
            return self.DEFAULT_CONTENT_TYPE

        return contentType

    def __call__(self, environ, start_response):
        """WSGI entry point - see handler"""
        return self.handler(environ, start_response)

    # Plain functions rather than bound methods: handler passes the
    # application object explicitly when calling them
    DEFAULT_HTTP_METHOD_MAP = {
        'GET':          do_get,
        'PUT':          do_put,
    }
Note: See TracBrowser for help on using the repository browser.