source: ceda_http_fileserver/trunk/ceda_http_fileserver/ceda/server/wsgi/fileserver/app.py @ 7009

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/ceda_http_fileserver/trunk/ceda_http_fileserver/ceda/server/wsgi/fileserver/app.py@7009
Revision 7009, 12.4 KB checked in by pjkersha, 10 years ago (diff)

Incomplete - task 9: Data Browser Replacement - Refactoring to include:

  • block based reading and writing of requests/responses
  • Fix to make WSGI compliant input read
  • configurable HTTP method support


Next:

  • URL encoding of file names for GET
  • Use mimetypes package
  • Property svn:keywords set to Id
Line 
1"""CEDA (Centre for Environmental Data Archival) File Server WSGI Application
2module
3"""
4__author__ = "P J Kershaw"
5__date__ = "11/06/10"
6__copyright__ = "(C) 2010 Science and Technology Facilities Council"
7__license__ = """http://www.apache.org/licenses/LICENSE-2.0"""
8__contact__ = "Philip.Kershaw@stfc.ac.uk"
9__revision__ = '$Id$'
10#   Copyright (c) 2006-2007 Open Source Applications Foundation
11#
12#   Licensed under the Apache License, Version 2.0 (the "License");
13#   you may not use this file except in compliance with the License.
14#   You may obtain a copy of the License at
15#
16#       http://www.apache.org/licenses/LICENSE-2.0
17#
18#   Unless required by applicable law or agreed to in writing, software
19#   distributed under the License is distributed on an "AS IS" BASIS,
20#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21#   See the License for the specific language governing permissions and
22#   limitations under the License.
23
import logging
import os

try:
    # Python 2 module names (the ones this module was written against).
    import httplib
    from cgi import escape
    from urllib import quote, unquote
    from urlparse import urlparse
except ImportError:
    # Python 3 locations of the same names.
    import http.client as httplib
    from html import escape
    from urllib.parse import quote, unquote, urlparse
28
log = logging.getLogger(__name__)

# Map of lower-case file extension (without the dot) to the Content-Type
# header value served for it.  The special key 'fallback' holds the type used
# when an extension is not listed.
# Content type sources taken from http://en.wikipedia.org/wiki/MIME_type
content_type_table = {
    # Default for unrecognised extensions
    'fallback': 'text/plain; charset=utf-8',

    # Text and markup
    'html': 'text/html; charset=utf-8',
    'xhtml': 'text/html; charset=utf-8',
    'css': 'text/css; charset=utf-8',
    # Value previously carried a stray trailing space which was emitted
    # verbatim in the Content-Type header.
    'ics': 'text/calendar; charset=utf-8',

    # Application formats
    'js': 'application/x-javascript',
    'ogg': 'application/ogg',
    'swf': 'application/x-shockwave-flash',
    'atom': 'application/atom+xml; charset=utf-8',
    'xslt': 'application/xslt+xml',
    'mathml': 'application/mathml+xml',
    'rss': 'application/rss+xml; charset=utf-8',

    # Audio
    'rm': 'audio/vnd.rn-realaudio',
    'ra': 'audio/vnd.rn-realaudio',
    'mp3': 'audio/mpeg',
    'wma': 'audio/x-ms-wma',
    'wav': 'audio/x-wav',

    # Images
    'gif': 'image/gif',
    'jpeg': 'image/jpeg',
    'jpg': 'image/jpeg',
    'png': 'image/png',
    'tiff': 'image/tiff',
    'svg': 'image/svg+xml',

    # Video
    'mpeg': 'video/mpeg',
    'mp4': 'video/mp4',
    'qt': 'video/quicktime',
    'mov': 'video/quicktime',
    'wmv': 'video/x-ms-wmv',
}
61
62
def reconstruct_url(environ):
    """Rebuild the full request URL from a WSGI environ dictionary.

    Follows the URL reconstruction recipe from the WSGI specification
    (PEP 333).  The result is also stored in C{environ} under the key
    C{'reconstructed_url'}.

    @type environ: dict
    @param environ: WSGI environ; must contain C{wsgi.url_scheme} and, when
    C{HTTP_HOST} is absent or empty, C{SERVER_NAME} and C{SERVER_PORT}
    @rtype: basestring
    @return: scheme, host (with any non-default port), URL-quoted
    C{SCRIPT_NAME} and C{PATH_INFO}, and the query string if there is one
    """
    scheme = environ['wsgi.url_scheme']
    url = scheme + '://'

    if environ.get('HTTP_HOST'):
        url += environ['HTTP_HOST']
    else:
        url += environ['SERVER_NAME']

        # Only spell out the port when it is not the default for the scheme
        if scheme == 'https':
            default_port = '443'
        else:
            default_port = '80'

        server_port = environ['SERVER_PORT']
        if server_port != default_port:
            url += ':' + server_port

    url += quote(environ.get('SCRIPT_NAME', ''))

    # Any occurrence of the URL built so far (with ':' in its quoted form) is
    # removed from the quoted PATH_INFO.
    # NOTE(review): this looks like it is meant to cope with clients/proxies
    # that send an absolute URI as the request path - confirm.
    url_so_far_quoted = url.replace(':', '%3A')
    url += quote(environ.get('PATH_INFO', '')).replace(url_so_far_quoted, '')

    if environ.get('QUERY_STRING'):
        url += '?' + environ['QUERY_STRING']

    environ['reconstructed_url'] = url
    return url
82   
83   
class FileResponse(object):
    """Iterable response body which streams an open file in fixed size
    blocks.

    Typical use: create an instance, call it with an open file object and
    the file's path, then hand the instance back to the WSGI server as the
    response iterable.  C{close} is provided so that a WSGI server can
    release the file once the response has been sent.
    """
    DEFAULT_READ_SIZE = 1024

    def __init__(self, readSize=DEFAULT_READ_SIZE):
        """
        @type readSize: int
        @param readSize: number of bytes to read from the file per block
        """
        self.fileObj = None
        self.fileSize = None
        self.readSize = readSize

    def __call__(self, fileObj, fileName):
        """Attach the file to be streamed.

        @type fileObj: file
        @param fileObj: file object open for reading
        @type fileName: basestring
        @param fileName: path of the same file, used to look up its size
        @rtype: FileResponse
        @return: this object, so the result of the call can be used directly
        as the response
        """
        self.fileSize = os.path.getsize(fileName)
        self.fileObj = fileObj
        return self

    def __iter__(self):
        """Yield the file content one block of C{readSize} bytes at a time,
        stopping at end of file (no empty trailing block is yielded).
        """
        read = self.fileObj.read
        blockSize = self.readSize
        while True:
            block = read(blockSize)
            if not block:
                break
            yield block

    def close(self):
        """Close the attached file, if any.  Safe to call more than once."""
        if self.fileObj is not None:
            self.fileObj.close()
            self.fileObj = None

    def _getSize(self):
        return self.fileSize

    size = property(fget=_getSize,
                    doc="size in bytes of the attached file - alias for the "
                        "fileSize attribute")

    def _getReadSize(self):
        return self.__readSize

    def _setReadSize(self, value):
        # Validate before storing so that a rejected value leaves the
        # previous setting intact.  Zero is refused as well as negatives:
        # read(0) always returns nothing so the response would be empty.
        readSize = int(value)
        if readSize <= 0:
            raise ValueError('Expecting positive integer value for block size '
                             'attribute')
        self.__readSize = readSize

    readSize = property(fget=_getReadSize, fset=_setReadSize,
                        doc="block size reading the file in the iterator and "
                            "returning a response")
114
115
class FileServerApp(object):
    """WSGI application to serve static content from, and accept uploads
    into, a directory tree.

    The HTTP request method is looked up in C{httpMethodMap} and dispatched
    to the corresponding handler function.  All file system access is
    confined to the directory given as C{root_path}.
    """
    PARAM_PREFIX = 'fileserver.'
    DEFAULT_READ_BLK_SIZE = 1024
    DEFAULT_WRITE_BLK_SIZE = 1024

    __slots__ = (
        '__fileResponse',
        '__readBlkSize',
        '__httpMethodMap',
        'path',
        'mount_point'
    )

    def __init__(self, root_path, mount_point=None):
        """
        @type root_path: basestring
        @param root_path: directory to serve; '~' is expanded and the path
        is made absolute
        @type mount_point: basestring / None
        @param mount_point: if set, the part of the URL path following the
        first occurrence of this string is taken as the file to serve
        """
        # Holds the validated block size for GET responses (see writeBlkSize)
        self.__fileResponse = FileResponse()
        self.__readBlkSize = None
        self.__httpMethodMap = None

        self.path = os.path.abspath(os.path.expanduser(root_path))
        self.mount_point = mount_point

        # Set from property methods to apply validation - referencing
        # self.__class__ means derived class could make an alternative setting

        # Block size for PUT or POST operations
        self.readBlkSize = self.__class__.DEFAULT_READ_BLK_SIZE

        # Block size for GET operation
        self.writeBlkSize = self.__class__.DEFAULT_WRITE_BLK_SIZE

        # Map HTTP method name to a method of this class
        self.httpMethodMap = self.__class__.DEFAULT_HTTP_METHOD_MAP

    def _getReadBlkSize(self):
        return self.__readBlkSize

    def _setReadBlkSize(self, value):
        # Validate first so that a bad value does not replace a good one.
        # Zero is rejected too: it cannot be used to read a request body.
        blkSize = int(value)
        if blkSize <= 0:
            raise ValueError('Expecting positive integer value for block size '
                             'attribute')
        self.__readBlkSize = blkSize

    readBlkSize = property(_getReadBlkSize, _setReadBlkSize,
                           doc="Block size in bytes used to read the request "
                               "body for PUT")

    def _getWriteBlkSize(self):
        return self.__fileResponse.readSize

    def _setWriteBlkSize(self, value):
        self.__fileResponse.readSize = value

    writeBlkSize = property(_getWriteBlkSize, _setWriteBlkSize,
                            doc="Block size in bytes used to read a file "
                                "and return it in a GET response")

    def _getHttpMethodMap(self):
        return self.__httpMethodMap

    def _setHttpMethodMap(self, value):
        if not isinstance(value, dict):
            raise TypeError('Expecting dict type for HTTP method map '
                            'attribute; got %r' % type(value))

        try:
            stringTypes = basestring
        except NameError:
            # No basestring builtin on Python 3
            stringTypes = str

        for name, method in value.items():
            if not isinstance(name, stringTypes):
                raise TypeError('Expecting string type for HTTP method name; '
                                'got %r' % type(name))

            if not callable(method):
                raise TypeError('Expecting callable for HTTP method ; got %r' %
                                type(method))

        # Copy so later changes to the caller's dict don't affect this object
        self.__httpMethodMap = value.copy()

    httpMethodMap = property(_getHttpMethodMap, _setHttpMethodMap,
                             doc="dict mapping HTTP method name to a "
                                 "callable taking (app, serve_file, environ, "
                                 "start_response)")

    @classmethod
    def app_factory(cls, global_conf, prefix=PARAM_PREFIX, **app_conf):
        """Function following Paste app factory signature

        @type global_conf: dict
        @param global_conf: PasteDeploy global configuration dictionary
        @type prefix: basestring
        @param prefix: prefix for configuration items; it is removed from
        the start of any option name which carries it
        @type app_conf: dict
        @param app_conf: PasteDeploy application specific configuration
        dictionary
        @rtype: FileServerApp
        @return: new application object
        """
        kw = {}
        for name, value in app_conf.items():
            if prefix and name.startswith(prefix):
                name = name[len(prefix):]
            kw[name] = value

        return cls(**kw)

    @staticmethod
    def _respond(start_response, status, body, headers=(),
                 content_type='text/plain'):
        """Start a response with a short in-memory body and return the body
        iterable.  Text is encoded as UTF-8; Content-Length is always set.
        """
        if not isinstance(body, bytes):
            body = body.encode('utf-8')

        start_response(status,
                       list(headers) +
                       [('Content-Type', content_type),
                        ('Content-Length', str(len(body)))])
        return [body]

    def _resolve_path(self, serve_file):
        """Map the path from the request onto the file system below
        C{self.path}.

        @type serve_file: basestring
        @param serve_file: URL-decoded path from the request
        @rtype: basestring / None
        @return: normalised absolute path, or None if the request would fall
        outside the root directory
        """
        if '\0' in serve_file:
            return None

        # Leading separators must go: os.path.join discards everything
        # before an absolute component, which would expose the whole file
        # system to the web client.
        relPath = serve_file.lstrip('/' + os.sep)
        filePath = os.path.normpath(os.path.join(self.path, relPath))

        # '..' segments may still have climbed out of the root.  Symbolic
        # links are not resolved here.
        rootPrefix = self.path.rstrip(os.sep) + os.sep
        if filePath != self.path and not filePath.startswith(rootPrefix):
            return None

        return filePath

    def handler(self, environ, start_response):
        """Work out the requested file from the URL and dispatch on the HTTP
        request method.

        @type environ: dict
        @param environ: WSGI environ
        @type start_response: callable
        @param start_response: WSGI start response function
        @return: WSGI response iterable
        """
        url = urlparse(reconstruct_url(environ))
        urlPath = url[2]

        if self.mount_point is not None:
            found, serve_file = urlPath.partition(self.mount_point)[1:]
            if not found:
                # Request is not for anything under the mount point
                return self._respond(start_response, '404 Not found',
                                     '404 Not Found')
        else:
            serve_file = urlPath

        # reconstruct_url quotes the path - undo all escapes, not only %20
        serve_file = unquote(serve_file)

        requestMethodName = environ['REQUEST_METHOD']
        requestMethod = self.httpMethodMap.get(requestMethodName)
        if requestMethod is None:
            response = ('%r HTTP request method is not supported' %
                        requestMethodName)
            status = "%d %s" % (httplib.METHOD_NOT_ALLOWED,
                                httplib.responses[httplib.METHOD_NOT_ALLOWED])
            allow = ', '.join(sorted(self.httpMethodMap))
            return self._respond(start_response, status, response,
                                 headers=[('Allow', allow)])

        # Map entries are plain functions so self is passed explicitly
        return requestMethod(self, serve_file, environ, start_response)

    def _list_directory(self, dirPath, environ, start_response):
        """Return an HTML page linking to each entry of a directory."""
        try:
            dirContents = sorted(os.listdir(dirPath))
        except OSError:
            log.error('failed to list directory %s', dirPath)
            return self._respond(start_response, '403 Forbidden',
                                 '403 Forbidden')

        # Links are built from the request's own path so they are correct
        # with or without a mount point or a trailing slash on the URL.
        base = quote(environ.get('SCRIPT_NAME', '') +
                     environ.get('PATH_INFO', ''))
        if not base.endswith('/'):
            base += '/'

        lines = ['<a href="%s">%s</a>' %
                 (base + quote(filename), escape(filename))
                 for filename in dirContents]

        response = '<html>' + '<br>'.join(lines) + '</html>'
        return self._respond(start_response, '200 OK', response,
                             headers=[('Cache-Control', 'no-cache'),
                                      ('Pragma', 'no-cache')],
                             content_type='text/html; charset=utf-8')

    def do_get(self, serve_file, environ, start_response):
        """Serve a file, or an HTML listing if the path is a directory.

        @type serve_file: basestring
        @param serve_file: URL-decoded path of the file requested
        @type environ: dict
        @param environ: WSGI environ
        @type start_response: callable
        @param start_response: WSGI start response function
        @return: WSGI response iterable
        """
        filePath = self._resolve_path(serve_file)
        if filePath is None:
            log.error('request path %r is outside root %s', serve_file,
                      self.path)
            return self._respond(start_response, '404 Not found',
                                 '404 Not Found')

        isDir = os.path.isdir(filePath)
        if serve_file.endswith('/') or isDir:
            if not isDir:
                log.error('failed to list directory %s/%s', self.path,
                          serve_file)
                return self._respond(start_response, '404 Not found',
                                     '404 Not Found')

            return self._list_directory(filePath, environ, start_response)

        try:
            f = open(filePath, 'rb')
        except IOError:
            log.error('failed to open file %s/%s', self.path, serve_file)
            return self._respond(start_response, '404 Not found',
                                 '404 Not Found')

        log.debug('opened file %s', serve_file)

        # A response object per request: a single shared one would have its
        # file swapped from under it by a concurrent request.
        response = FileResponse(readSize=self.writeBlkSize)
        response(f, filePath)
        start_response('200 OK',
                       [('Cache-Control', 'no-cache'),
                        ('Pragma', 'no-cache'),
                        ('Content-Length', str(response.fileSize)),
                        ('Content-Type',
                         self.guess_content_type(serve_file))])
        return response

    def do_put(self, serve_file, environ, start_response):
        """Write the request body to a file below the root directory.

        The body is read from C{wsgi.input} in blocks of C{readBlkSize}
        bytes, never reading beyond the declared C{CONTENT_LENGTH}.

        NOTE(review): no access control is applied here - any client which
        can reach the application can create or overwrite files; confirm
        that this is enforced elsewhere.

        @type serve_file: basestring
        @param serve_file: URL-decoded path of the file to write
        @type environ: dict
        @param environ: WSGI environ
        @type start_response: callable
        @param start_response: WSGI start response function
        @return: WSGI response iterable
        """
        filePath = self._resolve_path(serve_file)
        if filePath is None:
            log.error('request path %r is outside root %s', serve_file,
                      self.path)
            return self._respond(start_response, '403 Forbidden',
                                 '403 Forbidden')

        # CONTENT_LENGTH is a string in the environ and may be missing
        contentLength = environ.get('CONTENT_LENGTH')
        if not contentLength:
            return self._respond(start_response, '411 Length Required',
                                 '411 Length Required')
        try:
            inputLength = int(contentLength)
        except ValueError:
            inputLength = -1

        if inputLength < 0:
            return self._respond(start_response, '400 Bad Request',
                                 '400 Bad Request')

        # Checked before opening because opening for write creates the file
        existed = os.path.exists(filePath)
        try:
            f = open(filePath, 'wb')
        except (IOError, OSError):
            log.error('failed to open file for writing %s/%s', self.path,
                      serve_file)
            return self._respond(start_response, '403 Forbidden',
                                 '403 Forbidden')

        log.debug('opened file for writing %s', serve_file)

        inputStream = environ['wsgi.input']
        nRemaining = inputLength
        try:
            while nRemaining > 0:
                inputBlk = inputStream.read(min(self.readBlkSize,
                                                nRemaining))
                if not inputBlk:
                    # Input ended before the declared length was reached
                    break

                f.write(inputBlk)
                nRemaining -= len(inputBlk)
        finally:
            f.close()

        if nRemaining > 0:
            log.error('request body for %s is %d bytes short of its '
                      'content length', serve_file, nRemaining)
            return self._respond(start_response, '400 Bad Request',
                                 '400 Bad Request')

        if existed:
            return self._respond(start_response, '200 OK', '200 OK')

        return self._respond(start_response, '201 Created', '201 Created')

    def do_mkcollection(self, serve_file, environ, start_response):
        """Placeholder: creating collections is not implemented, so answer
        with a 501 status rather than returning no response at all.

        NOTE(review): WebDAV names this request method MKCOL - confirm that
        'MKCOLLECTION' is the intended key in DEFAULT_HTTP_METHOD_MAP.
        """
        return self._respond(start_response, '501 Not Implemented',
                             '501 Not Implemented')

    DEFAULT_HTTP_METHOD_MAP = {
        'GET':          do_get,
        'PUT':          do_put,
        'MKCOLLECTION': do_mkcollection
    }

    def guess_content_type(self, path_info):
        """Make a best guess at the content type from the file extension.

        @type path_info: basestring
        @param path_info: request path or file name
        @rtype: basestring
        @return: entry from content_type_table for the (case-insensitive)
        extension of the last path segment, or the 'fallback' entry if the
        name has no extension or the extension is not listed
        """
        fileName = path_info.rsplit('/', 1)[-1]
        if '.' in fileName:
            extension = fileName.rsplit('.', 1)[1].lower()
            if extension in content_type_table:
                return content_type_table[extension]

        return content_type_table['fallback']

    def __call__(self, environ, start_response):
        """WSGI entry point - delegates to C{handler}."""
        return self.handler(environ, start_response)
Note: See TracBrowser for help on using the repository browser.