source: MILK/trunk/milk_server/milk_server/lib/cache.py @ 2630

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/MILK/trunk/milk_server/milk_server/lib/cache.py@5176
Revision 2630, 12.8 KB checked in by spascoe, 13 years ago (diff)

Added cache code but performance is still sluggish. Need to work out
exactly what's going on.

Line 
"""
General thread-safe cache class.

Taken from
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/302997/index_txt

@author: Nicolas Lehuen

"""
# -*- coding: iso-8859-1 -*-
11
12from os import stat
13from time import time, mktime
14from rfc822 import parsedate
15from calendar import timegm
16import urllib2
17import re
18import weakref
19import new
20
21try:
22    from threading import Lock
23except ImportError:
24    from dummy_threading import Lock
25
# Sentinel stored in Entry._value until a real value has been packed.
NOT_INITIALIZED = object()


class Entry(object):
    """ A cache entry, mostly an internal object.

        Carries the key, the stored value (NOT_INITIALIZED until something is
        packed into it) and a per-entry lock that serialises check/build for
        that key.  Bounded caches additionally attach ``_previous``/``_next``
        links so the entry can sit in the access (LRU) list.
    """

    def __init__(self, key):
        object.__init__(self)
        self._key = key
        self._value = NOT_INITIALIZED
        self._lock = Lock()


class Cache(object):
    """ An abstract, multi-threaded cache object.

        Subclasses implement ``build`` (mandatory) and may override ``key``,
        ``check`` and ``commit``.  ``self._lock`` guards the dictionary and the
        access list; each entry's own lock guards its value.
    """

    def __init__(self, max_size=0):
        """ Builds a cache with a limit of max_size entries.
            If this limit is exceeded, the Least Recently Used entry is discarded.
            if max_size==0, the cache is unbounded (no LRU rule is applied).
        """
        object.__init__(self)
        self._maxsize = max_size
        self._dict = {}
        self._lock = Lock()

        # Header (sentinel node) of the circular, doubly linked access list.
        # head._next is the LRU entry, head._previous the MRU entry.
        if self._maxsize:
            self._head = Entry(None)
            self._head._previous = self._head
            self._head._next = self._head

    def __setitem__(self, name, value):
        """ Populates the cache with a given name and value. """
        entry = self._get_entry(self.key(name))
        with entry._lock:
            self._pack(entry, value)
            self.commit()

    def __getitem__(self, name):
        """ Gets a value from the cache, builds it if required. """
        return self._checkitem(name)[2]

    def __delitem__(self, name):
        """ Removes the entry for name; raises KeyError if it is not cached. """
        with self._lock:
            key = self.key(name)
            entry = self._dict.pop(key)
            # The entry must leave the access list too, otherwise a later
            # eviction would pick this ghost and fail with KeyError.
            if self._maxsize:
                self._unlink(entry)

    def _get_entry(self, key):
        """ Returns the entry for key, creating it if needed, and marks it
            as most recently used when the cache is bounded.
        """
        with self._lock:
            entry = self._dict.get(key)
            if entry is None:
                entry = Entry(key)
                self._dict[key] = entry
                if self._maxsize:
                    entry._next = entry._previous = None
                    self._access(entry)
                    self._checklru()
            elif self._maxsize:
                self._access(entry)
            return entry

    def _checkitem(self, name):
        """ Gets a value from the cache, builds it if required.
            Returns a tuple is_new, key, value, entry.
            If is_new is True, the result had to be rebuilt.
        """
        key = self.key(name)
        entry = self._get_entry(key)

        with entry._lock:
            value = self._unpack(entry)
            opened = self.check(key, name, entry)
            # Rebuild when nothing is stored yet or check() reported staleness
            # by handing back an opened resource.
            is_new = value is NOT_INITIALIZED or opened is not None
            if is_new:
                value = self.build(key, name, opened, entry)
                self._pack(entry, value)
                self.commit()
            return is_new, key, value, entry

    def mru(self):
        """ Returns the Most Recently Used key (None if unbounded or empty) """
        if not self._maxsize:
            return None
        with self._lock:
            return self._head._previous._key

    def lru(self):
        """ Returns the Least Recently Used key (None if unbounded or empty) """
        if not self._maxsize:
            return None
        with self._lock:
            return self._head._next._key

    def key(self, name):
        """ Override this method to extract a key from the name passed to the [] operator """
        return name

    def commit(self):
        """ Override this method if you want to do something each time the underlying dictionary is modified (e.g. make it persistent). """
        pass

    def clear(self):
        """ Clears the cache """
        with self._lock:
            self._dict.clear()
            if self._maxsize:
                self._head._next = self._head
                self._head._previous = self._head

    def check(self, key, name, entry):
        """ Override this method to check whether the entry with the given name is stale. Return None if it is fresh
            or an opened resource if it is stale. The object returned will be passed to the 'build' method as the 'opened' parameter.
            Use the 'entry' parameter to store meta-data if required. This method is called while the entry's lock is held.
        """
        return None

    def build(self, key, name, opened, entry):
        """ Build the cached value with the given name from the given opened resource. Use entry to obtain or store meta-data if needed.
            This method is called while the entry's lock is held.
        """
        raise NotImplementedError()

    def _unlink(self, entry):
        """ Internal use only, must be invoked within a cache lock. Detaches entry from the access list. """
        entry._previous._next = entry._next
        entry._next._previous = entry._previous
        entry._next = entry._previous = None

    def _access(self, entry):
        """ Internal use only, must be invoked within a cache lock. Updates the access list. """
        if entry._next is self._head:
            return  # already the most recently used entry
        if entry._previous is not None:
            self._unlink(entry)
        # insert the entry at the end (MRU side) of the access list
        tail = self._head._previous
        entry._previous = tail
        entry._next = self._head
        tail._next = entry
        self._head._previous = entry

    def _checklru(self):
        """ Internal use only, must be invoked within a cache lock. Removes the LRU entry if needed. """
        if len(self._dict) > self._maxsize:
            lru = self._head._next
            self._unlink(lru)
            del self._dict[lru._key]

    def _pack(self, entry, value):
        """ Store the value in the entry. """
        entry._value = value

    def _unpack(self, entry):
        """ Recover the value from the entry, returns NOT_INITIALIZED if it is not OK. """
        return entry._value
213
class WeakCache(Cache):
    """ This cache holds weak references to the values it stores. Whenever a value is no longer
        normally referenced, it is removed from the cache. Useful for sharing the result of long
        computations but letting them go as soon as they are not needed by anybody.
    """

    def _pack(self, entry, value):
        """ Store a weak reference to value; the slot is dropped when value dies. """
        def _discard(ref):
            # The key may have been evicted, cleared or re-created since this
            # reference was stored: only drop the slot if it is still ours.
            if self._dict.get(entry._key) is not entry:
                return
            try:
                self.__delitem__(entry._key)
            except KeyError:
                # Lost a race with another removal; nothing left to do.
                pass
            # NOTE(review): __delitem__ takes the cache lock; confirm values
            # can never be collected while this thread already holds it.

        entry._value = weakref.ref(value, _discard)

    def _unpack(self, entry):
        """ Return the referenced value, or NOT_INITIALIZED if none is stored or it has died. """
        if entry._value is NOT_INITIALIZED:
            return NOT_INITIALIZED

        value = entry._value()
        if value is None:
            return NOT_INITIALIZED
        return value
232
class FileCache(Cache):
    """ A file cache. Returns the content of the files as a string, given their filename.
        Whenever the files are modified (according to their modification time) the cache is updated.
        Override the build method to obtain more interesting behaviour.
    """

    def __init__(self, max_size=0, mode='rb'):
        """ max_size is passed to Cache; mode is the mode used to open the files. """
        Cache.__init__(self, max_size)
        self.mode = mode

    def check(self, key, name, entry):
        """ Return an open file when the cached content is missing or the
            file's modification time changed, None when it is still fresh.
            Errors from stat/open propagate to the caller.
        """
        timestamp = stat(key).st_mtime

        # A value stored through __setitem__ has no _timestamp: treat it as stale.
        if (entry._value is not NOT_INITIALIZED
                and getattr(entry, '_timestamp', None) == timestamp):
            return None

        # Open first: if this fails the old timestamp must stay, otherwise the
        # next lookup would consider the outdated content fresh.
        opened = open(key, self.mode)
        entry._timestamp = timestamp
        return opened

    def build(self, key, name, opened, entry):
        """ Return the content of the file as a string. Override this for better behaviour. """
        try:
            return opened.read()
        finally:
            opened.close()
261
def parseRFC822Time(t):
    """ Convert an RFC 822 date string (as found in HTTP headers) to seconds
        since the epoch, returned as a float.

        The date is taken to be GMT, as HTTP requires; any zone suffix is
        ignored by parsedate.  Raises ValueError if t cannot be parsed.
    """
    parsed = parsedate(t)
    if parsed is None:
        raise ValueError('unparseable RFC 822 date: %r' % (t,))
    # timegm interprets the tuple as UTC; mktime would apply the local zone.
    return float(timegm(parsed))
264
# Extracts the number of seconds from a Cache-Control "max-age=N" directive
# (case-insensitive).  Raw string so the regex escapes are not string escapes.
re_max_age = re.compile(r'max-age\s*=\s*(\d+)', re.I)
266
class HTTPEntity(object):
    """ Pairs the body of an HTTP response (entity) with its headers (metadata). """

    def __init__(self, entity, metadata):
        self.entity = entity
        self.metadata = metadata

    def __repr__(self):
        return 'HTTPEntity(%r, %s)' % (self.entity, self.metadata)

    def __str__(self):
        # The string form is just the body.
        return self.entity
277
class HTTPCache(Cache):
    """ An HTTP cache. Returns the entity found at the given URL.
        Uses Expires, ETag and Last-Modified headers to minimize bandwidth usage.
        Partial Cache-Control support (only max-age is supported).
    """

    def check(self, key, name, entry):
        """ Return an open response when the entity must be (re)built, or None
            when the cached one is still valid (not yet expired, or the server
            answered 304 Not Modified).  Other HTTP errors are re-raised.
        """
        # Expiry and validators only make sense when a value is actually
        # cached; otherwise build() would be handed None.
        have_value = entry._value is not NOT_INITIALIZED

        if have_value and time() < getattr(entry, '_expires', 0):
            return None

        request = urllib2.Request(key)
        if have_value:
            validator = getattr(entry, '_validator', None)
            if validator is not None:
                header, value = validator
                request.headers[header] = value

        try:
            opened = urllib2.urlopen(request)
        except urllib2.HTTPError as error:
            if error.code == 304:
                return None
            raise

        # Close the response if header processing fails, so it cannot leak.
        handed_over = False
        try:
            headers = opened.info()
            self._update_expiry(entry, headers)
            self._update_validator(entry, headers)
            handed_over = True
            return opened
        finally:
            if not handed_over:
                opened.close()

    def _update_expiry(self, entry, headers):
        """ Record on the entry when the response expires, if the headers say so. """
        try:
            # search, not match: max-age is rarely the first directive.
            found = re_max_age.search(headers['cache-control'])
        except KeyError:
            found = None
        if found:
            entry._expires = time() + int(found.group(1))
            return

        try:
            date = parseRFC822Time(headers['date'])
            expires = parseRFC822Time(headers['expires'])
        except (KeyError, TypeError, ValueError, OverflowError):
            # Header missing or not a date (e.g. "Expires: 0"): no expiry info.
            return
        entry._expires = time() + (expires - date)

    def _update_validator(self, entry, headers):
        """ Remember the header to send for revalidation; ETag is preferred over Last-Modified. """
        try:
            entry._validator = 'If-None-Match', headers['etag']
        except KeyError:
            try:
                entry._validator = 'If-Modified-Since', headers['last-modified']
            except KeyError:
                pass

    def build(self, key, name, opened, entry):
        """ Read the response and wrap body and headers in an HTTPEntity. """
        try:
            return HTTPEntity(opened.read(), dict(opened.info()))
        finally:
            opened.close()
345
# Matches runs of non-word characters; used to turn a file name or URL into
# a name acceptable for a module.
re_not_word = re.compile(r'\W+')
347
class ModuleCache(FileCache):
    """ A module cache. Give it a file name, it returns a module
        which results from the execution of the Python script it contains.
        This module is not inserted into sys.modules.
    """

    def __init__(self, max_size=0):
        # Scripts are opened in text mode.
        FileCache.__init__(self, max_size, 'r')

    def build(self, key, name, opened, entry):
        """ Execute the opened script in a fresh module and return that module. """
        try:
            module = new.module(re_not_word.sub('_', key))
            module.__file__ = key
            # Tuple form of exec: same meaning as "exec opened in module.__dict__".
            exec(opened, module.__dict__)
            return module
        finally:
            opened.close()
364
class HttpModuleCache(HTTPCache):
    """ A module cache. Give it an HTTP URL, it returns a module
        which results from the execution of the Python script it contains.
        This module is not inserted into sys.modules.
    """

    def __init__(self, max_size=0):
        HTTPCache.__init__(self, max_size)

    def build(self, key, name, opened, entry):
        """ Download the script, execute it in a fresh module and return that module. """
        # SECURITY: this runs code fetched over the network with the full
        # privileges of the process; only use it with trusted URLs.
        try:
            module = new.module(re_not_word.sub('_', key))
            module.__file__ = key
            # compile() wants '\n' line endings.
            source = opened.read().replace('\r\n', '\n')
            code = compile(source, name, 'exec')
            # Tuple form of exec: same meaning as "exec code in module.__dict__".
            exec(code, module.__dict__)
            return module
        finally:
            opened.close()
383
class FunctionCache(Cache):
    """ Memoizes a function: call the cache like the function itself.
        All arguments must be hashable.
    """

    def __init__(self, function, max_size=0):
        Cache.__init__(self, max_size)
        self.function = function

    def __call__(self, *args, **kw):
        """ Return function(*args, **kw), computing it only on a cache miss. """
        # A dict is not hashable, so keyword arguments become a tuple of
        # (name, value) pairs.  They are sorted so that the order in which the
        # caller wrote them does not produce different cache keys; names are
        # unique, so the values themselves are never compared.
        return self[args, tuple(sorted(kw.items()))]

    def build(self, key, name, opened, entry):
        """ Compute the value by calling the wrapped function with the arguments held in key. """
        args, kw = key
        return self.function(*args, **dict(kw))
Note: See TracBrowser for help on using the repository browser.