source: TI02-CSML/trunk/csml/csmllibs/open_file_cache.py @ 3704

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/TI02-CSML/trunk/csml/csmllibs/open_file_cache.py@3704
Revision 3704, 2.9 KB checked in by domlowe, 12 years ago (diff)

Added Ag's file cache code to DataInterface. The Data Interface is read only.

Line 
1"""
2open_file_cache.py
3==================
4
5Holds OpenFileCache class. Instantiated when csmldatainterface is imported
6Author: Ag Stephens.
7
8"""
9
10# Import standard library modules
11import os
12import glob
13
class OpenFileCache:
    """
    Cache of open, read-only file objects keyed by path.

    Keeps a dictionary of file objects (opened with mode='r') that are open
    at the same time. Once the number of cached files reaches ``limit``,
    the oldest opened file is closed and dropped before a new one is opened.

    Attributes:
        d     -- dict mapping path -> open file object.
        l     -- list of cached paths, oldest first (eviction order).
        limit -- maximum number of files kept open (default 1000).

    Note: max number of files in any HadGEM1 dir is 3177 (aebti),
    for virtual vars you might need ape/apa therefore could
    get up to 7400.

    Note: the original author's notes refer to a file limit of 7400, but
    the default set in __init__ is 1000; assign to ``limit`` to change it.

    Note: On test system CPU and MEM usage were up to 2.5% and 2.3% when
    375 files were open. If you scale this up they could be up to
    50% with 7500 files open.

    Usage:
        cache = OpenFileCache()
        f = cache.openFile(opener, path)   # opener(path, mode="r") is called
        f = cache.openFile(opener, path)   # second call returns cached handle
        cache.closeFile(path)
        cache.closeAll()
    """

    def __init__(self):
        self.d = {}
        self.l = []
        self.limit = 1000

    def __len__(self):
        "Returns the number of files currently held open in the cache."
        return len(self.d)

    def openFile(self, opener, path):
        """
        Returns an open file object for path, opening it if necessary.

        Takes in an opener callable (could be cdms.open, or
        self.parent.openFile) and the path name. The opener is called as
        opener(path, mode="r") and must return an object with a close()
        method.

        If the path is already cached the existing handle is returned and
        the opener is not called. Otherwise, while the cache holds ``limit``
        or more files, the oldest handle is closed and removed; then the
        file is opened, stored in the cache and returned.
        """
        if path in self.d:
            return self.d[path]
        # Not already open, so make room first. A loop (rather than a single
        # eviction) also copes with ``limit`` having been lowered after files
        # were opened; the ``self.l`` guard avoids indexing an empty list
        # when limit <= 0.
        while len(self.d) >= self.limit and self.l:
            self.closeFile(self.l[0])
        # Direct call replaces the Python-2-only apply() builtin.
        handle = opener(path, mode="r")
        self.d[path] = handle
        self.l.append(path)
        return handle

    def closeFile(self, path):
        """
        Closes individual file in cache and removes it from the cache.

        Raises Exception if path is not in the cache. Returns True on
        success.
        """
        if path not in self.d:
            raise Exception("Path '%s' not in cached files so cannot close!" % (path,))
        # Drop the bookkeeping before closing: if close() raises, the error
        # still propagates but the cache is not left holding a broken handle
        # as its permanent "oldest" entry.
        handle = self.d.pop(path)
        self.l.remove(path)
        handle.close()
        return True

    def closeAll(self):
        """
        Closes all files in cache and empties it. Returns True.

        If a close() call raises, the error propagates; files already
        processed are removed and the remaining ones stay cached.
        """
        try:
            # Iterate over a snapshot of the keys: removing entries while
            # iterating the live dict raises RuntimeError on Python 3.
            for path in list(self.d):
                self.d.pop(path).close()
        finally:
            # Keep the ordering list consistent with whatever is still cached.
            self.l = [p for p in self.l if p in self.d]
        return True
84               
85               
if __name__ == "__main__":
    # Manual smoke test: exercises caching, eviction at the limit and
    # explicit closing against real files on disk.
    files = glob.glob("/badc/hadgem1/data/aebtd/ape/*.pp")
    if len(files) < 3:
        raise SystemExit("Self-test needs at least 3 files matching "
                         "/badc/hadgem1/data/aebtd/ape/*.pp")
    # openFile() requires an opener callable as its first argument. The
    # built-in open() is used here so the test has no extra dependencies;
    # any callable accepting (path, mode="r") would do.
    opener = open
    cache = OpenFileCache()
    cache.limit = 2
    cache.openFile(opener, files[0])
    cache.openFile(opener, files[1])
    cache.openFile(opener, files[0])   # already cached: no new open
    cache.openFile(opener, files[2])   # limit hit: oldest (files[0]) is closed
    cache.closeFile(files[1])
    cache.openFile(opener, files[0])   # reopened after eviction
    #print cache.d
    cache.closeAll()
    del cache
Note: See TracBrowser for help on using the repository browser.