source: cows_wps/trunk/cows_wps/process_handler/fileset.py @ 7018

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/cows_wps/trunk/cows_wps/process_handler/fileset.py@7018
Revision 7018, 5.5 KB checked in by astephen, 11 years ago (diff)

more fixes for zipping up.

Line 
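"""
Implements the FileSet data structure, originally designed by Ag, for storing
a nested set of data files or zipfiles, together with helpers for zipping up
the contents of a FileSet (for real, or as a size-estimating dry run).

"""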
6import logging
7import zipfile
8import os
9import stat
10
class FLAG:
    """
    String constants stored in the I{flag} attribute of a fileset entry to
    designate what kind of entry it is.

    ROOT is the default flag of a FileSet; ZIP marks a zipfile.

    """
    ROOT, DATA, ZIP, IMAGE = 'r', 'data', 'zip', 'image'
16
17
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
19
class FileSetEntry(object):
    """
    A single entry in a fileset.

    @ivar flag: A flag designating the file type
    @ivar size: The size of the file
    @ivar name: The name of the file relative to its container
    @ivar info: Some description text about the file
    @ivar capabilities: Read-only text defining what can be done with the
        file; derived from I{flag}

    """
    def __init__(self, flag, name, size, info):
        self.flag, self.name = flag, name
        self.size, self.info = size, info

    def __get_capabilities(self):
        # Only zipfiles advertise any capabilities; every other flag gets
        # the empty string.
        if self.flag == FLAG.ZIP:
            return "download expand_description"
        return ""

    capabilities = property(fget=__get_capabilities, doc="")
44
class FileSet(FileSetEntry):
    """
    A container of FileSetEntry objects.  Because a FileSet is itself a
    FileSetEntry, a FileSet can hold other FileSet objects (typically
    representing zipfiles).

    @ivar contents: A mutable sequence of FileSet contents

    """

    def __init__(self, flag=FLAG.ROOT, name='root', size=None, info="Root fileset"):
        super(FileSet, self).__init__(flag=flag, name=name, size=size, info=info)
        # A new fileset starts out empty; entries are appended to this list.
        self.contents = []

    def __iter__(self):
        """Iterate over the entries directly contained in this fileset."""
        entries = self.contents
        return iter(entries)
61       
def dryRunZipFileSet(fileSet, context, maxFileVolume):
    """
    Pretend to zip up the contents of a FileSet into zipfiles, guessing file
    volumes.

    No files are read or written.  The compressed size of each entry is
    estimated as its I{size} multiplied by a compression ratio looked up in
    C{context.config['compression_ratios']} by file extension (without the
    leading dot).  If the extension is not listed, the C{'default_ratio'}
    value is used, or 1.0 if that is missing too.  Entries are grouped, in
    order, into zip filesets named C{output_<n>.zip}; a zip fileset is
    considered full as soon as its estimated size reaches I{maxFileVolume}.
    If only one zip fileset results it is named C{output.zip}.

    @param fileSet: The FileSet whose contents are to be grouped.
    @param context: Object providing the C{config} mapping described above.
    @param maxFileVolume: Estimated volume at which a zipfile is considered
        full, in the same units as the entries' I{size}.
    @return: I{fileSet} mutated to contain the zip filesets.

    """
    # Compression ratios are taken from context.config.
    compressionRatios = dict(context.config['compression_ratios'])
    # The fallback ratio is the same for every entry, so look it up once.
    defaultRatio = float(compressionRatios.get('default_ratio', 1.0))

    def finish(zipFileSet, estimate):
        # Record the estimated size on a completed zip fileset and log it.
        zipFileSet.size = int(estimate)
        log.debug("total file size for zipFileSet.name = %s is %s MB"
                  % (zipFileSet.name, float(zipFileSet.size) / 1024 ** 2))
        return zipFileSet

    zipFileSets = []
    # The zip fileset currently being filled, or None when the next entry
    # must start a new one.  An explicit marker is used instead of testing
    # "size == 0" so that entries whose estimated size is zero are neither
    # dropped nor cause the current zip fileset to be replaced.
    current = None
    size = 0

    for fs in fileSet.contents:
        if current is None:
            number = len(zipFileSets) + 1
            current = FileSet(FLAG.ZIP, name='output_%d.zip' % number,
                              info='Zip file %d' % number)
            size = 0

        ext = os.path.splitext(fs.name)[1].strip('.')
        ratio = float(compressionRatios.get(ext, defaultRatio))

        size += fs.size * ratio
        current.contents.append(fs)

        if size >= maxFileVolume:
            zipFileSets.append(finish(current, size))
            current = None

    # Keep the last, partially filled zip fileset (if any).
    if current is not None:
        zipFileSets.append(finish(current, size))

    if len(zipFileSets) == 1:
        zipFileSets[0].name = 'output.zip'

    fileSet.contents = zipFileSets

    return fileSet
113
def zipFileSet(fileSet, context, maxFileVolume, keep = False):
    """
    Zip up the contents of a FileSet into zipfiles of (approximately) maximum
    size I{maxFileVolume}.

    Each entry is read from C{context.outputDir} and written, in order, to
    C{output_<n>.zip} in that same directory.  A zipfile is closed as soon as
    the total compressed size of its members reaches I{maxFileVolume}, so the
    limit is checked only after a member has been added.  If only one zipfile
    results it is renamed to C{output.zip}.  An empty I{fileSet} produces no
    zipfiles.

    @param fileSet: The FileSet whose contents are to be zipped.
    @param context: Object providing I{outputDir}, the directory that holds
        the files to be zipped and receives the zipfiles.
    @param maxFileVolume: Compressed volume at which a zipfile is considered
        full.
    @param keep: If false (the default), the files that were zipped are
        deleted from the output directory once all zipfiles are complete.
    @return: I{fileSet} mutated to contain the zipfiles created.

    """
    outputDir = context.outputDir

    def closeArchive(archive, zipFileSet):
        # The true file size is slightly larger than the compressed size of
        # all the contained files, so query the os for the true size of the
        # zipfile.  The zipfile has to be closed first.
        filename = archive.filename
        archive.close()
        zipFileSet.size = _getFileSize(filename)

    zipFileSets = []
    # The open zipfile and its matching FileSet, or None when the next entry
    # must start a new zipfile.  An explicit marker is used instead of
    # testing "size == 0" so that members which compress to zero bytes do
    # not cause the zipfile being written to be reopened and overwritten.
    archive = None
    current = None
    size = 0

    try:
        for fs in fileSet.contents:
            # New zipfile
            if archive is None:
                number = len(zipFileSets) + 1
                zipname = 'output_%d.zip' % number
                archive = zipfile.ZipFile(os.path.join(outputDir, zipname),
                                          'w', zipfile.ZIP_DEFLATED)
                current = FileSet(flag=FLAG.ZIP, name=zipname,
                                  info='Zip file %d' % number)
                size = 0

            #!TODO: This way of invoking zip will not correctly archive directories.
            archive.write(os.path.join(outputDir, fs.name), fs.name)
            zi = archive.getinfo(fs.name)
            size += zi.compress_size

            # The member comment is written out when the zipfile is closed.
            # It has to be a byte string, so encode text first.
            comment = fs.info
            if comment is not None:
                if not isinstance(comment, bytes):
                    comment = comment.encode('utf-8')
                zi.comment = comment
            current.contents.append(fs)

            if size >= maxFileVolume:
                closeArchive(archive, current)
                zipFileSets.append(current)
                archive = None

        # Finish the last zipfile, but only if one is still open.  If the
        # final entry filled its zipfile it has already been recorded above
        # and must not be appended a second time.
        if archive is not None:
            closeArchive(archive, current)
            zipFileSets.append(current)
            archive = None
    finally:
        # Do not leak the open zipfile if something above failed.
        if archive is not None:
            archive.close()

    log.debug("len(zipFileSets) = %s" % (len(zipFileSets),))

    if len(zipFileSets) == 1:
        log.debug("renaming")
        oldName = 'output_1.zip'
        newName = 'output.zip'

        os.rename(os.path.join(outputDir, oldName),
                  os.path.join(outputDir, newName))

        zipFileSets[0].name = newName

    log.info("Output zipped up into %s zip files" % (len(zipFileSets),))

    # Now we know the zipfiles are complete, clean up.
    if not keep:
        for zipped in zipFileSets:
            for fs in zipped.contents:
                os.remove(os.path.join(outputDir, fs.name))

    fileSet.contents = zipFileSets
    return fileSet
182
183
def _getFileSize(fullFileName):
    """
    Return the size of the file at I{fullFileName}, as reported by C{os.stat}.

    """
    return os.stat(fullFileName).st_size
Note: See TracBrowser for help on using the repository browser.