source: cows_wps/trunk/cows_wps/process_handler/fileset.py @ 5615

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/cows_wps/trunk/cows_wps/process_handler/fileset.py@5615
Revision 5615, 5.6 KB checked in by spascoe, 11 years ago (diff)

COWS WPS package copied from
 http://proj.badc.rl.ac.uk/svn/dcip/cows-wps/trunk.

This is a stripped down version of the DDP WPS. Some features are
removed and others have been deactivated until we reimplement them in a
more generic way.

Line 
1"""
Implements the data structure originally designed by Ag to store a nested set of
data or zipfiles.
4
5"""
6import logging
7import zipfile
8import os
9import stat
10
class FLAG:
    """
    String constants used as the C{flag} of a L{FileSetEntry} to designate
    what kind of file (or container) the entry represents.

    """
    # Default flag of a FileSet created without arguments.
    ROOT = 'r'
    DATA = 'data'
    # Flag given to FileSets that stand for a zipfile.
    ZIP = 'zip'
    IMAGE = 'image'
16
17
# Module-level logger shared by the zipping functions in this module.
log = logging.getLogger(__name__)
19
class FileSetEntry(object):
    """
    A single file described by a type flag, a name, a size and some
    descriptive text.

    @ivar flag: A flag designating the file type
    @ivar size: The size of the file
    @ivar name: the name of the file relative to its container
    @ivar info: Some description text about the file
    @ivar capabilities: Read-only text used to define what can be done with
        the file; it is computed from I{flag} on each access

    """
    def __init__(self, flag, name, size, info):
        self.flag = flag
        self.name = name
        self.size = size
        self.info = info

    def __get_capabilities(self):
        """
        Return the capabilities text matching this entry's flag.

        """
        # Only entries flagged as zipfiles advertise any capabilities.
        if self.flag == FLAG.ZIP:
            return "download expand_description"

        return ""

    capabilities = property(__get_capabilities, None, None,
                            "Text describing what can be done with the file.")
44
class FileSet(FileSetEntry):
    """
    An aggregation of FileSetEntry objects.  FileSet inherits from FileSetEntry
    so that FileSet objects can contain other FileSet objects (typically
    representing zipfiles).

    Iterating over a FileSet iterates over its I{contents}.

    @ivar contents: A mutable sequence of FileSet contents

    """

    def __init__(self, flag=FLAG.ROOT, name='root', size=None, info="Root fileset"):
        super(FileSet, self).__init__(flag, name, size, info)
        # Every instance gets its own list; it starts out empty.
        self.contents = []

    def __iter__(self):
        return iter(self.contents)
61       
def dryRunZipFileSet(fileSet, context, maxFileVolume):
    """
    Pretend to zip up the contents of a FileSet into zipfiles, guessing file volumes.

    Nothing is read from or written to disk.  The compressed size of each
    entry is estimated as C{entry.size * ratio}, where the ratio is looked up
    by file extension in C{context.config['compression_ratios']} and falls
    back to its C{'default_ratio'} item (or 1.0 when that is missing too).
    A simulated zipfile is closed as soon as its estimated size reaches
    I{maxFileVolume}, so it may exceed that value.

    @param fileSet: the FileSet whose contents are to be grouped
    @param context: object providing C{config['compression_ratios']}
    @param maxFileVolume: estimated size at which a zipfile is considered full
    @return: I{fileSet} mutated to contain the simulated zipfile FileSets.

    """
    # Compression ratios are taken from context.config.
    compressionRatios = dict(context.config['compression_ratios'])
    defaultRatio = float(compressionRatios.get('default_ratio', 1.0))

    bytesPerMB = 1024.0 * 1024.0
    zipFileSets = []

    def record(zfs, volume):
        # Store the estimated size on the finished zip and keep it.
        zfs.size = int(volume)
        log.debug("total file size for zipFileSet.name = %s is %s MB",
                  zfs.name, float(zfs.size) / bytesPerMB)
        zipFileSets.append(zfs)

    # The zip currently being filled.  An explicit None sentinel is used
    # rather than "size == 0" so that zero-size entries cannot cause a
    # partly filled zip to be thrown away and restarted.
    current = None
    size = 0

    for fs in fileSet.contents:
        # New zipFile
        if current is None:
            i = len(zipFileSets) + 1
            zipname = 'output_%d.zip' % i
            current = FileSet(FLAG.ZIP, name=zipname, info='Zip file %d' % i)
            size = 0

        ext = os.path.splitext(fs.name)[1].strip('.')
        ratio = float(compressionRatios.get(ext, defaultRatio))

        size += fs.size * ratio
        current.contents.append(fs)

        if size >= maxFileVolume:
            record(current, size)
            current = None

    # Keep the last, partly filled zip -- even when its estimate is zero.
    if current is not None:
        record(current, size)

    if len(zipFileSets) == 1:
        zipFileSets[0].name = 'output.zip'

    fileSet.contents = zipFileSets

    return fileSet
113
def zipFileSet(fileSet, context, maxFileVolume, keep=False):
    """
    Zip up the contents of a FileSet into zipfiles of maximum size
    I{maxFileVolume}.

    Entries are read from and zipfiles are written to C{context.outputDir}.
    A zipfile is closed as soon as the summed compressed size of its members
    reaches I{maxFileVolume}, so the file on disk may exceed that value.
    Zipfiles are named C{output_N.zip}; when only one is produced it is
    renamed to C{output.zip}.  An empty I{fileSet} produces no zipfiles.

    @param fileSet: the FileSet whose contents are to be zipped
    @param context: object providing C{outputDir}
    @param maxFileVolume: compressed volume at which a zipfile is closed
    @param keep: when false (the default) the original files are deleted
        once all zipfiles have been written
    @return: I{fileSet} mutated to contain the zipfiles created.

    """
    #!TODO: zipfile size doesn't account for zip headers.
    zipFileSets = []

    # The archive being written and the FileSet describing it.  "archive is
    # None" (rather than "size == 0") marks the need for a new zipfile, so a
    # zipfile is never re-opened over an unfinished one and a finished one is
    # never recorded twice.
    archive = None
    current = None
    size = 0

    try:
        for fs in fileSet.contents:
            # New zipfile
            if archive is None:
                i = len(zipFileSets) + 1
                zipname = 'output_%d.zip' % i
                fullZipFilename = os.path.join(context.outputDir, zipname)
                archive = zipfile.ZipFile(fullZipFilename, 'w', zipfile.ZIP_DEFLATED)
                current = FileSet(flag=FLAG.ZIP, name=zipname, info='Zip file %d' % i)
                size = 0

            #!TODO: This way of invoking zip will not correctly archive directories.
            archive.write(os.path.join(context.outputDir, fs.name), fs.name)
            zi = archive.getinfo(fs.name)
            size += zi.compress_size

            # The member comment is written out when the archive is closed
            # and has to be a byte string by then.
            comment = fs.info
            if comment is not None:
                if not isinstance(comment, bytes):
                    comment = comment.encode('utf-8')
                zi.comment = comment
            current.contents.append(fs)

            if size >= maxFileVolume:
                zipFileSets.append(_closeZipFileSet(archive, current))
                archive = None

        # Finish the last zipfile only if one is still open.
        if archive is not None:
            zipFileSets.append(_closeZipFileSet(archive, current))
            archive = None
    finally:
        # Do not leave a file handle open if zipping failed part-way.
        if archive is not None:
            archive.close()

    log.debug("len(zipFileSets) = %s" % (len(zipFileSets),))

    if len(zipFileSets) == 1:
        log.debug("renaming")
        oldName = zipFileSets[0].name
        newName = 'output.zip'

        os.rename(os.path.join(context.outputDir, oldName),
                  os.path.join(context.outputDir, newName))

        zipFileSets[0].name = newName

    log.info("Output zipped up into %s zip files" % (len(zipFileSets),))

    # Now we know every zipfile is complete clean up.
    if not keep:
        for zfs in zipFileSets:
            for fs in zfs.contents:
                os.remove(os.path.join(context.outputDir, fs.name))

    fileSet.contents = zipFileSets
    return fileSet


def _closeZipFileSet(archive, zipFileSet):
    """
    Close I{archive} and store the resulting file size on I{zipFileSet}.

    The true file size is slightly larger than the compressed size of all
    the contained files, so the os is queried for it after closing.

    @return: I{zipFileSet}

    """
    filename = archive.filename
    archive.close()
    zipFileSet.size = _getFileSize(filename)
    return zipFileSet
184
def _getFileSize(fullFileName):
    """
    Return the size of the file I{fullFileName} as reported by C{os.stat}.

    """
    return os.stat(fullFileName).st_size
Note: See TracBrowser for help on using the repository browser.