1 | """ |
---|
2 | Implements the data structure originally designed by Ag to store a nested set of |
---|
3 | data or zipfiles. |
---|
4 | |
---|
5 | """ |
---|
6 | import logging |
---|
7 | import zipfile |
---|
8 | import os |
---|
9 | import stat |
---|
10 | |
---|
class FLAG:
    """
    Flag values used to designate the type of a file set entry.

    """
    # Default flag of a FileSet (the root container).
    ROOT = 'r'
    # Data file entry.
    DATA = 'data'
    # Zip archive entry; the only flag that grants capabilities.
    ZIP = 'zip'
    # Image file entry.
    IMAGE = 'image'
---|
16 | |
---|
17 | |
---|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
---|
19 | |
---|
class FileSetEntry(object):
    """
    A single entry (file) of a file set.

    @ivar flag: A flag designating the file type
    @ivar size: The size of the file
    @ivar name: the name of the file relative to its container
    @ivar info: Some description text about the file
    @ivar capabilities: Some text used to define what can be done with the file

    """
    def __init__(self, flag, name, size, info):
        self.flag, self.name = flag, name
        self.size, self.info = size, info

    def __get_capabilities(self):
        # Only zip entries advertise capabilities; everything else gets the
        # empty string.
        return "download expand_description" if self.flag == FLAG.ZIP else ""

    # Read-only view computed from the flag on every access.
    capabilities = property(fget=__get_capabilities, doc="")
---|
44 | |
---|
class FileSet(FileSetEntry):
    """
    A container of FileSetEntry objects. Because FileSet is itself a
    FileSetEntry, a FileSet can hold other FileSet objects (typically
    representing zipfiles).

    @ivar contents: A mutable sequence of FileSet contents

    """

    def __init__(self, flag=FLAG.ROOT, name='root', size=None, info="Root fileset"):
        super(FileSet, self).__init__(flag=flag, name=name, size=size, info=info)
        # Every instance starts out empty and owns its own list.
        self.contents = list()

    def __iter__(self):
        # Iterating a FileSet walks its direct contents.
        return iter(self.contents)
---|
61 | |
---|
def dryRunZipFileSet(fileSet, context, maxFileVolume):
    """
    Pretend to zip up the contents of a FileSet into zipfiles, guessing file volumes.

    No archive is written. The compressed size of each entry is estimated as
    its size multiplied by a compression ratio looked up by file extension in
    context.config['compression_ratios'], falling back to the 'default_ratio'
    entry, or 1.0 if that is missing as well. A simulated zipfile is closed
    as soon as its estimated size reaches I{maxFileVolume}.

    @param fileSet: The FileSet whose contents are to be grouped.
    @param context: Object whose config['compression_ratios'] can be passed
        to dict().
    @param maxFileVolume: Estimated size at which a zipfile is considered
        full, in the same units as the entry sizes.
    @return: I{fileSet} mutated to contain the simulated zipfiles.

    """
    # Compression ratios are taken from context.config.
    compressionRatios = dict(context.config['compression_ratios'])
    # The fallback ratio does not depend on the entry, so compute it once.
    defaultRatio = float(compressionRatios.get('default_ratio', 1.0))

    zipFileSets = []
    # The zipfile currently being filled. None means "start a new one"; the
    # running size cannot be used for that, because an entry whose estimated
    # size is 0 would then be dropped together with its zipfile.
    current = None
    size = 0

    for fs in fileSet.contents:
        if current is None:
            index = len(zipFileSets) + 1
            current = FileSet(FLAG.ZIP, name='output_%d.zip' % index,
                              info='Zip file %d' % index)
            size = 0

        ext = os.path.splitext(fs.name)[1].strip('.')
        ratio = float(compressionRatios.get(ext, defaultRatio))

        size += fs.size * ratio
        current.contents.append(fs)

        if size >= maxFileVolume:
            _recordDryRunZip(current, size, zipFileSets)
            current = None

    # Record the last, partially filled zipfile if there is one.
    if current is not None:
        _recordDryRunZip(current, size, zipFileSets)

    if len(zipFileSets) == 1:
        zipFileSets[0].name = 'output.zip'

    fileSet.contents = zipFileSets

    return fileSet


def _recordDryRunZip(zipFileSet, size, zipFileSets):
    """
    Store the estimated I{size} on I{zipFileSet}, log it and append the set
    to I{zipFileSets}.

    """
    zipFileSet.size = int(size)
    # 1024 ** 2 bytes per MB.
    log.debug("total file size for zipFileSet.name = %s is %s MB",
              zipFileSet.name, float(zipFileSet.size) / 1024 ** 2)
    zipFileSets.append(zipFileSet)
---|
113 | |
---|
def zipFileSet(fileSet, context, maxFileVolume, keep = False):
    """
    Zip up the contents of a FileSet into zipfiles of maximum size
    I{maxFileVolume}.

    The entries are read from, and the zipfiles written to,
    context.outputDir. A zipfile is closed as soon as the summed compressed
    size of its members reaches I{maxFileVolume}, so it can exceed that
    volume by up to one member. If only one zipfile results it is renamed to
    'output.zip'.

    @param fileSet: The FileSet whose contents are to be zipped.
    @param context: Object providing the I{outputDir} directory path.
    @param maxFileVolume: Compressed size at which a zipfile is closed.
    @param keep: If False (the default) the original files are removed from
        context.outputDir once all zipfiles are complete.
    @return: I{fileSet} mutated to contain the zipfiles created.

    """
    zipFileSets = []
    # The archive currently being written and the FileSet describing it.
    # archive is None whenever a new zipfile has to be started; the running
    # size cannot signal that, because a member compressing to 0 bytes would
    # cause the same zipfile to be reopened and truncated.
    archive = None
    current = None
    size = 0

    try:
        for fs in fileSet.contents:
            # New zipfile
            if archive is None:
                index = len(zipFileSets) + 1
                zipname = 'output_%d.zip' % index
                fullZipFilename = os.path.join(context.outputDir, zipname)
                archive = zipfile.ZipFile(fullZipFilename, 'w', zipfile.ZIP_DEFLATED)
                current = FileSet(flag=FLAG.ZIP, name=zipname, info='Zip file %d' % index)
                size = 0

            #!TODO: This way of invoking zip will not correctly archive directories.
            archive.write(os.path.join(context.outputDir, fs.name), fs.name)
            zi = archive.getinfo(fs.name)
            size += zi.compress_size

            # The member comment is written with the central directory when
            # the archive is closed, and must be a byte string by then.
            comment = fs.info
            if comment is not None:
                if not isinstance(comment, bytes):
                    comment = comment.encode('utf-8')
                zi.comment = comment
            current.contents.append(fs)

            if size >= maxFileVolume:
                zipFileSets.append(_finishZipArchive(archive, current))
                archive = None

        # Finish the last zipfile only if it is still open. Doing this
        # unconditionally would register the final zipfile twice whenever
        # the last entry filled it up, and fail on an empty file set.
        if archive is not None:
            zipFileSets.append(_finishZipArchive(archive, current))
            archive = None
    finally:
        # Do not leak the file handle when an entry could not be archived.
        if archive is not None:
            archive.close()

    log.debug("len(zipFileSets) = %s" % (len(zipFileSets),))

    if len(zipFileSets) == 1:
        log.debug("renaming")
        oldName = zipFileSets[0].name
        newName = 'output.zip'

        os.rename(os.path.join(context.outputDir, oldName),
                  os.path.join(context.outputDir, newName))

        zipFileSets[0].name = newName

    log.info("Output zipped up into %s zip files" % (len(zipFileSets),))

    # Now we know the zipfiles are complete, clean up. The comparison is
    # intentionally '== False': a caller passing keep=None must not start
    # losing files.
    if keep == False:
        for zfs in zipFileSets:
            for fs in zfs.contents:
                os.remove(os.path.join(context.outputDir, fs.name))

    fileSet.contents = zipFileSets
    return fileSet


def _finishZipArchive(archive, zipFileSetEntry):
    """
    Close I{archive} and store its size on disk on I{zipFileSetEntry}.

    The true file size is slightly larger than the compressed size of all
    the contained files, so the os is queried for it. This can only be done
    after the archive has been closed.

    @return: I{zipFileSetEntry}

    """
    filename = archive.filename
    archive.close()
    zipFileSetEntry.size = _getFileSize(filename)
    return zipFileSetEntry
---|
182 | |
---|
183 | |
---|
def _getFileSize(fullFileName):
    """
    Ask the os for the size of the file at I{fullFileName}.

    @return: The st_size value reported by os.stat.

    """
    return os.stat(fullFileName).st_size
---|