source: cows_wps/trunk/cows_wps/utils/zip_utils.py @ 5615

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/cows_wps/trunk/cows_wps/utils/zip_utils.py@7575
Revision 5615, 6.9 KB checked in by spascoe, 10 years ago (diff)

COWS WPS package copied from
 http://proj.badc.rl.ac.uk/svn/dcip/cows-wps/trunk.

This is a stripped down version of the DDP WPS. Some features are
removed and others have been deactivated until we reimplement them in a
more generic way.

Line 
1"""
2zip_utils.py
3============
4
5Useful utilities for packaging data into zip files and recording
6contents in XML.
7
8"""
9
10# Standard library imports
11import os
12import zipfile
13import cElementTree as ET
14import cows_wps.utils.filesystem_utils as fs_utils
15
16
def findCommonDir(file_paths):
    """
    Returns the deepest directory shared by every path in file_paths.

    Only the directory part of each path is compared (the last component is
    treated as the file name) and components are split on "/".

    Raises an Exception if file_paths is empty, or if the only thing the
    paths have in common is the root directory ("/") or nothing at all.
    """
    error_prefix = "Could not find common directory in file paths: "

    dir_parts = [os.path.split(fp)[0].split("/") for fp in file_paths]
    if not dir_parts:
        raise Exception(error_prefix + repr(file_paths))

    # Components deeper than the shallowest directory cannot be shared by
    # every path, so the comparison stops at that depth.
    min_len = min([len(parts) for parts in dir_parts])
    first = dir_parts[0]
    others = dir_parts[1:]

    common_dir_parts = []
    for i in range(min_len):
        component = first[i]
        mismatches = [parts for parts in others if parts[i] != component]
        if mismatches:
            break
        common_dir_parts.append(component)

    common_dir = "/".join(common_dir_parts)

    # An empty result means either that nothing matched at all or that only
    # the leading "" of absolute paths (i.e. the root) matched. Neither is a
    # usable common directory.
    if common_dir in ("", "/"):
        raise Exception(error_prefix + repr(file_paths))

    return common_dir
49
50
def zip(file_list, zip_file_name, common_dir=None, compress=True):
    """
    Zips up files in file_list with compression switched on or off.
    Returns name of zip file.

    If common_dir is given the process changes its working directory to it
    before the archive is opened, so the entries in file_list (and a relative
    zip_file_name) are resolved against common_dir. The working directory is
    NOT restored afterwards; wrapZip() relies on that when it looks up the
    size of the archive it has just written.

    Raises OSError if the working directory cannot be changed to common_dir.

    NOTE: this function shadows the builtin zip() inside this module.
    """
    # Only an explicit False (or a value equal to it, such as 0) disables
    # compression.
    compression = zipfile.ZIP_DEFLATED
    if compress == False:
        compression = zipfile.ZIP_STORED

    # Use relative path if common directory given
    if common_dir is not None:
        try:
            os.chdir(common_dir)
        except Exception:
            raise OSError("Could not change directory to: " + repr(common_dir))

    z = zipfile.ZipFile(zip_file_name, mode="w", compression=compression)
    try:
        for f in file_list:
            z.write(f)
    finally:
        # Always release the archive handle, even if one of the files
        # could not be added.
        z.close()

    return zip_file_name
74
75   
def wrapZip(file_paths, zip_file_stem="test_zip_file"):
    """
    Takes a list of files (file_paths), zip_file_stem for generating zip file name.
    Returns a tuple of (file_path, size) for the zip file.

    The files are stored in the archive relative to their common directory
    (see findCommonDir). zip() changes the working directory to that common
    directory, so a relative zip_file_stem ends up inside it and the size
    lookup below is resolved against the same directory.
    """
    common_dir = findCommonDir(file_paths)

    # Strip the common directory from the front of each path only. A plain
    # str.replace() would also delete any later occurrence of the same text
    # inside the path and so corrupt the stored name.
    prefix = common_dir + "/"
    relative_paths = []
    for fp in file_paths:
        if fp.startswith(prefix):
            fp = fp[len(prefix):]
        relative_paths.append(fp)

    zip_file_path = zip_file_stem + ".zip"
    zip(relative_paths, zip_file_path, common_dir)
    size = os.path.getsize(zip_file_path)
    return (zip_file_path, size)
87
88
def wrapZipXML(file_set, zip_file_stem="test_zip_file", set_length=1, n=1):
    """
    Takes XML file_set string, zip_file_stem for generating zip file name,
    set_length (number of files in set) and n (this file number in the set).
    Returns file_set XML (as a string) for the new zip file, with the
    original file_set embedded within it.

    The text of the first FileURL element of every child of the file_set
    root is used as the path of a file to add to the archive. A child with
    no FileURL element raises an IndexError.
    """
    fs_xml = ET.fromstring(file_set)

    # Iterate over the element directly: getchildren() is deprecated and is
    # no longer available in recent ElementTree releases.
    file_paths = [child.findall("FileURL")[0].text for child in fs_xml]

    zip_file_name = zip_file_stem + ".zip"
    zip(file_paths, zip_file_name)

    info = "Zip file: %s of %s files." % (n, set_length)
    contents = file_set
    # now render
    zip_file_set = """<FileSet>
  <FileDetails>
    <FileURL>%s</FileURL>
    <FileInfo>%s</FileInfo>
    <FileType>%s</FileType>
    <FileContents>
%s
    </FileContents>
  </FileDetails>
</FileSet>""" % (zip_file_name, info, "zip", contents)
    return zip_file_set
118
119
def makeZip(zipFile, files, deleteFiles=False):
    """
    Writes a zip file storing only relative paths in the zip file. All inputs
    must be given as full paths.

    Files located under the directory that contains the zip file are stored
    relative to that directory; any other file is stored under the path it
    was given with.

    @type zipFile: string
    @param zipFile: the full path to the zip file to create

    @type files: list of strings
    @param files: A list of the full paths of the files to add to the archive

    @type deleteFiles: bool
    @keyword deleteFiles: If set to True then will delete the files after they
       have been added to the zip file.

    """
    zipPath = os.path.dirname(zipFile)

    # Match on a whole directory component: "/tmp/out" must not be treated as
    # the parent of "/tmp/output/x.txt". When zipPath already ends with a
    # separator (e.g. the root directory) no extra one is appended.
    if zipPath and not zipPath.endswith(os.sep):
        prefix = zipPath + os.sep
    else:
        prefix = zipPath

    zf = zipfile.ZipFile(zipFile, 'w')
    try:
        for f in files:

            # only store the name relative to the zip file path
            # so remove the zip path from the start of the file
            if prefix and f.startswith(prefix):
                zipName = f[len(prefix):]
            else:
                zipName = f

            zf.write(f, zipName)

    finally:
        zf.close()

    if deleteFiles:
        for f in files:
            os.remove(f)
156 
def unzipFiles(archivePath, outputFolder=None):
    """
    Unzips the content of a zip archive. If an output directory is given then
    the archive will be unzipped to that directory.

    All files will be unzipped relatively using their path inside the archive,
    e.g. if a file is 'inputs/folder/input.csv' inside the zip file it will be
    extracted to outputFolder/inputs/folder/input.csv. These extra folders will
    be created if necessary.

    @type  archivePath: string
    @param archivePath: the full path to the zip file

    @type    outputFolder: string
    @keyword outputFolder: the full path to the folder the archive will be
        extracted to. If None the archive will be extracted to its containing
        folder.
    """

    zipFolder = os.path.dirname(archivePath)

    if outputFolder is None:
        outputFolder = zipFolder

    # presumably raises if the folder is missing -- see filesystem_utils
    fs_utils.makeSurePathExists(outputFolder, "Output folder %s not found")

    zf = zipfile.ZipFile(archivePath, 'r')
    try:
        for zInfo in zf.infolist():

            # NOTE(review): zInfo.filename is used as stored in the archive.
            # An absolute name or one containing ".." would be written
            # outside outputFolder; validate it here if archives can come
            # from untrusted users.
            outputFilename = os.path.join(outputFolder, zInfo.filename)

            # create the directory to write the file to if necessary
            fs_utils.findOrMakeDir(os.path.dirname(outputFilename))

            # Directory entries have no content; the directory itself has
            # just been created above, so there is nothing to write.
            if zInfo.filename.endswith("/"):
                continue

            # Binary mode: archive members are raw bytes and must not go
            # through newline translation or text encoding.
            fout = open(outputFilename, 'wb')
            try:
                fout.write(zf.read(zInfo.filename))
            finally:
                fout.close()
    finally:
        zf.close()
195
196
def isZipFile(path):
    """
    Reports whether the file at path is a zip archive, as judged by
    zipfile.is_zipfile. Returns a boolean.
    """
    looks_like_zip = zipfile.is_zipfile(path)
    return looks_like_zip
202
203
if __name__ == "__main__":

    # Manual smoke test: builds a small file tree under /tmp/ztest and zips
    # it first with zip() and then through wrapZipXML().
    base_dir = "/tmp"
    t = os.path.join(base_dir, "ztest")
    f1 = "blah.txt"
    d = "dir1"
    f2 = os.path.join(d, "stuff.txt")

    def _writeFile(path, content):
        # Close the handle explicitly so the content is on disk before the
        # file is added to an archive.
        fh = open(path, "w")
        try:
            fh.write(content)
        finally:
            fh.close()

    def _setUpTestFiles():
        # (Re)creates the test tree and leaves the working directory in it.
        os.chdir(base_dir)
        if not os.path.isdir(t):
            os.mkdir(t)
        os.chdir(t)
        print(os.getcwd())
        _writeFile(f1, "JOKJOIJ\nsdjfdsklfj\n")
        if not os.path.isdir(d):
            os.mkdir(d)
        _writeFile(f2, "1" * 100000)

    _setUpTestFiles()
    x = zip((f1, f2), "test_zip_file.zip")
    print(x)
    print("-------------\nTest 2 --------------")
    _setUpTestFiles()
    file_set = """<FileSet>
<FileDetails>
  <FileURL>/tmp/ztest/blah.txt</FileURL>
</FileDetails>
<FileDetails>
  <FileURL>/tmp/ztest/dir1/stuff.txt</FileURL>
</FileDetails>
</FileSet>
"""
    x = wrapZipXML(file_set, "test_zip_file2", set_length=1, n=1)
    print(x)
Note: See TracBrowser for help on using the repository browser.