mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Make replacing of files in ZIP archives faster and hopefully more robust
This commit is contained in:
parent
1c2229d0eb
commit
137d83c0e2
@ -6,8 +6,8 @@ from __future__ import with_statement
|
||||
import struct, os, time, sys, shutil
|
||||
import binascii, cStringIO
|
||||
from contextlib import closing
|
||||
from tempfile import SpooledTemporaryFile
|
||||
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
from calibre import sanitize_file_name
|
||||
from calibre.constants import filesystem_encoding
|
||||
from calibre.ebooks.chardet import detect
|
||||
@ -467,6 +467,7 @@ class ZipExtFile:
|
||||
|
||||
def __init__(self, fileobj, zipinfo, decrypt=None):
|
||||
self.fileobj = fileobj
|
||||
self.orig_pos = fileobj.tell()
|
||||
self.decrypter = decrypt
|
||||
self.bytes_read = 0L
|
||||
self.rawbuffer = ''
|
||||
@ -582,6 +583,20 @@ class ZipExtFile:
|
||||
result.append(line)
|
||||
return result
|
||||
|
||||
def read_raw(self):
    """Return the bytes of this archive member exactly as stored.

    Reads ``self.compress_size`` bytes (12 fewer when a decrypter is
    set) from the underlying file object, starting at ``self.orig_pos``,
    the offset the file object was at when this object was created.
    No decompression or decryption is applied.

    The file object's current position is restored before returning,
    including when seeking or reading raises, so that an error here
    cannot corrupt later reads that share the same file object.

    Returns an empty byte string when there is nothing to read.
    """
    fileobj = self.fileobj
    bytes_to_read = self.compress_size
    if self.decrypter is not None:
        # NOTE(review): 12 is presumably the size of the ZIP encryption
        # header that was consumed before orig_pos -- confirm.
        bytes_to_read -= 12

    if bytes_to_read <= 0:
        return b''

    pos = fileobj.tell()
    try:
        fileobj.seek(self.orig_pos)
        return fileobj.read(bytes_to_read)
    finally:
        # Always put the shared stream back where the caller left it.
        fileobj.seek(pos)
|
||||
|
||||
|
||||
def read(self, size = None):
|
||||
# act like file() obj and return empty string if size is 0
|
||||
if size == 0:
|
||||
@ -925,6 +940,11 @@ class ZipFile:
|
||||
"""Return file bytes (as a string) for name."""
|
||||
return self.open(name, "r", pwd).read()
|
||||
|
||||
def read_raw(self, name, mode="r", pwd=None):
    """Return the bytes stored in the archive for the member *name*.

    The member is opened via :meth:`open` with the given *mode* and
    *pwd*, and the result of its ``read_raw()`` method is returned.
    """
    return self.open(name, mode=mode, pwd=pwd).read_raw()
|
||||
|
||||
def open(self, name, mode="r", pwd=None):
|
||||
"""Return file-like object for 'name'."""
|
||||
if mode not in ("r", "U", "rU"):
|
||||
@ -1159,10 +1179,13 @@ class ZipFile:
|
||||
self.filelist.append(zinfo)
|
||||
self.NameToInfo[zinfo.filename] = zinfo
|
||||
|
||||
def writestr(self, zinfo_or_arcname, bytes, permissions=0600, compression=ZIP_DEFLATED):
|
||||
def writestr(self, zinfo_or_arcname, bytes, permissions=0600,
|
||||
compression=ZIP_DEFLATED, raw_bytes=False):
|
||||
"""Write a file into the archive. The contents is the string
|
||||
'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
|
||||
the name of the file in the archive."""
|
||||
assert not raw_bytes or (raw_bytes and
|
||||
isinstance(zinfo_or_arcname, ZipInfo))
|
||||
if not isinstance(zinfo_or_arcname, ZipInfo):
|
||||
if isinstance(zinfo_or_arcname, unicode):
|
||||
zinfo_or_arcname = zinfo_or_arcname.encode('utf-8')
|
||||
@ -1177,18 +1200,20 @@ class ZipFile:
|
||||
raise RuntimeError(
|
||||
"Attempt to write to ZIP archive that was already closed")
|
||||
|
||||
zinfo.file_size = len(bytes) # Uncompressed size
|
||||
if not raw_bytes:
|
||||
zinfo.file_size = len(bytes) # Uncompressed size
|
||||
zinfo.header_offset = self.fp.tell() # Start of header bytes
|
||||
self._writecheck(zinfo)
|
||||
self._didModify = True
|
||||
zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum
|
||||
if zinfo.compress_type == ZIP_DEFLATED:
|
||||
co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
|
||||
zlib.DEFLATED, -15)
|
||||
bytes = co.compress(bytes) + co.flush()
|
||||
zinfo.compress_size = len(bytes) # Compressed size
|
||||
else:
|
||||
zinfo.compress_size = zinfo.file_size
|
||||
if not raw_bytes:
|
||||
zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum
|
||||
if zinfo.compress_type == ZIP_DEFLATED:
|
||||
co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
|
||||
zlib.DEFLATED, -15)
|
||||
bytes = co.compress(bytes) + co.flush()
|
||||
zinfo.compress_size = len(bytes) # Compressed size
|
||||
else:
|
||||
zinfo.compress_size = zinfo.file_size
|
||||
zinfo.header_offset = self.fp.tell() # Start of header bytes
|
||||
self.fp.write(zinfo.FileHeader())
|
||||
self.fp.write(bytes)
|
||||
@ -1332,7 +1357,7 @@ class ZipFile:
|
||||
def safe_replace(zipstream, name, datastream):
|
||||
'''
|
||||
Replace a file in a zip file in a safe manner. This proceeds by extracting
|
||||
and re-creating the zipfile. This is neccessary because :method:`ZipFile.replace`
|
||||
and re-creating the zipfile. This is necessary because :method:`ZipFile.replace`
|
||||
sometimes created corrupted zip files.
|
||||
|
||||
:param zipstream: Stream from a zip file
|
||||
@ -1340,21 +1365,20 @@ def safe_replace(zipstream, name, datastream):
|
||||
:param datastream: The data to replace the file with.
|
||||
'''
|
||||
z = ZipFile(zipstream, 'r')
|
||||
names = z.infolist()
|
||||
with TemporaryDirectory('_zipfile_replace') as tdir:
|
||||
z.extractall(path=tdir)
|
||||
mapping = z.extract_mapping
|
||||
path = os.path.join(tdir, *name.split('/'))
|
||||
shutil.copyfileobj(datastream, open(path, 'wb'))
|
||||
with SpooledTemporaryFile(max_size=100*1024*1024) as temp:
|
||||
ztemp = ZipFile(temp, 'w')
|
||||
for obj in z.infolist():
|
||||
if obj.filename == name:
|
||||
ztemp.writestr(obj, datastream.read())
|
||||
else:
|
||||
ztemp.writestr(obj, z.read_raw(obj), raw_bytes=True)
|
||||
ztemp.close()
|
||||
z.close()
|
||||
temp.seek(0)
|
||||
zipstream.seek(0)
|
||||
zipstream.truncate()
|
||||
with closing(ZipFile(zipstream, 'w')) as z:
|
||||
for info in names:
|
||||
current = mapping[info.filename]
|
||||
if os.path.isdir(current):
|
||||
z.writestr(info.filename+'/', '', 0700)
|
||||
else:
|
||||
z.write(current, info.filename, compress_type=info.compress_type)
|
||||
shutil.copyfileobj(temp, zipstream)
|
||||
zipstream.flush()
|
||||
|
||||
class PyZipFile(ZipFile):
|
||||
"""Class to create ZIP archives with Python library files and packages."""
|
||||
|
Loading…
x
Reference in New Issue
Block a user