Zip file handling: Try to correctly interpret files with non-English characters in their file names inside the zip file

This commit is contained in:
Kovid Goyal 2010-02-10 16:31:48 -07:00
parent b33bfe2e43
commit 5cc4caf147
2 changed files with 14 additions and 1 deletion

View File

@ -120,7 +120,10 @@ class EbookIterator(object):
bad_map = {}
font_family_pat = re.compile(r'font-family\s*:\s*([^;]+)')
for csspath in css_files:
css = open(csspath, 'rb').read().decode('utf-8', 'replace')
try:
css = open(csspath, 'rb').read().decode('utf-8', 'replace')
except:
continue
for match in re.compile(r'@font-face\s*{([^}]+)}').finditer(css):
block = match.group(1)
family = font_family_pat.search(block)

View File

@ -5,6 +5,7 @@ a zip archive.
from __future__ import with_statement
from calibre.ptempfile import TemporaryDirectory
from calibre import sanitize_file_name
from calibre.constants import filesystem_encoding
import struct, os, time, sys, shutil
import binascii, cStringIO
@ -1030,6 +1031,15 @@ class ZipFile:
targetpath = os.path.join(targetpath, member.filename)
targetpath = os.path.normpath(targetpath)
if not isinstance(targetpath, unicode):
try:
targetpath = targetpath.decode('utf-8')
except:
try:
targetpath = targetpath.decode('cp437')
except:
targetpath = targetpath.decode('utf-8', 'replace')
targetpath = targetpath.encode(filesystem_encoding)
# Create all upper directories if necessary.
upperdirs = os.path.dirname(targetpath)