HTML Input: Ignore files whose names have been incorrectly encoded. Fixes #4820 (UnicodeDecodeError while starting ebook-convert on Linux)

This commit is contained in:
Kovid Goyal 2010-02-09 13:24:56 -07:00
parent b91afbb303
commit ba2c202ae1

View File

@ -16,9 +16,10 @@ from urllib import unquote as urlunquote
from urlparse import urljoin from urlparse import urljoin
from lxml import etree, html from lxml import etree, html
from cssutils import CSSParser
import calibre import calibre
from cssutils import CSSParser from calibre.constants import filesystem_encoding
from calibre.translations.dynamic import translate from calibre.translations.dynamic import translate
from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.chardet import xml_to_unicode
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
@ -434,10 +435,18 @@ class DirContainer(object):
def namelist(self):
    """Return the paths of all files found under ``self.rootdir``.

    The tree is walked with ``os.walk``. Each returned entry is the file
    name joined onto the directory it was found in, with backslashes
    replaced by forward slashes, as a ``unicode`` string.

    Files whose names cannot be decoded with ``filesystem_encoding`` are
    left out of the result instead of aborting the whole listing.
    """
    names = []
    base = self.rootdir
    # Walk with a byte-string root so that joining ``root`` and ``fname``
    # never mixes unicode with raw bytes; decoding is then done explicitly,
    # one name at a time, below.
    if isinstance(base, unicode):
        base = base.encode(filesystem_encoding)
    for root, _dirs, files in os.walk(base):
        for fname in files:
            fname = os.path.join(root, fname)
            fname = fname.replace('\\', '/')
            if not isinstance(fname, unicode):
                try:
                    fname = fname.decode(filesystem_encoding)
                except UnicodeDecodeError:
                    # Incorrectly encoded file name: ignore this file.
                    # Only the decode failure is swallowed, so
                    # KeyboardInterrupt and real bugs still propagate.
                    continue
            names.append(fname)
    return names