From ba2c202ae13d0675cdf4299a5f8e28c06072cbdd Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 9 Feb 2010 13:24:56 -0700 Subject: [PATCH] HTML Input: Ignore files whose names have been incorrectly encoded. Fixes #4820 (UnicodeDecodeError while starting ebook-convert on Linux) --- src/calibre/ebooks/oeb/base.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index c93a0689b2..7c2efe20ef 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -16,9 +16,10 @@ from urllib import unquote as urlunquote from urlparse import urljoin from lxml import etree, html +from cssutils import CSSParser import calibre -from cssutils import CSSParser +from calibre.constants import filesystem_encoding from calibre.translations.dynamic import translate from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.oeb.entitydefs import ENTITYDEFS @@ -434,10 +435,18 @@ class DirContainer(object): def namelist(self): names = [] - for root, dirs, files in os.walk(self.rootdir): + base = self.rootdir + if isinstance(base, unicode): + base = base.encode(filesystem_encoding) + for root, dirs, files in os.walk(base): for fname in files: fname = os.path.join(root, fname) fname = fname.replace('\\', '/') + if not isinstance(fname, unicode): + try: + fname = fname.decode(filesystem_encoding) + except: + continue names.append(fname) return names