HTML Input: Ignore files whose names have been incorrectly encoded. Fixes #4820 (UnicodeDecodeError while starting ebook-convert on Linux)

2025-08-11 09:13:57 -04:00 · 2010-02-09 13:24:56 -07:00 · 2010-02-09 13:24:56 -07:00 · ba2c202ae1
commit ba2c202ae1
parent b91afbb303
1 changed files with 11 additions and 2 deletions
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -16,9 +16,10 @@ from urllib import unquote as urlunquote
 from urlparse import urljoin
 from lxml import etree, html
+from cssutils import CSSParser
 import calibre
-from cssutils import CSSParser
+from calibre.constants import filesystem_encoding
 from calibre.translations.dynamic import translate
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
@@ -434,10 +435,18 @@ class DirContainer(object):
     def namelist(self):
         names = []
-        for root, dirs, files in os.walk(self.rootdir):
+        base = self.rootdir
+        if isinstance(base, unicode):
+            base = base.encode(filesystem_encoding)
+        for root, dirs, files in os.walk(base):
             for fname in files:
                 fname = os.path.join(root, fname)
                 fname = fname.replace('\\', '/')
+                if not isinstance(fname, unicode):
+                    try:
+                        fname = fname.decode(filesystem_encoding)
+                    except:
+                        continue
                 names.append(fname)
         return names