From 6221f6747398d8a6ee9e68fd31cddb00b7e2100d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 2 Jun 2010 14:08:17 -0600 Subject: [PATCH] Fix #5666 (coverter UnicodeDecodeError: 'utf8' codec can't decode byte 0xff in position 57: unexpected code byte) --- src/calibre/ebooks/html/input.py | 4 ++-- src/calibre/ebooks/metadata/html.py | 6 +++++- src/calibre/utils/magick_draw.py | 2 ++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index 413db1cc0b..6108aa329d 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -416,9 +416,9 @@ class HTMLInput(InputFormatPlugin): link = unquote(link).replace('/', os.sep) if not link.strip(): return link_ - if base and not os.path.isabs(link): - link = os.path.join(base, link) try: + if base and not os.path.isabs(link): + link = os.path.join(base, link) link = os.path.abspath(link) except: return link_ diff --git a/src/calibre/ebooks/metadata/html.py b/src/calibre/ebooks/metadata/html.py index d5aa9b8bef..45b592c709 100644 --- a/src/calibre/ebooks/metadata/html.py +++ b/src/calibre/ebooks/metadata/html.py @@ -11,7 +11,7 @@ import re from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.chardet import xml_to_unicode - +from calibre import entity_to_unicode def get_metadata(stream): src = stream.read() @@ -43,6 +43,10 @@ def get_metadata_(src, encoding=None): if match: author = match.group(2).replace(',', ';') + ent_pat = re.compile(r'&(\S+)?;') + title = ent_pat.sub(entity_to_unicode, title) + if author: + author = ent_pat.sub(entity_to_unicode, author) mi = MetaInformation(title, [author] if author else None) # Publisher diff --git a/src/calibre/utils/magick_draw.py b/src/calibre/utils/magick_draw.py index 0288107b45..5625da0869 100644 --- a/src/calibre/utils/magick_draw.py +++ b/src/calibre/utils/magick_draw.py @@ -51,6 +51,8 @@ class FontMetrics(object): def get_font_metrics(image, d_wand, text): + if isinstance(text, unicode): + text = text.encode('utf-8') ret = p.MagickQueryFontMetrics(image, d_wand, text) return FontMetrics(ret)