From b9098e85209faf9ce8197008ffd0430bc204c4f9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 29 Apr 2011 10:32:27 -0600 Subject: [PATCH] Conversion pipeline: Remove encoding declarations from HTML documents to guarantee that there is only a single encoding declaration in the output HTML. Fixes #773337 (html2epub convertion produces double "charset" directive in EPUB) --- src/calibre/ebooks/oeb/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index c07386e1fd..1f71e32548 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -16,7 +16,7 @@ from urllib import unquote as urlunquote from lxml import etree, html from calibre.constants import filesystem_encoding, __version__ from calibre.translations.dynamic import translate -from calibre.ebooks.chardet import xml_to_unicode +from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations from calibre.ebooks.oeb.entitydefs import ENTITYDEFS from calibre.ebooks.conversion.preprocess import CSSPreProcessor from calibre import isbytestring, as_unicode, get_types_map @@ -853,6 +853,7 @@ class Manifest(object): self.oeb.log.debug('Parsing', self.href, '...') # Convert to Unicode and normalize line endings data = self.oeb.decode(data) + data = strip_encoding_declarations(data) data = self.oeb.html_preprocessor(data) # There could be null bytes in data if it had &#0; entities in it data = data.replace('\0', '')