mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Conversion pipeline: Remove encoding declarations from HTML documents to guarantee that there is only a single encoding declaration in the output HTML. Fixes #773337 (html2epub convertion produces double "charset" directive in EPUB)
This commit is contained in:
parent
107912f63f
commit
b9098e8520
@ -16,7 +16,7 @@ from urllib import unquote as urlunquote
|
||||
from lxml import etree, html
|
||||
from calibre.constants import filesystem_encoding, __version__
|
||||
from calibre.translations.dynamic import translate
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
|
||||
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
|
||||
from calibre.ebooks.conversion.preprocess import CSSPreProcessor
|
||||
from calibre import isbytestring, as_unicode, get_types_map
|
||||
@ -853,6 +853,7 @@ class Manifest(object):
|
||||
self.oeb.log.debug('Parsing', self.href, '...')
|
||||
# Convert to Unicode and normalize line endings
|
||||
data = self.oeb.decode(data)
|
||||
data = strip_encoding_declarations(data)
|
||||
data = self.oeb.html_preprocessor(data)
|
||||
# There could be null bytes in data if it had &#0; entities in it
|
||||
data = data.replace('\0', '')
|
||||
|
Loading…
x
Reference in New Issue
Block a user