mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Conversion pipeline: Remove encoding declarations from HTML documents to guarantee that there is only a single encoding declaration in the output HTML. Fixes #773337 (html2epub convertion produces double "charset" directive in EPUB)
This commit is contained in:
parent
107912f63f
commit
b9098e8520
@ -16,7 +16,7 @@ from urllib import unquote as urlunquote
|
|||||||
from lxml import etree, html
|
from lxml import etree, html
|
||||||
from calibre.constants import filesystem_encoding, __version__
|
from calibre.constants import filesystem_encoding, __version__
|
||||||
from calibre.translations.dynamic import translate
|
from calibre.translations.dynamic import translate
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
|
||||||
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
|
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
|
||||||
from calibre.ebooks.conversion.preprocess import CSSPreProcessor
|
from calibre.ebooks.conversion.preprocess import CSSPreProcessor
|
||||||
from calibre import isbytestring, as_unicode, get_types_map
|
from calibre import isbytestring, as_unicode, get_types_map
|
||||||
@ -853,6 +853,7 @@ class Manifest(object):
|
|||||||
self.oeb.log.debug('Parsing', self.href, '...')
|
self.oeb.log.debug('Parsing', self.href, '...')
|
||||||
# Convert to Unicode and normalize line endings
|
# Convert to Unicode and normalize line endings
|
||||||
data = self.oeb.decode(data)
|
data = self.oeb.decode(data)
|
||||||
|
data = strip_encoding_declarations(data)
|
||||||
data = self.oeb.html_preprocessor(data)
|
data = self.oeb.html_preprocessor(data)
|
||||||
# There could be null bytes in data if it had &#0; entities in it
|
# There could be null bytes in data if it had &#0; entities in it
|
||||||
data = data.replace('\0', '')
|
data = data.replace('\0', '')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user