mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
HTMLZ input: Fix handling of HTML files encoded in an encoding other than UTF-8
This commit is contained in:
commit
19f35f55bc
@@ -10,6 +10,7 @@ import os
|
||||
|
||||
from calibre import walk
|
||||
from calibre.customize.conversion import InputFormatPlugin
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
|
||||
class HTMLZInput(InputFormatPlugin):
|
||||
@@ -34,6 +35,13 @@ class HTMLZInput(InputFormatPlugin):
|
||||
html = tf.read()
|
||||
break
|
||||
|
||||
# Encoding
|
||||
if options.input_encoding:
|
||||
ienc = options.input_encoding
|
||||
else:
|
||||
ienc = xml_to_unicode(html[:4096])[-1]
|
||||
html = html.decode(ienc, 'replace')
|
||||
|
||||
# Run the HTML through the html processing plugin.
|
||||
from calibre.customize.ui import plugin_for_input_format
|
||||
html_input = plugin_for_input_format('html')
|
||||
|
Loading…
x
Reference in New Issue
Block a user