mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
HTMLZ Input: Use the user-specified input encoding if set; otherwise detect it.
This commit is contained in:
parent
5b82c42e4b
commit
4f3abc5614
@ -10,6 +10,7 @@ import os
|
|||||||
|
|
||||||
from calibre import walk
|
from calibre import walk
|
||||||
from calibre.customize.conversion import InputFormatPlugin
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from calibre.utils.zipfile import ZipFile
|
from calibre.utils.zipfile import ZipFile
|
||||||
|
|
||||||
class HTMLZInput(InputFormatPlugin):
|
class HTMLZInput(InputFormatPlugin):
|
||||||
@ -34,6 +35,13 @@ class HTMLZInput(InputFormatPlugin):
|
|||||||
html = tf.read()
|
html = tf.read()
|
||||||
break
|
break
|
||||||
|
|
||||||
|
# Encoding
|
||||||
|
if options.input_encoding:
|
||||||
|
ienc = options.input_encoding
|
||||||
|
else:
|
||||||
|
ienc = xml_to_unicode(html[:4096])[-1]
|
||||||
|
html = html.decode(ienc, 'replace')
|
||||||
|
|
||||||
# Run the HTML through the html processing plugin.
|
# Run the HTML through the html processing plugin.
|
||||||
from calibre.customize.ui import plugin_for_input_format
|
from calibre.customize.ui import plugin_for_input_format
|
||||||
html_input = plugin_for_input_format('html')
|
html_input = plugin_for_input_format('html')
|
||||||
@ -48,7 +56,7 @@ class HTMLZInput(InputFormatPlugin):
|
|||||||
fname = 'index%d.html'%c
|
fname = 'index%d.html'%c
|
||||||
htmlfile = open(fname, 'wb')
|
htmlfile = open(fname, 'wb')
|
||||||
with htmlfile:
|
with htmlfile:
|
||||||
htmlfile.write(html.encode('utf-8'))
|
htmlfile.write(html.encode('utf-8', 'replace'))
|
||||||
odi = options.debug_pipeline
|
odi = options.debug_pipeline
|
||||||
options.debug_pipeline = None
|
options.debug_pipeline = None
|
||||||
# Generate oeb from html conversion.
|
# Generate oeb from html conversion.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user