mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
More work on kepubify
This commit is contained in:
parent
d8a744ceea
commit
c7557b23b2
@ -16,7 +16,7 @@ import re
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre.ebooks.oeb.base import XHTML, XPath, serialize
|
||||
from calibre.ebooks.oeb.base import XHTML, XPath, escape_cdata
|
||||
from calibre.ebooks.oeb.parse_utils import barename, merge_multiple_html_heads_and_bodies
|
||||
from calibre.ebooks.oeb.polish.parsing import parse
|
||||
from calibre.ebooks.oeb.polish.tts import lang_for_elem
|
||||
@ -178,7 +178,10 @@ def remove_kobo_markup_from_html(root):
|
||||
|
||||
|
||||
def serialize_html(root) -> bytes:
|
||||
return serialize(root, 'text/html')
|
||||
escape_cdata(root)
|
||||
ans = etree.tostring(root, encoding='unicode', xml_declaration=False, pretty_print=False, with_tail=False)
|
||||
ans = ans.replace('\xa0', ' ')
|
||||
return b"<?xml version='1.0' encoding='utf-8'?>\n" + ans.encode('utf-8')
|
||||
|
||||
|
||||
def kepubify_parsed_html(root, metadata_lang: str = 'en'):
|
||||
|
@ -49,6 +49,12 @@ div#book-inner { margin-top: 0; margin-bottom: 0; }</style></head><body><div id=
|
||||
'<div>Svg: <svg>mouse</svg><i> no tail':
|
||||
'<div><span class="koboSpan" id="kobo.1.1">Svg: </span><svg xmlns="http://www.w3.org/2000/svg">mouse</svg>'
|
||||
'<i> <span class="koboSpan" id="kobo.1.2">no tail</span></i></div>',
|
||||
|
||||
# encoding quirks
|
||||
'<p>A\xa0nbsp; ':
|
||||
'<p><span class="koboSpan" id="kobo.1.1">A nbsp; </span></p>',
|
||||
'<div><script>1 < 2 & 3</script>': # escaping with cdata note that kepubify doesnt do this
|
||||
'<div><script><![CDATA[1 < 2 & 3]]></script></div>',
|
||||
}.items():
|
||||
with self.subTest(src=src):
|
||||
root = kepubify_html_data(src)
|
||||
|
Loading…
x
Reference in New Issue
Block a user