kepubify: Fix handling of images

This commit is contained in:
Kovid Goyal 2025-02-20 10:55:35 +05:30
parent 4faf5fb9e5
commit 142c2b0314
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 31 additions and 15 deletions

View File

@ -104,7 +104,7 @@ def add_kobo_spans(inner, root_lang):
segnum += 1 segnum += 1
return parent.makeelement(span_tag_name, attrib={'class': 'koboSpan', 'id': f'kobo.{paranum}.{segnum}'}) return parent.makeelement(span_tag_name, attrib={'class': 'koboSpan', 'id': f'kobo.{paranum}.{segnum}'})
def wrap_text_in_spans(text: str, parent: etree.ElementBase, after_child: etree.ElementBase, lang: str) -> str | None: def wrap_text_in_spans(text: str, parent: etree.Element, after_child: etree.ElementBase, lang: str) -> str | None:
nonlocal increment_next_para, paranum, segnum nonlocal increment_next_para, paranum, segnum
if increment_next_para: if increment_next_para:
paranum += 1 paranum += 1
@ -114,7 +114,10 @@ def add_kobo_spans(inner, root_lang):
ws = None ws = None
if num := len(text) - len(stripped): if num := len(text) - len(stripped):
ws = text[:num] ws = text[:num]
at = 0 if after_child is None else parent.index(after_child) + 1 try:
at = 0 if after_child is None else parent.index(after_child) + 1
except ValueError: # wrapped child
at = parent.index(after_child.getparent()) + 1
if at: if at:
parent[at-1].tail = ws parent[at-1].tail = ws
else: else:
@ -125,28 +128,36 @@ def add_kobo_spans(inner, root_lang):
parent.insert(at, s) parent.insert(at, s)
at += 1 at += 1
# Wrap ``child`` in a newly created span and put that span at the position
# ``child`` occupied in its parent. Returns the wrapper span.
def wrap_child(child: etree.Element) -> etree.Element:
nonlocal increment_next_para, paranum, segnum
# Advance the paragraph counter here and clear the pending-increment flag,
# then restart segment numbering for the new paragraph number.
increment_next_para = False
paranum += 1
segnum = 0
node = child.getparent()
idx = node.index(child)
# NOTE(review): kobo_span is defined outside this block; presumably it builds
# the koboSpan element from the current paranum/segnum -- confirm.
w = kobo_span(node)
# Put the wrapper in child's slot, then move child inside the wrapper. After
# this, child is no longer a direct child of node.
node[idx] = w
w.append(child)
# Hand child's trailing text to the wrapper, then clear both text and tail on
# the wrapped element.
w.tail = child.tail
child.tail = child.text = None
return w
while stack: while stack:
node, parent, tagname, node_lang = p() node, parent, tagname, node_lang = p()
if parent is not None: if parent is not None: # tail text
wrap_text_in_spans(node, parent, tagname, node_lang) wrap_text_in_spans(node, parent, tagname, node_lang)
continue continue
if tagname == 'img':
wrap_child(node)
continue
if not increment_next_para and tagname in BLOCK_TAGS: if not increment_next_para and tagname in BLOCK_TAGS:
increment_next_para = True increment_next_para = True
for child in reversed(node): for child in reversed(node):
child_name = barename(child.tag).lower() if isinstance(child.tag, str) else ''
if child.tail: if child.tail:
a((child.tail, node, child, node_lang)) a((child.tail, node, child, node_lang))
if isinstance(child.tag, str): if child_name not in SKIPPED_TAGS:
child_name = barename(child.tag).lower() a((child, None, child_name, lang_for_elem(child, node_lang)))
if child_name == 'img':
increment_next_para = False
paranum += 1
segnum = 0
idx = node.index(child)
w = kobo_span(node)
w.append(child)
node[idx] = w
elif child_name not in SKIPPED_TAGS:
a((child, None, child_name, lang_for_elem(child, node_lang)))
if node.text: if node.text:
wrap_text_in_spans(node.text, node, None, node_lang) wrap_text_in_spans(node.text, node, None, node_lang)

View File

@ -27,10 +27,15 @@ div#book-inner { margin-top: 0; margin-bottom: 0; }</style></head><body><div id=
'<span class="koboSpan" id="kobo.2.6">Another.</span></p>', '<span class="koboSpan" id="kobo.2.6">Another.</span></p>',
# img tags # img tags
'<p>An image<img src="x">with tail':
'<p><span class="koboSpan" id="kobo.1.1">An image</span><span class="koboSpan" id="kobo.2.1">'
'<img src="x"/></span><span class="koboSpan" id="kobo.2.2">with tail</span></p>'
# comments # comments
# nested block tags # nested block tags
# skipped tags
}.items(): }.items():
with self.subTest(src=src): with self.subTest(src=src):
root = parse(src) root = parse(src)