From a7736ab44ded4b9307fd8d1bb039d4a8e789bc80 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 20 Oct 2024 13:21:56 +0530 Subject: [PATCH] Fix sentences in tail of parent element not being added --- src/calibre/ebooks/oeb/polish/tests/structure.py | 6 +++--- src/calibre/ebooks/oeb/polish/tts.py | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/oeb/polish/tests/structure.py b/src/calibre/ebooks/oeb/polish/tests/structure.py index fa53d6d808..4c9e0c7a69 100644 --- a/src/calibre/ebooks/oeb/polish/tests/structure.py +++ b/src/calibre/ebooks/oeb/polish/tests/structure.py @@ -254,7 +254,9 @@ class Structure(BaseTest): }.items()): root = parse(text, namespace_elements=True) orig = normalize_markup(root) - mark_sentences_in_html(root) + sentences = mark_sentences_in_html(root) + ids = tuple(int(s.elem_id[len(id_prefix):]) for s in sentences) + self.assertEqual(len(ids), ids[-1]) marked = normalize_markup(root) self.assertEqual(expected, marked) unmark_sentences_in_html(root) @@ -263,8 +265,6 @@ class Structure(BaseTest): self.assertEqual(tuple(s.lang for s in sentences), ('eng', 'fra')) - - def find_tests(): import unittest return unittest.defaultTestLoader.loadTestsFromTestCase(Structure) diff --git a/src/calibre/ebooks/oeb/polish/tts.py b/src/calibre/ebooks/oeb/polish/tts.py index f5572ebac9..422ebc995f 100644 --- a/src/calibre/ebooks/oeb/polish/tts.py +++ b/src/calibre/ebooks/oeb/polish/tts.py @@ -84,6 +84,7 @@ def mark_sentences_in_html(root, lang: str = '', voice: str = '') -> list[Senten self.tag_name = tag_name self.lang = child_lang or lang_for_elem(elem, parent_lang) self.parent_lang = parent_lang + self.parent_voice = parent_voice q = elem.get('data-calibre-tts', '') self.voice = parent_voice if q.startswith('{'): # } @@ -127,6 +128,7 @@ def mark_sentences_in_html(root, lang: str = '', voice: str = '') -> list[Senten before = self.elem.tail[:start] span = self.make_wrapper(text, p) spans.append(span) + ans.append(Sentence(span.get('id'), text, self.parent_lang, self.parent_voice)) after = self.elem.tail[end:] self.elem.tail = before if after and spans: