From 4e621117e3f3f6fb5affb819922cc247e11e39b7 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 16 Oct 2024 10:07:30 +0530
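Subject: [PATCH] Fix block ordering when marking sentences

While iterating over a parent's children, collect the children that need
their own processing and push them onto the stack in reverse once the
loop is done, so that they are popped in document order. Once such a
child has been seen, later children and tails are no longer folded into
the parent as simple children. This replaces the single-child wrapper
special case and the child_pos resume bookkeeping.

Add tests for two sibling paragraphs and for a paragraph nested in a div.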
---
 .../ebooks/oeb/polish/tests/structure.py      |  6 +++++
 src/calibre/ebooks/oeb/polish/tts.py          | 22 +++++++------------
 2 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/src/calibre/ebooks/oeb/polish/tests/structure.py b/src/calibre/ebooks/oeb/polish/tests/structure.py
index 01cbc566a6..69fc6d1bae 100644
--- a/src/calibre/ebooks/oeb/polish/tests/structure.py
+++ b/src/calibre/ebooks/oeb/polish/tests/structure.py
@@ -239,6 +239,12 @@ class Structure(BaseTest):
 
             '<p>Hello, <span data-calibre-tts="moose">world!':
             '<body><p><span id="1">Hello, </span><span data-calibre-tts="moose"><span id="2">world!</span></span></p>',
+
+            '<p>One<p>Two':
+            '<body><p><span id="1">One</span></p><p><span id="2">Two</span></p>',
+
+            '<div><p>something':
+            '<body><div><p><span id="1">something</span></p></div>',
         }.items()):
             root = parse(text, namespace_elements=True)
             orig = normalize_markup(root)
diff --git a/src/calibre/ebooks/oeb/polish/tts.py b/src/calibre/ebooks/oeb/polish/tts.py
index 482fe23a6c..f657ff31f4 100644
--- a/src/calibre/ebooks/oeb/polish/tts.py
+++ b/src/calibre/ebooks/oeb/polish/tts.py
@@ -96,7 +96,6 @@ def mark_sentences_in_html(root, lang: str = '', voice: str = '') -> list[Senten
                 self.texts.append(Chunk(None, elem.text, self.pos))
                 self.pos += len(elem.text)
             self.children = tuple(elem.iterchildren())
-            self.child_pos = 0
 
         def add_simple_child(self, elem):
             if text := elem.text:
@@ -325,27 +324,22 @@ def mark_sentences_in_html(root, lang: str = '', voice: str = '') -> list[Senten
     stack_of_parents = [Parent(elem, 'body', root_lang, root_voice) for elem in root.iterchildren('*') if barename(elem.tag).lower() == 'body']
     while stack_of_parents:
         p = stack_of_parents.pop()
-        if len(p.elem) == 1 and not has_text(p.elem):  # wrapper
-            c = p.elem[0]
-            if isinstance(c.tag, str):
-                stack_of_parents.append(Parent(c, barename(c.tag).lower(), p.lang, p.voice))
-            continue
-        for i in range(p.child_pos, len(p.children)):
-            child = p.children[i]
+        simple_allowed = True
+        children_to_process = []
+        for child in p.children:
             child_voice = child.get('data-calibre-tts', '')
             child_lang = lang_for_elem(child, p.lang)
             child_tag_name = barename(child.tag).lower() if isinstance(child.tag, str) else ''
-            if child_lang == p.lang and child_voice == p.voice and child_tag_name in continued_tag_names and len(child) == 0:
+            if simple_allowed and child_lang == p.lang and child_voice == p.voice and child_tag_name in continued_tag_names and len(child) == 0:
                 p.add_simple_child(child)
             elif child_tag_name not in ignored_tag_names:
-                stack_of_parents.append(Parent(child, child_tag_name, p.lang, p.voice, child_lang=child_lang))
+                simple_allowed = False
+                children_to_process.append(Parent(child, child_tag_name, p.lang, p.voice, child_lang=child_lang))
                 p.commit()
-                p.child_pos = i + 1
-                stack_of_parents.append(p)
-                continue
-            if text := child.tail:
+            if simple_allowed and (text := child.tail):
                 p.add_tail(child, text)
         p.commit()
+        stack_of_parents.extend(reversed(children_to_process))
     for src_elem, clones in clones_map.items():
         for clone in clones + [src_elem]:
             if not clone.text and not clone.tail and not clone.get('id') and not clone.get('name'):