Refactor PR to use pre-parsed SMIL metadata

2025-07-09 03:04:10 -04:00 · 2023-10-24 11:29:27 +05:30 · 2023-10-24 11:29:27 +05:30 · 4755458dcd
commit 4755458dcd
parent 67fe848720
10 changed files with 348 additions and 230 deletions
--- a/src/calibre/srv/render_book.py
+++ b/src/calibre/srv/render_book.py
@ -354,9 +354,12 @@ def transform_smil(container, name, link_uid, virtualize_resources, virtualized_
    text_tag, audio_tag = SMIL('text'), SMIL('audio')
    body_tag, seq_tag, par_tag = SMIL('body'), SMIL('seq'), SMIL('par')
    type_attr, textref_attr = EPUB('type'), EPUB('textref')
    parnum = 0
-    def make_par(par):
+    def make_par(par, target):
-        ans = {}
+        nonlocal parnum
        parnum += 1
        ans = {'num': parnum}
        t = par.get(type_attr)
        if t:
            ans['type'] = t
@ -364,7 +367,10 @@ def transform_smil(container, name, link_uid, virtualize_resources, virtualized_
            if child.tag == text_tag:
                src = child.get('src')
                if src:
-                    ans['text'] = [container.href_to_name(child.get('src'), name), src.partition('#')[2]]
+                    q = container.href_to_name(src, name)
                    if q != target:
                        return {}  # the par must match the textref of the parent seq
                    ans['anchor'] = src.partition('#')[2]
            elif child.tag == audio_tag:
                src = child.get('src')
                if src:
@ -386,10 +392,15 @@ def transform_smil(container, name, link_uid, virtualize_resources, virtualized_
            parent_seq = smil_map.get(target)
            if parent_seq is None:
                smil_map[target] = parent_seq = {'textref': [target, ''], 'par':[], 'seq':[], 'type': 'root'}
        else:
            if parent_seq['textref'][0] != target:
                return  # child seqs must be in the same HTML file as parent
        parent_seq['seq'].append(seq)
        for child in seq_xml_element.iterchildren('*'):
            if child.tag == par_tag:
-                seq['par'].append(make_par(child))
+                p = make_par(child, target)
                if p.get('audio'):
                    seq['par'].append(p)
            elif child.tag == seq_tag:
                tref = child.get(textref_attr)
                if tref:
--- a/src/pyj/read_book/flow_mode.pyj
+++ b/src/pyj/read_book/flow_mode.pyj
@ -597,6 +597,9 @@ anchor_funcs = {
    'cmp': def cmp(a, b):
        return (a.block - b.block) or (a.inline - b.inline)
    ,
    'get_bounding_client_rect': def(elem):
        return elem.getBoundingClientRect()
    ,
 }
--- a/src/pyj/read_book/iframe.pyj
+++ b/src/pyj/read_book/iframe.pyj
@ -60,6 +60,7 @@ from read_book.settings import (
 from read_book.shortcuts import (
    create_shortcut_map, keyevent_as_shortcut, shortcut_for_key_event
 )
 from read_book.smil import flatten_smil_map, smil_element_at, mark_smil_element
 from read_book.toc import find_anchor_before_range, update_visible_toc_anchors
 from read_book.touch import (
    create_handlers as create_touch_handlers, reset_handlers as reset_touch_handlers
@ -67,7 +68,7 @@ from read_book.touch import (
 from read_book.viewport import scroll_viewport
 from select import (
    first_visible_word, is_start_closer_to_point, move_end_of_selection,
-    selection_extents, word_at_point, span_id_at_point, id_of_first_visible_span
+    selection_extents, word_at_point
 )
 from utils import debounce, is_ios
@ -259,6 +260,8 @@ class IframeBoss:
        set_toc_anchor_map()
        self.replace_history_on_next_cfi_update = True
        self.book = current_book.book = data.book
        self.smil_map = data.smil_map
        self.smil_anchor_map, self.smil_par_list = flatten_smil_map(self.smil_map)
        self.link_attr = 'data-' + self.book.manifest.link_uid
        self.reference_mode_enabled = data.reference_mode_enabled
        self.is_titlepage = data.is_titlepage
@ -475,6 +478,8 @@ class IframeBoss:
                window.setTimeout(def():
                    self.annotations_msg_received({'type': 'edit-highlight', 'uuid': ipos.uuid})
                , 5)
            elif ipos.type is 'smil_id':
                self.audio_ebook_msg_received({'type': 'play', 'anchor': ipos.anchor})
        spine = self.book.manifest.spine
        files = self.book.manifest.files
        spine_index = csi.index
@ -1009,35 +1014,30 @@ class IframeBoss:
    def audio_ebook_msg_received(self, data):
        if data.type is 'mark':
-            self.color_span_id(data.old_span_id, data.span_id)
+            if data.anchor:
-        elif data.type is 'play':
+                self.last_search_at = window.performance.now()
-            if data.pos:
+                if mark_smil_element(data.anchor):
-                span_id = span_id_at_point(data.pos.x, data.pos.y)
+                    self.ensure_selection_boundary_visible()
-                self.send_message('audio_ebook_message', type='report-span-id', span_id=span_id)
+                    self.send_message('audio_ebook_message', type='marked', anchor=data.anchor, idx=data.idx)
                else:
                    self.send_message('audio_ebook_message', type='marked')
            else:
-                span_id = id_of_first_visible_span()
+                window.getSelection().removeAllRanges()
-                self.send_message('audio_ebook_message', type='report-span-id', span_id=span_id)
+                self.send_message('audio_ebook_message', type='marked')
-
+        elif data.type is 'play':
-        elif data.type is 'trigger-shortcut':
+            if data.anchor:
                pos = self.smil_anchor_map[data.anchor]
                if pos?:
                    par = self.smil_par_list[pos]
                else:
                    par = None
            else:
                par = smil_element_at(data.pos, self.smil_anchor_map, self.smil_par_list)
            self.send_message('audio_ebook_message', type='start_play_at', par=par or None, anchor=data.anchor or None)
        elif data.type is 'trigger_shortcut':
            self.on_handle_navigation_shortcut(data)
-
+        else:
-    def color_span_id(self, old_span_id, span_id):
+            console.error(f'Unknown audio ebook message type from main: {data.type}')
        def element_in_viewport(element):
            rect = element.getBoundingClientRect()
            return (
                rect.top >= 0 and
                rect.left >= 0 and
                rect.bottom <= (window.innerHeight or document.documentElement.clientHeight) and
                rect.right <= (window.innerWidth or document.documentElement.clientWidth)
            )
        element = document.getElementById(span_id)
        old_element = document.getElementById(old_span_id)
        if old_element:
            old_element.style.backgroundColor = ''
        if element:
            element.style.backgroundColor = window.getComputedStyle(document.documentElement, '::selection').backgroundColor
            if not element_in_viewport(element):
                scroll_to_elem(element)
    def hints_msg_received(self, data):
        if data.type is 'show':
--- a/src/pyj/read_book/paged_mode.pyj
+++ b/src/pyj/read_book/paged_mode.pyj
@ -879,6 +879,7 @@ anchor_funcs = {
    'cmp': def cmp(a, b):
        return a - b
    ,
    'get_bounding_client_rect': get_bounding_client_rect,
 }
--- a/src/pyj/read_book/read_aloud.pyj
+++ b/src/pyj/read_book/read_aloud.pyj
@ -8,7 +8,7 @@ from book_list.globals import get_session_data
 from book_list.theme import get_color
 from dom import clear, svgicon, unique_id
 from gettext import gettext as _
-from read_book.globals import runtime, ui_operations
+from read_book.globals import ui_operations
 from read_book.highlights import ICON_SIZE
 from read_book.selection_bar import BUTTON_MARGIN, get_margins, map_to_iframe_coords
 from read_book.shortcuts import shortcut_for_key_event
@ -58,7 +58,7 @@ class ReadAloud:
    @property
    def supports_css_min_max(self):
-        return not runtime.is_standalone_viewer or runtime.QT_VERSION >= 0x050f00
+        return True
    @property
    def bar(self):
--- a/src/pyj/read_book/read_audio_ebook.pyj
+++ b/src/pyj/read_book/read_audio_ebook.pyj
@ -2,32 +2,28 @@
 # License: GPL v3 Copyright: 2023, DO LE DUY <duy.dole.00ece at gmail.com>
-# The key difference between an ePub with SMIL audio synchronization (EPUB3 with Media Overlays) and a regular ePub is the inclusion of SMIL files and audio content:
+# Public domain audio eBooks can be found on https://www.readbeyond.it/ebooks.html.
-
+# ReadBeyond also offers Aeneas (https://github.com/readbeyond/aeneas),
-# SMIL Files: ePub with SMIL includes SMIL (Synchronized Multimedia Integration Language) files, XML documents that define audio and text synchronization.
+# an open-source tool for force-alignment of audio and text to generate smil files.
-
+# Another notable tool is https://github.com/r4victor/syncabook,
-# Audio Content: It contains audio files that match eBook sections, referenced in SMIL files for synchronized playback.
+# builds upon Aeneas to complete a workflow for creating EPUB3 with Media Overlays.
 # Text Content: The textual content, often in HTML or XHTML files, remains similar to regular ePub. Text and audio are linked using <span> tags with unique IDs.
 # SMIL, audio, and text files are organized into folders, usually inside the epub/ or OEBPS/ folder. Sometimes, SMIL files may be placed in text folders. In this program we assume that each spoken text file corresponds to one audio file and one SMIL file.  
 # Public domain audio eBooks can be found on https://www.readbeyond.it/ebooks.html. ReadBeyond also offers Aeneas (https://github.com/readbeyond/aeneas), an open-source tool for forced alignment of audio and text to generate SMIL files. Another notable tool is https://github.com/r4victor/syncabook, which builds upon Aeneas to complete a workflow for creating EPUB3 with Media Overlays.
 from __python__ import bound_methods, hash_literals
 from elementmaker import E
-from book_list.globals import get_session_data
+from dom import change_icon_image, svgicon, unique_id
 from dom import svgicon, unique_id, change_icon_image, clear
 from gettext import gettext as _
-from book_list.theme import get_color
+from modals import error_dialog
-from read_book.globals import runtime, ui_operations, current_spine_item, current_book
+from read_book.globals import ui_operations
 from read_book.highlights import ICON_SIZE
 from read_book.selection_bar import BUTTON_MARGIN, get_margins, map_to_iframe_coords
 from read_book.shortcuts import shortcut_for_key_event
-from modals import question_dialog
+from read_book.smil import (
    find_next_audio_in_spine, get_smil_id_for_timestamp, next_audio_file_for_spine_item
 )
 HIDDEN, PLAYING, PAUSED = 'HIDDEN', 'PLAYING', 'PAUSED'
 class ReadAudioEbook:
@ -36,12 +32,17 @@ class ReadAudioEbook:
    def __init__(self, view):
        self.view = view
-        self.parser = new window.DOMParser()
+        self._state = HIDDEN
-        self._state = "HIDDEN"
+        self.initialized = False
        self.current_audio_src = ''
    def initialize(self):
        if self.initialized:
            return
        self.initialized = True
        self.bar_id = unique_id("bar")
        self.overlay_off = False
-        self.container.style.height = "100%"
+
        container = self.container
        container.style.transition = "height 0.5s ease-in-out"
        container.style.backgroundColor = "rgba(127, 127, 127, 0.05)"
@ -49,7 +50,7 @@ class ReadAudioEbook:
        container.appendChild(E.div(
            id=self.bar_id,
            style="position: absolute; bottom: 0; width: 90%; height: 2em; border-radius: 1em; padding:0.5em; display: flex; justify-content: center; align-items: center; background-color: rgba(127, 127, 127, 0.3); "
-        )) 
+        ))
        container.addEventListener("keydown", self.on_keydown, {"passive": False})
        container.addEventListener("click", self.on_container_clicked, {"passive": False})
        container.addEventListener("contextmenu", self.toggle, {"passive": False})
@ -92,119 +93,112 @@ class ReadAudioEbook:
                create_button("faster", "faster", _("Speed up audio")),
                create_button("hide", "off", _("Close Read Audio-Ebook"))
            )
-            
+
        ]:
            bar_container.appendChild(x)
-        self.audio_buttons = document.getElementById("audioButtons") 
+        self.audio_buttons = document.getElementById("audioButtons")
-        self.progress_bar = document.getElementById("progressBar") 
+        self.progress_bar = document.getElementById("progressBar")
        self.time_display = document.getElementById("timeDisplay")
-        self.audio_id = unique_id("audio")
+        self.container.appendChild(E.audio(id=unique_id('audio'), style="display:none"))
-        self.container.appendChild(E.audio(
+        ap = self.container.lastChild
-            id=self.audio_id,
+        self.audio_player_id = ap.id
            style="display:none"
        )) 
-        self.audio_player = document.getElementById(self.audio_id) 
+        ap.addEventListener("timeupdate", def():
-
+            if self.state is not HIDDEN:
-        self.audio_player.addEventListener("timeupdate", def():
+                ap = self.audio_player
-            if self.state != "HIDDEN":
+                if ap.duration:
-                if self.audio_player.duration:
+                    audio_current_time = ap.currentTime
-                    audio_current_time = self.audio_player.currentTime
+                    progress = (audio_current_time / ap.duration) * 100
                    progress = (audio_current_time / self.audio_player.duration) * 100
                    self.progress_bar.firstChild.style.width = progress + "%"
-
+                    self.time_display.textContent = f"{self.seconds_to_ms(audio_current_time)}/{self.seconds_to_ms(ap.duration)}"
-                    self.time_display.textContent = f"{self.seconds_to_ms(audio_current_time)}/{self.seconds_to_ms(self.audio_player.duration)}"
+                    self.mark_for_timeupdate(audio_current_time)
                    span_id = self.find_span_id_for_time(audio_current_time)
                    if span_id != self.span_id:
                        old_span_id = self.span_id
                        self.span_id = span_id
                        self.send_message("mark", old_span_id=old_span_id, span_id = self.span_id)
                else:
                    self.time_display.textContent = "00:00"
                    self.progress_bar.firstChild.style.width = "0%"
        )
-        self.audio_player.addEventListener("ended", def():
+        ap.addEventListener("ended", def():
-            self.view.show_next_spine_item()
+            self.play_next_audio_file()
-        )  
+        )
        self.progress_bar.addEventListener("click", def(event):
-            if self.audio_player.duration:
+            ap = self.audio_player
            if ap.duration:
                rect = self.progress_bar.getBoundingClientRect()
                clickX = event.clientX - rect.left
                total_width = rect.width
-                skip_time = (clickX / total_width) * self.audio_player.duration
+                skip_time = (clickX / total_width) * ap.duration
-                self.audio_player.currentTime = skip_time
+                ap.currentTime = skip_time
        )
    def mark_for_timeupdate(self, audio_time):
        sam = self.view.currently_showing.smil_audio_map
        if sam:
            smil_id, idx = get_smil_id_for_timestamp(self.current_audio_src, audio_time, sam, self.last_marked_smil_idx)
            if smil_id and smil_id is not self.last_marked_smil_id:
                self.send_message('mark', anchor=smil_id, idx=idx)
-    def parse_smil_file(self, smil_content, smil_name, mimetype):
+    def play_next_audio_file(self):
-        # Extract information from the parsed XML
+        next_audio_file, par = next_audio_file_for_spine_item(self.current_audio_src, self.view.currently_showing.smil_audio_map)
-        smil_content.text().then(def(data): 
+        if next_audio_file:
-            xml_doc = self.parser.parseFromString(data, "text/xml")
+            self.set_audio_src(next_audio_file, def():
-            audio_map = {}
+                self.audio_player.currentTime = par.audio?.start or 0
-            par_elements = xml_doc.getElementsByTagName("par")
+                self.play()
            audio_element = par_elements[0].getElementsByTagName("audio")[0]
            audio_file = audio_element.getAttribute("src")
            for par_element in par_elements:
                text_element = par_element.getElementsByTagName("text")[0]
                audio_element = par_element.getElementsByTagName("audio")[0]
                if text_element and audio_element:
                    span_id = text_element.getAttribute("src").split("#")[1]
                    audio_details = {
                        "clipBegin": audio_element.getAttribute("clipBegin"),
                        "clipEnd": audio_element.getAttribute("clipEnd")
                    }
                    audio_map[span_id] = audio_details
            self.audio_maps[smil_name[:-5].replace("smil", "text")] = [audio_map, audio_file]
        )
    def change_audio_src(self):
        window.URL.revokeObjectURL(self.audio_player.src)
        self.audio_player.setAttribute("src", "")
        if self.audio_maps[current_spine_item()]:
            self.audio_map = self.audio_maps[current_spine_item()]
            link = self.audio_files[0].split("/")[0] + self.audio_maps[current_spine_item()][1][2:]
            ui_operations.get_file(
                current_book(), link, def(blob, name, mimetype):
                    self.pause()
                    blob_url = window.URL.createObjectURL(blob)
                    self.audio_player.src = blob_url
                    self.send_message("play")
            )
-            # self.pause()
+            return
-            # self.audio_player.src = "book/" + link
+        spine_name, par = find_next_audio_in_spine(self.view.currently_showing.name, self.view.book.manifest)
-            # self.send_message("play")
+        if spine_name:
-
+            self.view.show_name(spine_name, initial_position={'type': 'smil_id', 'anchor': par.anchor})
        else:
-            if self.state is "PLAYING":
+            self.hide()
-                if self.skip_section:
+
-                    self.view.show_next_spine_item()
+    def set_audio_src(self, name, proceed):
-                else:
+        if self.current_audio_src is name:
-                    self.pause()
+            proceed()
-                    question_dialog(_('Skip Section'), _('Do you want to automatically skip sections without audio?'), def (yes):
+            return
-                        if yes:
+        self.last_marked_smil_id = self.last_marked_smil_idx = None
-                            self.skip_section = True
+        self.current_audio_src = name
-                            self.play()
+        ap = self.audio_player
-                        else:
+        if ui_operations.get_url_for_book_file_name:
-                            self.hide()
+            ap.onloadeddata = def():
-                    )
+                proceed()
            ap.onerror = def(evt):
                console.error(evt)
                error_dialog(_('Could not load audio'), _(
                    'Could not load the audio file: {} with error: {}').format(name, evt.message))
            ap.src = ui_operations.get_url_for_book_file_name(name)
            return
        if ap.src:
            ap.onloadeddata = def():
                pass
            ap.onerror = def():
                pass
            window.URL.revokeObjectURL(ap.src)
            ap.src = ''
        ui_operations.get_file(self.view.book, name, def(blob, name, mimetype):
            ap = self.audio_player
            ap.onloadeddata = def():
                proceed()
            ap.onerror = def(evt):
                console.error(evt)
                error_dialog(_('Could not load audio'), _(
                    'Could not load the audio file: {} with error: {}').format(name, evt.message))
            ap.src = window.URL.createObjectURL(blob)
        )
    def start_playback(self):
        self.send_message('play')
    @property
    def container(self):
        return document.getElementById("audio-ebooks-overlay")
    @property
-    def supports_css_min_max(self):
+    def audio_player(self):
-        return not runtime.is_standalone_viewer or runtime.QT_VERSION >= 0x050f00
+        return document.getElementById(self.audio_player_id)
    @property
    def is_visible(self):
@ -216,24 +210,30 @@ class ReadAudioEbook:
    @state.setter
    def state(self, val):
-        if val is not self._state:
+        if val is not self._state :
-            self._state = val
+            if val is HIDDEN:
                self._state = HIDDEN
            elif val is PLAYING:
                self._state = PLAYING
            elif val is PAUSED:
                self._state = PAUSED
    def hide(self):
-        if self.state is not "HIDDEN":
+        if self.state is not HIDDEN:
-            self.send_message("mark", old_span_id=self.span_id)
+            self.send_message("mark")
            self.pause()
            self.container.style.display = "none"
            self.view.focus_iframe()
-            self.state = "HIDDEN"
+            self.state = HIDDEN
            if ui_operations.read_aloud_state_changed:
                ui_operations.read_aloud_state_changed(False)
    def show(self):
-        if self.state is "HIDDEN":
+        if self.state is HIDDEN:
-            self.state = "PLAYING"
+            self.initialize()
            self.state = PLAYING
            change_icon_image(document.getElementById("audio-ebook-bt-toggle"), "pause")
-            self.change_audio_src()
+            self.start_playback()
            self.container.style.display = "block"
            self.focus()
            if ui_operations.read_aloud_state_changed:
@ -244,28 +244,28 @@ class ReadAudioEbook:
    def slower(self):
        self.audio_player.playbackRate -= 0.1
-        
+
    def faster(self):
        self.audio_player.playbackRate += 0.1
    def play(self):
-        self.state = "PLAYING" 
+        self.state = PLAYING
        change_icon_image(document.getElementById("audio-ebook-bt-toggle"), "pause")
-        if self.audio_player.getAttribute("src"):
+        ap = self.audio_player
-            self.audio_player.play()
+        if ap.getAttribute("src"):
-        else:
+            ap.play()
-            self.view.show_next_spine_item()
+
    def pause(self):
-        self.state = "PAUSED"
+        self.state = PAUSED
        change_icon_image(document.getElementById("audio-ebook-bt-toggle"), "play")
-        if self.audio_player.getAttribute("src"):
+        ap = self.audio_player
-            self.audio_player.pause()
+        if ap.getAttribute("src"):
            ap.pause()
    def toggle(self):
-        if self.state is "PLAYING":
+        if self.state is PLAYING:
            self.pause()
-        elif self.state is "PAUSED":
+        elif self.state is PAUSED:
            self.play()
    def overlay(self):
@ -277,7 +277,7 @@ class ReadAudioEbook:
            self.overlay_off = True
            self.container.style.height = "3em"
            change_icon_image(document.getElementById("audio-ebook-bt-overlay"), "overlay-on")
-            
+
    def on_container_clicked(self, ev):
        if ev.button is not 0:
            return
@ -309,23 +309,7 @@ class ReadAudioEbook:
        elif sc_name is "quit":
            self.hide()
        elif sc_name in ("up", "down", "pageup", "pagedown", "left", "right"):
-            self.send_message("trigger-shortcut", name=sc_name)
+            self.send_message("trigger_shortcut", name=sc_name)
    def find_span_id_for_time(self, current_time):
        if self.audio_map:
            for span_id in self.audio_map[0]:
                clip_begin_time = self.convert_time_to_seconds(self.audio_map[0][span_id]["clipBegin"])
                clip_end_time = self.convert_time_to_seconds(self.audio_map[0][span_id]["clipEnd"])               
                if clip_begin_time <= current_time < clip_end_time:
                    return span_id
        return None
    def convert_time_to_seconds(self, time_string):
        parts = time_string.split(":")
        if len(parts) != 3:
            return 0
        hours, minutes, seconds = map(float, parts)
        return hours * 3600 + minutes * 60 + seconds
    def seconds_to_ms(self, seconds):
        minutes = Math.floor(seconds / 60)
@ -336,10 +320,26 @@ class ReadAudioEbook:
        self.view.iframe_wrapper.send_message("audio-ebook", type=message_type, **kw)
    def handle_message(self, message):
-        if message.type is "report-span-id":
+        if message.type is 'start_play_at':
-            if message.span_id:
+            if message.par:
-                old_span_id = self.span_id
+                self.set_audio_src(
-                self.span_id = message.span_id
+                    message.par.audio, def():
-                self.send_message("mark", old_span_id=old_span_id, span_id=self.span_id)
+                        self.audio_player.currentTime = message.par.start or 0
-                self.audio_player.currentTime = self.convert_time_to_seconds(self.audio_map[0][self.span_id]["clipBegin"])
+                        self.play()
-                self.play()
+                )
            else:
                if message.anchor:
                    # start playing from where we are
                    self.send_message('play')
                    return
                self.pause()
                error_dialog(_('Audio element not found'), _(
                    'Could not play audio as no associated audio was found'))
        elif message.type is 'marked':
            if message.anchor:
                self.last_marked_smil_id = message.anchor
                self.last_marked_smil_idx = message.idx
            else:
                self.last_marked_smil_id = self.last_marked_smil_idx = None
        else:
            console.error(f'Unknown audio ebook message type from iframe: {message.type}')
--- a/src/pyj/read_book/smil.pyj
+++ b/src/pyj/read_book/smil.pyj
@ -0,0 +1,132 @@
 # vim:fileencoding=utf-8
 # License: GPL v3 Copyright: 2023, Kovid Goyal <kovid at kovidgoyal.net>
 from __python__ import bound_methods, hash_literals
 from read_book.anchor_visibility import is_anchor_on_screen
 from read_book.globals import get_boss
 def flatten_seq(seq, par_list):
    # Depth-first walk of a SMIL seq node. Every par directly under this
    # node that has an anchor is pushed onto par_list (mutated in place),
    # then each nested seq is visited in turn. Nothing is returned.
    own_pars = seq.par
    if own_pars:
        for entry in own_pars:
            if not entry.anchor:
                continue
            par_list.push(entry)
    nested = seq.seq
    if nested:
        for sub_seq in nested:
            flatten_seq(sub_seq, par_list)
 def flatten_smil_map(smil_map):
    # Flatten the nested seq/par tree in smil_map into:
    #   * an array of the anchored pars, ordered by their `num` field
    #   * a map from par anchor -> index into that array
    # A falsy smil_map yields an empty map and an empty array.
    # Returns (anchor_map, par_list).
    def by_num(a, b):
        return a.num - b.num
    pars = v'[]'
    if smil_map:
        flatten_seq(smil_map, pars)
    pars.sort(by_num)
    index_of_anchor = {}
    position = 0
    for entry in pars:
        index_of_anchor[entry.anchor] = position
        position += 1
    return index_of_anchor, pars
 def get_smil_audio_map(smil_map):
    # Group every par that references an audio file by that file name.
    # The tree is walked depth-first (pars of a node before its child seqs),
    # and each per-file array is then sorted by the par's `start` value.
    # Returns {'audio_files': {audio_file_name: [par, ...]}}.
    by_file = {}
    def by_start(a, b):
        return a.start - b.start
    def collect(node):
        if node.par:
            for entry in node.par:
                if not entry.audio:
                    continue
                bucket = by_file[entry.audio]
                if not bucket:
                    bucket = by_file[entry.audio] = v'[]'
                bucket.push(entry)
        if node.seq:
            for sub_seq in node.seq:
                collect(sub_seq)
    collect(smil_map)
    for pars in Object.values(by_file):
        pars.sort(by_start)
    return {'audio_files': by_file}
 def get_smil_id_for_timestamp(audio_file_name, timestamp, smil_audio_map, prev_idx):
    # Find the par of audio_file_name whose [start, end] interval contains
    # timestamp. The search begins at prev_idx (the previously matched
    # index, reset to 0 when missing or out of range) and wraps around to
    # the start of the list, so sequential playback normally matches on the
    # first few probes.
    # Returns (anchor, index) of the matching par, or (None, None).
    pars = smil_audio_map.audio_files[audio_file_name]
    if not pars:
        return None, None
    total = pars.length
    begin = prev_idx or 0
    if begin < 0 or begin >= total:
        begin = 0
    # first [begin, total), then the part skipped at the front: [0, begin)
    for lo, hi in [[begin, total], [0, begin]]:
        for i in range(lo, hi):
            candidate = pars[i]
            if candidate.start <= timestamp <= candidate.end:
                return candidate.anchor, i
    return None, None
 def next_audio_file_for_spine_item(audio_file_name, smil_audio_map):
    # Return the audio file that follows audio_file_name for the current
    # spine item, together with the value stored for it in the map.
    #
    # smil_audio_map is normally the structure built by get_smil_audio_map(),
    # i.e. {'audio_files': {name: [par, ...]}}, so the file names live under
    # the `audio_files` key. Enumerating the keys of the wrapper object itself
    # only ever yields 'audio_files', which made this lookup always fail.
    # A plain {name: value} map is still accepted for backward compatibility.
    #
    # Returns (next_file_name, stored_value), or (None, None) when there is
    # no current file, no map, the file is unknown, or it is the last one.
    if audio_file_name and smil_audio_map:
        files = smil_audio_map.audio_files or smil_audio_map
        keys = Object.keys(files)
        idx = keys.indexOf(audio_file_name)
        if 0 <= idx < keys.length - 1:
            following = keys[idx + 1]
            return following, files[following]
    return None, None
 def first_par(smil_map):
    # Return the first par (in flattened order) that has an anchor,
    # or None when the map contains no such par.
    flattened = flatten_smil_map(smil_map)
    ordered_pars = flattened[1]
    for candidate in ordered_pars:
        if candidate.anchor:
            return candidate
    return None
 def find_next_audio_in_spine(current_name, book_manifest):
    # Scan the spine items that come after current_name and return the first
    # one whose manifest entry has a smil_map containing an anchored par.
    # Returns (spine_item_name, par), or (None, None) if none is found.
    spine = book_manifest.spine
    files = book_manifest.files
    first_candidate = spine.indexOf(current_name) + 1
    for pos in range(first_candidate, spine.length):
        item_name = spine[pos]
        entry = files[item_name]
        if not entry or not entry.smil_map:
            continue
        par = first_par(entry.smil_map)
        if par:
            return item_name, par
    return None, None
 def smil_element_at(pos, anchor_map, par_list):
    # Pick the par to start playback from. When pos is given, try in order:
    #   1. the element directly under pos, if its id is a known anchor
    #   2. any par with anchor and audio whose bounding rect contains pos
    # If pos is missing, or neither of the above matched:
    #   3. the first par with anchor and audio whose anchor is on screen
    #   4. the first par with audio
    # Returns the chosen par, or None when no par qualifies.
    if pos:
        # first see if we get lucky
        hit = document.elementFromPoint(pos.x, pos.y)
        if hit and hit.id and anchor_map[hit.id]?:
            return par_list[anchor_map[hit.id]]
        # now try to find a par that intersects pos
        funcs = get_boss().anchor_funcs
        for par in par_list:
            if not (par.anchor and par.audio):
                continue
            node = document.getElementById(par.anchor)
            if not node:
                continue
            rect = funcs.get_bounding_client_rect(node)
            inside_x = rect.x <= pos.x <= (rect.x + rect.width)
            if inside_x and rect.y <= pos.y <= (rect.y + rect.height):
                return par
    # no position, or nothing found at it: use the first visible anchor
    for par in par_list:
        if par.anchor and par.audio and is_anchor_on_screen(par.anchor):
            return par
    # just use first par
    for par in par_list:
        if par.audio:
            return par
    return None
 def mark_smil_element(anchor):
    # Highlight the element with id `anchor` by making it the current
    # selection. Returns True only if the element exists and the resulting
    # selection has at least one range and non-empty text; otherwise False.
    target = document.getElementById(anchor)
    if not target:
        return False
    selection = window.getSelection()
    selection.selectAllChildren(target)
    return bool(selection.rangeCount and selection.toString())
--- a/src/pyj/read_book/view.pyj
+++ b/src/pyj/read_book/view.pyj
@ -40,6 +40,7 @@ from read_book.scrollbar import BookScrollbar
 from read_book.search import SearchOverlay
 from read_book.selection_bar import SelectionBar
 from read_book.shortcuts import create_shortcut_map
 from read_book.smil import get_smil_audio_map
 from read_book.timers import Timers
 from read_book.toc import get_current_toc_nodes, update_visible_toc_nodes
 from read_book.touch import set_left_margin_handler, set_right_margin_handler
@ -328,8 +329,9 @@ class View:
        self.overlay = Overlay(self)
        self.selection_bar = SelectionBar(self)
        self.read_aloud = ReadAloud(self)
        self.read_audio_ebook = ReadAudioEbook(self)
        self.hints = Hints(self)
-        self.modal_overlays = v'[self.selection_bar, self.read_aloud, self.hints]'
+        self.modal_overlays = v'[self.selection_bar, self.read_aloud, self.hints, self.read_audio_ebook]'
        self.processing_spine_item_display = False
        self.pending_load = None
        self.currently_showing = {'selection': {'empty': True}, 'on_load':v'[]'}
@ -679,7 +681,7 @@ class View:
        self.iframe.contentWindow.focus()
    def start_read_aloud(self, dont_start_talking):
-        if self.is_audio_ebook:
+        if self.book.manifest.has_smil:
            for x in self.modal_overlays:
                if x is not self.read_audio_ebook:
                    x.hide()
@ -937,7 +939,6 @@ class View:
        self.book = current_book.book = book
        hl = None
        if not is_redisplay:
            self.is_audio_ebook = undefined
            if self.read_audio_ebook:
                self.read_audio_ebook.hide()
                clear(self.read_audio_ebook.container)
@ -993,26 +994,6 @@ class View:
                show_controls_help()
                sd.set('controls_help_shown_count' + ('_rtl_page_progression' if rtl_page_progression() else ''), c + 1)
        if self.is_audio_ebook is undefined:
            smil_files = []
            audio_files = []
            for filename in book.manifest.files:
                if filename.endswith(".smil"):
                    smil_files.append(filename)
                elif book.manifest.files[filename].mimetype is "audio/mpeg":
                    audio_files.append(filename)
            if len(smil_files) > 0:
                self.is_audio_ebook = True
                self.read_audio_ebook = ReadAudioEbook(self)
                self.read_audio_ebook.smil_files = smil_files
                self.read_audio_ebook.audio_files = audio_files
                self.read_audio_ebook.audio_maps = {}
                for smil_file in smil_files:
                    ui_operations.get_file(
                        self.book, smil_file, self.read_audio_ebook.parse_smil_file)
            else:
                self.is_audio_ebook = False
    def preferences_changed(self):
        self.set_margins()
        ui_operations.update_url_state(True)
@ -1089,8 +1070,6 @@ class View:
            self.loaded_resources = resource_data
            done_callback(resource_data)
        load_resources(self.book, name, self.loaded_resources, cb)
        if self.is_audio_ebook and self.read_audio_ebook.state != 'HIDDEN':
            window.setTimeout(self.read_audio_ebook.change_audio_src, 1000) # wait for previous spine to update
    def goto_doc_boundary(self, start):
        name = self.book.manifest.spine[0 if start else self.book.manifest.spine.length - 1]
@ -1396,9 +1375,14 @@ class View:
        self.processing_spine_item_display = True
        self.current_status_message = ''
        self.iframe.style.visibility = 'hidden'
        fdata = self.book.manifest.files[self.currently_showing.name]
        smil_map = None
        if self.book.manifest.has_smil and fdata:
            smil_map = fdata.smil_map
            self.currently_showing.smil_audio_map = get_smil_audio_map(smil_map)
        self.iframe_wrapper.send_unencrypted_message('display',
            resource_data=resource_data, book=self.book, name=self.currently_showing.name,
-            initial_position=self.currently_showing.initial_position,
+            initial_position=self.currently_showing.initial_position, smil_map=smil_map or None,
            settings=self.currently_showing.settings, reference_mode_enabled=self.reference_mode_enabled,
            is_titlepage=self.currently_showing.name is self.book.manifest.title_page_name,
            highlights=self.annotations_manager.highlights_for_currently_showing(),
--- a/src/pyj/select.pyj
+++ b/src/pyj/select.pyj
@ -53,24 +53,6 @@ def first_visible_word():
            if r?:
                return r
 def span_id_at_point(x, y):
    # Walk the elements stacked under the point (x, y), in the order given by
    # document.elementsFromPoint(), and return the id of the first <span>
    # carrying an id attribute found among an element's descendants.
    # Note that the lookup inspects descendants of each hit element, not the
    # hit element itself. Falls through (no explicit return value) when
    # nothing matches.
    for hit in document.elementsFromPoint(x, y):
        first_span = hit.querySelector('span[id]')
        if first_span:
            return first_span.id
 def id_of_first_visible_span():
    # Probe the viewport at sample points spaced one tenth of its width and
    # height apart, scanning row by row from the top-left corner, and return
    # the first non-empty span id reported by span_id_at_point().
    # Falls through (no explicit return value) when no sample point yields one.
    # NOTE(review): for viewports smaller than 10px a step evaluates to 0;
    # behaviour then depends on how range() treats a zero step -- confirm.
    viewport_width = window.innerWidth
    viewport_height = window.innerHeight
    step_x = viewport_width // 10
    step_y = viewport_height // 10
    for probe_y in range(0, viewport_height, step_y):
        for probe_x in range(0, viewport_width, step_x):
            found = span_id_at_point(probe_x, probe_y)
            if found:
                return found
 def empty_range_extents():
    return {
        'start': {'x': 0, 'y': 0, 'height': 0, 'width': 0, 'onscreen': False, 'selected_prev': False},
--- a/src/pyj/viewer-main.pyj
+++ b/src/pyj/viewer-main.pyj
@ -6,7 +6,7 @@ from elementmaker import E
 import initialize  # noqa: unused-import
 import traceback
-from ajax import ajax, workaround_qt_bug
+from ajax import absolute_path, ajax, workaround_qt_bug
 from book_list.globals import get_session_data, set_session_data
 from book_list.library_data import library_data
 from book_list.theme import css_for_variables, get_color
@ -56,6 +56,10 @@ def file_received(name, file_data, proceed, end_type, xhr, ev):
    proceed(result, name, file_data.mimetype, book)
 def get_url_for_book_file_name(name):
    # Build the URL for a file inside the book: the file name is placed under
    # the 'book/' prefix and the result is resolved via absolute_path().
    book_relative = 'book/' + name
    return absolute_path(book_relative)
 def get_file(book, name, proceed):
    entry = book.manifest.files[name]
    if not entry:
@ -322,6 +326,7 @@ if window is window.top:
    if TRANSLATIONS_DATA:
        install(TRANSLATIONS_DATA)
    ui_operations.get_file = get_file
    ui_operations.get_url_for_book_file_name = get_url_for_book_file_name
    ui_operations.get_mathjax_files = get_mathjax_files
    ui_operations.update_url_state = update_url_state
    ui_operations.show_error = show_error