feat: Add support for EPUB with Media Overlays

This commit is contained in:
duydl 2023-10-13 12:00:31 +09:00 committed by Kovid Goyal
parent 82fa0ba9ba
commit 67fe848720
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
8 changed files with 447 additions and 9 deletions

1
imgsrc/srv/off.svg Normal file
View File

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" width="1792" height="1792" viewBox="-50 0 1300 1200"><path fill="currentColor" d="M513.94 0v693.97h172.12V0H513.94zM175.708 175.708C67.129 284.287 0 434.314 0 600c0 331.371 268.629 600 600 600s600-268.629 600-600c0-165.686-67.13-315.713-175.708-424.292l-120.85 120.85c77.66 77.658 125.684 184.952 125.684 303.442c0 236.981-192.146 429.126-429.126 429.126c-236.981 0-429.126-192.145-429.126-429.126c0-118.49 48.025-225.784 125.684-303.442l-120.85-120.85z"/></svg>

After

Width:  |  Height:  |  Size: 502 B

View File

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" width="1792" height="1792" viewBox="0 0 24 24"><path fill="currentColor" d="m19.775 22.6l-1.6-1.6H5q-.825 0-1.413-.588T3 19V5.825L1.4 4.2l1.4-1.4l18.4 18.4l-1.425 1.4ZM5 19h11.175L5 7.825V19Zm16-.825l-2-2V8h-8.175l-5-5H19q.825 0 1.413.588T21 5v13.175Z"/></svg>

After

Width:  |  Height:  |  Size: 300 B

View File

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" width="1792" height="1792" viewBox="0 0 32 32"><path fill="currentColor" d="M28 8h-4V4a2.002 2.002 0 0 0-2-2H4a2.002 2.002 0 0 0-2 2v18a2.002 2.002 0 0 0 2 2h4v4a2.002 2.002 0 0 0 2 2h18a2.002 2.002 0 0 0 2-2V10a2.002 2.002 0 0 0-2-2ZM4 22V4h18v4H10a2.002 2.002 0 0 0-2 2v12Zm18 0h-2.586L10 12.586V10h2.586L22 19.416Zm-12-6.586L16.586 22H10Zm12.001 1.173L15.414 10H22ZM10 28v-4h12a2.002 2.002 0 0 0 2-2V10h4v18Z"/></svg>

After

Width:  |  Height:  |  Size: 460 B

View File

@ -67,7 +67,7 @@ from read_book.touch import (
from read_book.viewport import scroll_viewport
from select import (
first_visible_word, is_start_closer_to_point, move_end_of_selection,
selection_extents, word_at_point
selection_extents, word_at_point, span_id_at_point, id_of_first_visible_span
)
from utils import debounce, is_ios
@ -164,6 +164,7 @@ class IframeBoss:
'handle_navigation_shortcut': self.on_handle_navigation_shortcut,
'annotations': self.annotations_msg_received,
'tts': self.tts_msg_received,
'audio-ebook': self.audio_ebook_msg_received,
'hints': self.hints_msg_received,
'copy_selection': self.copy_selection,
'replace_highlights': self.replace_highlights,
@ -1006,6 +1007,38 @@ class IframeBoss:
if select_tts_mark(occurrence_number):
self.ensure_selection_boundary_visible()
def audio_ebook_msg_received(self, data):
    # Dispatch an 'audio-ebook' message according to data.type:
    #   'mark'             -> move the highlight from data.old_span_id to data.span_id
    #   'play'             -> reply with the id of the span at data.pos, or of the
    #                         first visible span when no position was supplied
    #   'trigger-shortcut' -> forward to the navigation shortcut handler
    # Any other type is ignored.
    msg_type = data.type
    if msg_type is 'mark':
        self.color_span_id(data.old_span_id, data.span_id)
        return
    if msg_type is 'trigger-shortcut':
        self.on_handle_navigation_shortcut(data)
        return
    if msg_type is not 'play':
        return
    if data.pos:
        found_id = span_id_at_point(data.pos.x, data.pos.y)
    else:
        found_id = id_of_first_visible_span()
    # The reply is sent even when no span was found (span_id is then empty)
    self.send_message('audio_ebook_message', type='report-span-id', span_id=found_id)
def color_span_id(self, old_span_id, span_id):
    # Remove the highlight from the element with id old_span_id and highlight
    # the element with id span_id, scrolling it into view when it is not
    # entirely inside the viewport. Ids that match no element are ignored.

    def fully_visible(node):
        # True only when the bounding box of node lies completely inside the viewport
        box = node.getBoundingClientRect()
        if not (box.top >= 0 and box.left >= 0):
            return False
        viewport_height = window.innerHeight or document.documentElement.clientHeight
        if not (box.bottom <= viewport_height):
            return False
        viewport_width = window.innerWidth or document.documentElement.clientWidth
        return box.right <= viewport_width

    current = document.getElementById(span_id)
    previous = document.getElementById(old_span_id)
    if previous:
        # An empty value drops the inline background set by an earlier call
        previous.style.backgroundColor = ''
    if not current:
        return
    # Re-use the background colour of the document's ::selection pseudo-element
    selection_style = window.getComputedStyle(document.documentElement, '::selection')
    current.style.backgroundColor = selection_style.backgroundColor
    if not fully_visible(current):
        scroll_to_elem(current)
def hints_msg_received(self, data):
if data.type is 'show':
# clear selection so that it does not confuse with the hints which use the same colors

View File

@ -144,7 +144,7 @@ class ReadAloud:
bar.appendChild(cb('slower', 'slower', _('Slow down speech')))
bar.appendChild(cb('faster', 'faster', _('Speed up speech')))
bar.appendChild(cb('configure', 'cogs', _('Configure Read aloud')))
bar.appendChild(cb('hide', 'close', _('Close Read aloud')))
bar.appendChild(cb('hide', 'off', _('Close Read aloud')))
if self.state is not WAITING_FOR_PLAY_TO_START:
notes_container = bar_container.lastChild
notes_container.style.display = notes_container.previousSibling.style.display = 'block'

View File

@ -0,0 +1,345 @@
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2023, DO LE DUY <duy.dole.00ece at gmail.com>
# The key difference between an ePub with SMIL audio synchronization (EPUB3 with Media Overlays) and a regular ePub is the inclusion of SMIL files and audio content:
# SMIL Files: ePub with SMIL includes SMIL (Synchronized Multimedia Integration Language) files, XML documents that define audio and text synchronization.
# Audio Content: It contains audio files that match eBook sections, referenced in SMIL files for synchronized playback.
# Text Content: The textual content, often in HTML or XHTML files, remains similar to regular ePub. Text and audio are linked using <span> tags with unique IDs.
# SMIL, audio, and text files are organized into folders, usually inside the epub/ or OEBPS/ folder. Sometimes, SMIL files may be placed in text folders. In this program we assume that each spoken text file corresponds to one audio file and one SMIL file.
# Public domain audio eBooks can be found on https://www.readbeyond.it/ebooks.html. ReadBeyond also offers Aeneas (https://github.com/readbeyond/aeneas), an open-source tool for forced alignment of audio and text to generate SMIL files. Another notable tool is https://github.com/r4victor/syncabook, which builds upon Aeneas to provide a complete workflow for creating EPUB3 with Media Overlays.
from __python__ import bound_methods, hash_literals
from elementmaker import E
from book_list.globals import get_session_data
from dom import svgicon, unique_id, change_icon_image, clear
from gettext import gettext as _
from book_list.theme import get_color
from read_book.globals import runtime, ui_operations, current_spine_item, current_book
from read_book.highlights import ICON_SIZE
from read_book.selection_bar import BUTTON_MARGIN, get_margins, map_to_iframe_coords
from read_book.shortcuts import shortcut_for_key_event
from modals import question_dialog
class ReadAudioEbook:
    # Overlay that plays the pre-recorded narration of an EPUB 3 book with
    # Media Overlays and keeps the text in step with it: the span currently
    # being narrated is highlighted in the book iframe, and clicking on the
    # text seeks the audio to the clicked span.
    #
    # The owner is expected to assign smil_files, audio_files and audio_maps
    # after construction and to feed every SMIL file to parse_smil_file().

    dont_hide_on_content_loaded = True

    # Bounds for the playback speed. A zero or negative playbackRate is
    # rejected or treated as paused by browsers, so the rate is kept in a
    # conservative, audible range.
    MIN_PLAYBACK_RATE = 0.25
    MAX_PLAYBACK_RATE = 4
    PLAYBACK_RATE_STEP = 0.1

    def __init__(self, view):
        self.view = view
        self.parser = new window.DOMParser()
        self._state = "HIDDEN"
        self.bar_id = unique_id("bar")
        self.overlay_off = False
        # Playback bookkeeping, initialised explicitly so that no method ever
        # reads an attribute that was never assigned.
        self.span_id = None        # id of the span that is currently highlighted
        self.audio_map = None      # [clips by span id, audio src] of the current spine item
        self.skip_section = False  # silently skip spine items that have no audio
        self.smil_files = []
        self.audio_files = []
        self.audio_maps = {}

        container = self.container
        container.style.height = "100%"
        container.style.transition = "height 0.5s ease-in-out"
        container.style.backgroundColor = "rgba(127, 127, 127, 0.05)"
        container.setAttribute("tabindex", "0")
        bar_container = self.bar = E.div(
            id=self.bar_id,
            style="position: absolute; bottom: 0; width: 90%; height: 2em; border-radius: 1em; padding:0.5em; display: flex; justify-content: center; align-items: center; background-color: rgba(127, 127, 127, 0.3); "
        )
        container.appendChild(bar_container)
        container.addEventListener("keydown", self.on_keydown, {"passive": False})
        container.addEventListener("click", self.on_container_clicked, {"passive": False})
        container.addEventListener("contextmenu", self.toggle, {"passive": False})

        def create_button(name, icon, text):
            # Build an icon button whose click invokes the method called `name`
            ans = svgicon(icon, ICON_SIZE, ICON_SIZE, text)
            if name:
                ans.addEventListener("click", def(ev):
                    ev.stopPropagation(), ev.preventDefault()
                    self[name](ev)
                    self.view.focus_iframe()
                )
                ans.id = "audio-ebook-bt-" + name
            ans.classList.add("simple-link")
            ans.style.marginLeft = ans.style.marginRight = BUTTON_MARGIN
            return ans

        # References to the widgets are kept directly instead of being looked
        # up again by id after insertion.
        self.audio_buttons = E.div(
            id="audioButtons",
            style='height: 3ex; display: flex; align-items: center; justify-content: center',
            create_button("toggle", "pause", _("Toggle pause & play")),
            create_button("overlay", "overlay-off", _("Toggle overlay for scrolling & text selection")),
        )
        self.time_display = E.div(
            id="timeDisplay",
            E.text("")
        )
        self.progress_bar = E.div(
            id="progressBar",
            style="height:1.5em; display:block; background-color:rgba(255, 255, 255, 0.7); width:70%; margin:1em",
            E.div(
                style="display:block; background-color:rgba(0, 0, 0, 0.3); height:100%")
        )
        speed_buttons = E.div(
            style='height: 3ex; display: flex; align-items: center; justify-content: center',
            create_button("slower", "slower", _("Slow down audio")),
            create_button("faster", "faster", _("Speed up audio")),
            create_button("hide", "off", _("Close Read Audio-Ebook"))
        )
        bar_container.appendChild(self.audio_buttons)
        bar_container.appendChild(self.time_display)
        bar_container.appendChild(self.progress_bar)
        bar_container.appendChild(speed_buttons)

        self.audio_id = unique_id("audio")
        self.audio_player = E.audio(
            id=self.audio_id,
            style="display:none"
        )
        container.appendChild(self.audio_player)
        self.audio_player.addEventListener("timeupdate", self.on_time_update)
        self.audio_player.addEventListener("ended", self.on_audio_ended)
        self.progress_bar.addEventListener("click", self.on_progress_bar_clicked)

    def on_time_update(self, ev):
        # Refresh the progress bar and the time read-out, and move the text
        # highlight when playback has crossed into another clip.
        if self.state is "HIDDEN":
            return
        player = self.audio_player
        if not player.duration:
            self.time_display.textContent = "00:00"
            self.progress_bar.firstChild.style.width = "0%"
            return
        now = player.currentTime
        self.progress_bar.firstChild.style.width = ((now / player.duration) * 100) + "%"
        self.time_display.textContent = self.seconds_to_ms(now) + "/" + self.seconds_to_ms(player.duration)
        span_id = self.find_span_id_for_time(now)
        if span_id is not self.span_id:
            old_span_id = self.span_id
            self.span_id = span_id
            self.send_message("mark", old_span_id=old_span_id, span_id=self.span_id)

    def on_audio_ended(self, ev):
        # The audio of this spine item is finished, continue with the next one
        self.view.show_next_spine_item()

    def on_progress_bar_clicked(self, event):
        # Seek proportionally to the horizontal position of the click
        player = self.audio_player
        if not player.duration:
            return
        rect = self.progress_bar.getBoundingClientRect()
        fraction = (event.clientX - rect.left) / rect.width
        player.currentTime = fraction * player.duration

    def parse_smil_file(self, smil_content, smil_name, mimetype):
        # Parse one SMIL file and record its clips in self.audio_maps.
        #   smil_content: blob-like object whose text() returns a promise of the XML
        #   smil_name: name of the SMIL file in the book
        #   mimetype: unused, present because this is used as a get_file() callback
        # Only <par> elements that have a <text> with a fragment id and an
        # <audio> child are recorded. A SMIL file without any usable audio
        # reference is ignored instead of raising inside the promise.
        smil_content.text().then(def(data):
            xml_doc = self.parser.parseFromString(data, "text/xml")
            audio_map = {}
            audio_file = None
            for par_element in xml_doc.getElementsByTagName("par"):
                text_element = par_element.getElementsByTagName("text")[0]
                audio_element = par_element.getElementsByTagName("audio")[0]
                if not text_element or not audio_element:
                    continue
                if audio_file is None:
                    # One audio file per SMIL file is assumed: the first one wins
                    audio_file = audio_element.getAttribute("src")
                span_id = (text_element.getAttribute("src") or "").split("#")[1]
                if not span_id:
                    continue
                audio_map[span_id] = {
                    "clipBegin": audio_element.getAttribute("clipBegin"),
                    "clipEnd": audio_element.getAttribute("clipEnd")
                }
            if audio_file:
                # The key drops the last five characters of the SMIL name and
                # swaps the first "smil" for "text".
                # NOTE(review): presumably this turns e.g. "smil/x.xhtml.smil"
                # into the matching spine item name - confirm for other layouts.
                self.audio_maps[smil_name[:-5].replace("smil", "text")] = [audio_map, audio_file]
        )

    def change_audio_src(self):
        # Load the audio that belongs to the current spine item. When the item
        # has no audio and playback is active, either skip to the next item or
        # ask the user whether such sections should be skipped automatically.
        player = self.audio_player
        window.URL.revokeObjectURL(player.src)
        player.setAttribute("src", "")
        spine_item = current_spine_item()
        entry = self.audio_maps[spine_item]
        if entry and len(self.audio_files) > 0:
            self.audio_map = entry
            # The first two characters of the SMIL audio src are replaced by the
            # top level folder of the first audio file.
            # NOTE(review): assumes the src looks like "../audio/x.mp3" - confirm.
            link = self.audio_files[0].split("/")[0] + entry[1][2:]
            ui_operations.get_file(
                current_book(), link, def(blob, name, mimetype):
                    # The file arrives asynchronously. Drop it when the overlay
                    # was closed or another spine item was opened meanwhile,
                    # otherwise a hidden overlay would start playing and its
                    # state would no longer be "HIDDEN".
                    if self.state is "HIDDEN" or current_spine_item() is not spine_item:
                        return
                    self.pause()
                    player.src = window.URL.createObjectURL(blob)
                    self.send_message("play")
            )
            return
        # No audio for this spine item: forget the clips of the previous one
        self.audio_map = None
        if self.state is not "PLAYING":
            return
        if self.skip_section:
            self.view.show_next_spine_item()
            return
        self.pause()
        question_dialog(_('Skip Section'), _('Do you want to automatically skip sections without audio?'), def (yes):
            if yes:
                self.skip_section = True
                self.play()
            else:
                self.hide()
        )

    @property
    def container(self):
        return document.getElementById("audio-ebooks-overlay")

    @property
    def supports_css_min_max(self):
        return not runtime.is_standalone_viewer or runtime.QT_VERSION >= 0x050f00

    @property
    def is_visible(self):
        return self.container.style.display is not "none"

    @property
    def state(self):
        # One of "HIDDEN", "PLAYING" or "PAUSED"
        return self._state

    @state.setter
    def state(self, val):
        if val is not self._state:
            self._state = val

    def hide(self):
        # Stop playback, remove the highlight and close the overlay
        if self.state is "HIDDEN":
            return
        self.send_message("mark", old_span_id=self.span_id)
        # The highlight is gone, so the span has to be marked again on resume
        self.span_id = None
        self.pause()
        self.container.style.display = "none"
        self.view.focus_iframe()
        self.state = "HIDDEN"
        if ui_operations.read_aloud_state_changed:
            ui_operations.read_aloud_state_changed(False)

    def show(self):
        # Open the overlay and start loading the audio of the current spine item
        if self.state is not "HIDDEN":
            return
        self.state = "PLAYING"
        change_icon_image(document.getElementById("audio-ebook-bt-toggle"), "pause")
        self.change_audio_src()
        self.container.style.display = "block"
        self.focus()
        if ui_operations.read_aloud_state_changed:
            ui_operations.read_aloud_state_changed(True)

    def focus(self):
        self.container.focus()

    def change_playback_rate(self, delta):
        # Change the playback speed by delta, clamped to the supported range
        # and rounded to one decimal so repeated steps do not accumulate
        # floating point noise. defaultPlaybackRate is set as well because
        # loading a new source resets playbackRate to it.
        rate = Math.round((self.audio_player.playbackRate + delta) * 10) / 10
        rate = Math.min(self.MAX_PLAYBACK_RATE, Math.max(self.MIN_PLAYBACK_RATE, rate))
        self.audio_player.defaultPlaybackRate = rate
        self.audio_player.playbackRate = rate

    def slower(self):
        self.change_playback_rate(-self.PLAYBACK_RATE_STEP)

    def faster(self):
        self.change_playback_rate(self.PLAYBACK_RATE_STEP)

    def play(self):
        # Start playback, or move on to the next spine item when no audio is loaded
        self.state = "PLAYING"
        change_icon_image(document.getElementById("audio-ebook-bt-toggle"), "pause")
        if self.audio_player.getAttribute("src"):
            self.audio_player.play()
        else:
            self.view.show_next_spine_item()

    def pause(self):
        self.state = "PAUSED"
        change_icon_image(document.getElementById("audio-ebook-bt-toggle"), "play")
        if self.audio_player.getAttribute("src"):
            self.audio_player.pause()

    def toggle(self):
        # Switch between playing and paused; does nothing while hidden
        if self.state is "PLAYING":
            self.pause()
        elif self.state is "PAUSED":
            self.play()

    def overlay(self):
        # Shrink the overlay to the height of the control bar so the book can
        # be scrolled and text selected, or restore it to full height.
        button = document.getElementById("audio-ebook-bt-overlay")
        if self.overlay_off:
            self.overlay_off = False
            self.container.style.height = "100%"
            change_icon_image(button, "overlay-off")
        else:
            self.overlay_off = True
            self.container.style.height = "3em"
            change_icon_image(button, "overlay-on")

    def on_container_clicked(self, ev):
        # A primary button click on the overlay asks the iframe which span is
        # under the pointer; the answer arrives in handle_message().
        if ev.button is not 0:
            return
        ev.stopPropagation()
        ev.preventDefault()
        pos = map_to_iframe_coords({"x": ev.clientX, "y": ev.clientY}, get_margins())
        self.send_message("play", pos=pos)

    def on_keydown(self, ev):
        # Keyboard control of the overlay. Every key press is consumed.
        ev.stopPropagation()
        ev.preventDefault()
        key = ev.key
        if key is "Escape":
            self.hide()
            return
        if key in (" ", "MediaPlayPause", "PlayPause"):
            self.toggle()
            return
        if key in ("Play", "MediaPlay"):
            self.play()
            return
        if key in ("Pause", "MediaPause"):
            self.pause()
            return
        sc_name = shortcut_for_key_event(ev, self.view.keyboard_shortcut_map)
        if not sc_name:
            return
        if sc_name in ("show_chrome", "quit"):
            self.hide()
        elif sc_name in ("up", "down", "pageup", "pagedown", "left", "right"):
            self.send_message("trigger-shortcut", name=sc_name)

    def find_span_id_for_time(self, current_time):
        # Return the id of the span whose clip contains current_time (in
        # seconds), or None when there is no such span or no clips are loaded.
        if not self.audio_map:
            return None
        clips = self.audio_map[0]
        for span_id in clips:
            clip = clips[span_id]
            clip_begin_time = self.convert_time_to_seconds(clip["clipBegin"])
            clip_end_time = self.convert_time_to_seconds(clip["clipEnd"])
            if clip_begin_time <= current_time < clip_end_time:
                return span_id
        return None

    def convert_time_to_seconds(self, time_string):
        # Convert a SMIL clock value to seconds. Accepted forms:
        #   full clock     "h:mm:ss" with optional fraction, e.g. "0:01:02.500"
        #   partial clock  "mm:ss" with optional fraction, e.g. "01:02.5"
        #   timecount      a number optionally followed by h, min, s or ms,
        #                  e.g. "2.5s", "500ms", "2min", "1h", "7"
        # Missing, empty or malformed values yield 0.
        if not time_string:
            return 0
        text = time_string.strip()
        if not text:
            return 0
        try:
            if ":" in text:
                parts = text.split(":")
                if len(parts) > 3:
                    return 0
                seconds = 0
                for part in parts:
                    seconds = seconds * 60 + float(part)
                return seconds
            # "ms" has to be tested before "s"
            for suffix, factor in (("ms", 0.001), ("min", 60), ("h", 3600), ("s", 1)):
                if text.endswith(suffix):
                    return float(text[:-len(suffix)]) * factor
            return float(text)
        except ValueError:
            return 0

    def seconds_to_ms(self, seconds):
        # Format a duration in seconds as "m:ss" (minutes and seconds, despite the name)
        minutes = Math.floor(seconds / 60)
        remaining_seconds = int(seconds % 60)
        return str(minutes) + ':' + (str(remaining_seconds).zfill(2))

    def send_message(self, message_type, **kw):
        # Send an "audio-ebook" message of the given type to the book iframe
        self.view.iframe_wrapper.send_message("audio-ebook", type=message_type, **kw)

    def handle_message(self, message):
        # Handle a message from the book iframe. "report-span-id" carries the
        # id of the span playback should start from.
        if message.type is not "report-span-id" or not message.span_id:
            return
        if not self.audio_map:
            # No audio is loaded for this spine item, there is nothing to seek in
            return
        clip = self.audio_map[0][message.span_id]
        if clip:
            old_span_id = self.span_id
            self.span_id = message.span_id
            self.send_message("mark", old_span_id=old_span_id, span_id=self.span_id)
            self.audio_player.currentTime = self.convert_time_to_seconds(clip["clipBegin"])
        # A span without a clip (an id that is not part of the SMIL file) simply
        # resumes playback at the current position instead of raising.
        self.play()

View File

@ -34,6 +34,7 @@ from read_book.prefs.scrolling import (
MIN_SCROLL_SPEED_AUTO as SCROLL_SPEED_STEP, change_scroll_speed
)
from read_book.read_aloud import ReadAloud
from read_book.read_audio_ebook import ReadAudioEbook
from read_book.resources import load_resources
from read_book.scrollbar import BookScrollbar
from read_book.search import SearchOverlay
@ -266,6 +267,7 @@ class View:
'search_result_discovered': self.search_result_discovered,
'annotations': self.on_annotations_message,
'tts': self.on_tts_message,
'audio_ebook_message': self.on_audio_ebook_message,
'hints': self.on_hints_message,
'copy_text_to_clipboard': def(data):
ui_operations.copy_selection(data.text, data.html)
@ -311,6 +313,7 @@ class View:
E.div(style='position: absolute; top:0; left:0; width: 100%; height: 100%; display:none', id='book-content-popup-overlay'), # content popup overlay
E.div(style='position: absolute; top:0; left:0; width: 100%; height: 100%; overflow: auto; display:none', id='book-overlay'), # main overlay
E.div(style='position: absolute; top:0; left:0; width: 100%; height: 100%; display:none', id='controls-help-overlay'), # controls help overlay
E.div(style='position: absolute; bottom:0em; width: 100%; height: 100%; display:none', id='audio-ebooks-overlay'), # read audio ebook overlay
)
),
),
@ -371,6 +374,9 @@ class View:
def on_tts_message(self, data):
self.read_aloud.handle_message(data)
def on_audio_ebook_message(self, data):
    # Hand an 'audio_ebook_message' coming from the book iframe to the
    # audio ebook overlay.
    overlay = self.read_audio_ebook
    overlay.handle_message(data)
def on_hints_message(self, data):
self.hints.handle_message(data)
@ -673,6 +679,12 @@ class View:
self.iframe.contentWindow.focus()
def start_read_aloud(self, dont_start_talking):
if self.is_audio_ebook:
for x in self.modal_overlays:
if x is not self.read_audio_ebook:
x.hide()
self.read_audio_ebook.show()
else:
for x in self.modal_overlays:
if x is not self.read_aloud:
x.hide()
@ -681,7 +693,9 @@ class View:
self.read_aloud.play()
def toggle_read_aloud(self):
if self.read_aloud.is_visible:
if self.read_audio_ebook.is_visible:
self.read_audio_ebook.hide()
elif self.read_aloud.is_visible:
self.read_aloud.hide()
else:
self.start_read_aloud()
@ -923,6 +937,10 @@ class View:
self.book = current_book.book = book
hl = None
if not is_redisplay:
self.is_audio_ebook = undefined
if self.read_audio_ebook:
self.read_audio_ebook.hide()
clear(self.read_audio_ebook.container)
if runtime.is_standalone_viewer:
hl = book.highlights
v'delete book.highlights'
@ -975,6 +993,26 @@ class View:
show_controls_help()
sd.set('controls_help_shown_count' + ('_rtl_page_progression' if rtl_page_progression() else ''), c + 1)
if self.is_audio_ebook is undefined:
smil_files = []
audio_files = []
for filename in book.manifest.files:
if filename.endswith(".smil"):
smil_files.append(filename)
elif book.manifest.files[filename].mimetype is "audio/mpeg":
audio_files.append(filename)
if len(smil_files) > 0:
self.is_audio_ebook = True
self.read_audio_ebook = ReadAudioEbook(self)
self.read_audio_ebook.smil_files = smil_files
self.read_audio_ebook.audio_files = audio_files
self.read_audio_ebook.audio_maps = {}
for smil_file in smil_files:
ui_operations.get_file(
self.book, smil_file, self.read_audio_ebook.parse_smil_file)
else:
self.is_audio_ebook = False
def preferences_changed(self):
self.set_margins()
ui_operations.update_url_state(True)
@ -1051,6 +1089,8 @@ class View:
self.loaded_resources = resource_data
done_callback(resource_data)
load_resources(self.book, name, self.loaded_resources, cb)
if self.is_audio_ebook and self.read_audio_ebook.state != 'HIDDEN':
window.setTimeout(self.read_audio_ebook.change_audio_src, 1000) # wait for previous spine to update
def goto_doc_boundary(self, start):
name = self.book.manifest.spine[0 if start else self.book.manifest.spine.length - 1]

View File

@ -53,6 +53,23 @@ def first_visible_word():
if r?:
return r
def span_id_at_point(x, y):
    # Return the id of the <span id=...> at the viewport coordinates (x, y),
    # or undefined when there is none.
    #
    # The elements at the point are visited from topmost to bottommost. An
    # element that is itself a span with an id wins; checking only its
    # descendants would skip the span that was actually hit and return the
    # first span of an enclosing block instead. For any other element the
    # first span with an id inside it is used as a fallback, so points in
    # the gaps between spans still resolve to something.
    for element in document.elementsFromPoint(x, y):
        if element.matches and element.matches('span[id]'):
            return element.id
        spans = element.querySelectorAll('span[id]')
        if len(spans) > 0:
            return spans[0].id
def id_of_first_visible_span():
    # Probe the viewport on a grid of roughly 10 x 10 points, row by row from
    # the top left, and return the first span id that span_id_at_point()
    # reports. Returns undefined when no probe finds a span.
    width = window.innerWidth
    height = window.innerHeight
    # A viewport smaller than 10px would otherwise give a step of zero
    xdelta = max(1, width // 10)
    ydelta = max(1, height // 10)
    for y in range(0, height, ydelta):
        for x in range(0, width, xdelta):
            span_id = span_id_at_point(x, y)
            if span_id:
                return span_id
def empty_range_extents():
return {