diff --git a/src/calibre/gui2/tts/common.py b/src/calibre/gui2/tts/common.py
index ad48864d71..b149ad9f6b 100644
--- a/src/calibre/gui2/tts/common.py
+++ b/src/calibre/gui2/tts/common.py
@@ -19,3 +19,6 @@ class Event:
     def __init__(self, etype, data=None):
         self.type = etype
         self.data = data
+
+    def __repr__(self):
+        return f'Event(type={self.type}, data={self.data})'
diff --git a/src/calibre/gui2/viewer/tts.py b/src/calibre/gui2/viewer/tts.py
index 4bc0eb5427..afd935a6c4 100644
--- a/src/calibre/gui2/viewer/tts.py
+++ b/src/calibre/gui2/viewer/tts.py
@@ -3,12 +3,26 @@
 # License: GPL v3 Copyright: 2020, Kovid Goyal
 
 from PyQt5.Qt import QObject, pyqtSignal
+
 from calibre.gui2 import error_dialog
 
 
+def add_markup(text_parts):
+    from calibre.gui2.tts.implementation import Client
+    buf = []
+    bm = Client.mark_template
+    for x in text_parts:
+        if isinstance(x, int):
+            buf.append(bm.format(x))
+        else:
+            buf.append(Client.escape_marked_text(x))
+    return ''.join(buf)
+
+
 class TTS(QObject):
 
     dispatch_on_main_thread_signal = pyqtSignal(object)
+    event_received = pyqtSignal(object, object)
 
     def __init__(self, parent=None):
         QObject.__init__(self, parent)
@@ -49,8 +63,20 @@ class TTS(QObject):
             return error_dialog(self.parent(), _('Text-to-Speech unavailable'), str(err), show=True)
 
     def play(self, data):
-        text = data['text']
-        print(11111, text)
+        marked_text = add_markup(data['marked_text'])
+        self.tts_client.speak_marked_text(marked_text, self.callback)
+
+    def pause(self, data):
+        self.tts_client.pause()
+
+    def resume(self, data):
+        self.tts_client.resume()
+
+    def callback(self, event):
+        data = event.data
+        if event.type is event.type.mark:
+            data = int(data)
+        self.event_received.emit(event.type.name, data)
 
     def stop(self, data):
         self.tts_client.stop()
diff --git a/src/calibre/gui2/viewer/web_view.py b/src/calibre/gui2/viewer/web_view.py
index 084f333502..d57ccc3f6e 100644
--- a/src/calibre/gui2/viewer/web_view.py
+++ b/src/calibre/gui2/viewer/web_view.py
@@ -288,6 +288,7 @@ class ViewerBridge(Bridge):
     show_search_result = to_js()
     prepare_for_close = to_js()
     viewer_font_size_changed = to_js()
+    tts_event = to_js()
 
 
 def apply_font_settings(page_or_view):
@@ -474,6 +475,7 @@ class WebView(RestartingWebEngineView):
         self.current_cfi = self.current_content_file = None
         RestartingWebEngineView.__init__(self, parent)
         self.tts = TTS(self)
+        self.tts.event_received.connect(self.tts_event_received)
         self.dead_renderer_error_shown = False
         self.render_process_failed.connect(self.render_process_died)
         w = QApplication.instance().desktop().availableGeometry(self).width()
@@ -709,3 +711,6 @@ class WebView(RestartingWebEngineView):
 
     def generic_action(self, which, data):
         self.execute_when_ready('generic_action', which, data)
+
+    def tts_event_received(self, which, data):
+        self.execute_when_ready('tts_event', which, data)
diff --git a/src/pyj/range_utils.pyj b/src/pyj/range_utils.pyj
index 55cc7603fd..b0f55fe812 100644
--- a/src/pyj/range_utils.pyj
+++ b/src/pyj/range_utils.pyj
@@ -28,6 +28,31 @@ def text_nodes_in_range(r):
     return ans
 
 
+def first_non_empty_text_node_in_range(r):
+    # Returns a (text_node, offset) pair, which is what the caller in
+    # read_book/iframe.pyj unpacks. (None, 0) means no non-empty text node
+    # was found in the range.
+    parent = r.commonAncestorContainer
+    doc = parent.ownerDocument or document
+    iterator = doc.createNodeIterator(parent)
+    in_range = False
+    while True:
+        node = iterator.nextNode()
+        if not node:
+            break
+        if not in_range and node.isSameNode(r.startContainer):
+            in_range = True
+        if in_range:
+            if is_non_empty_text_node(node):
+                # The range's start offset only applies inside its start container
+                offset = r.startOffset if node.isSameNode(r.startContainer) else 0
+                return node, offset
+            if node.isSameNode(r.endContainer):
+                break
+    return None, 0
+
+
+
 def first_annot_in_range(r, annot_id_uuid_map):
     parent = r.commonAncestorContainer
     doc = parent.ownerDocument or document
diff --git a/src/pyj/read_book/find.pyj b/src/pyj/read_book/find.pyj
index 040cd67f30..94527ec47d 100644
--- a/src/pyj/read_book/find.pyj
+++ b/src/pyj/read_book/find.pyj
@@ -10,16 +10,18 @@ def build_text_map():
         'style': True, 'script': True, 'noscript': True, 'title': True, 'meta': True, 'head': True, 'link': True, 'html': True,
         'img': True
     }
+    text_node_type = Node.TEXT_NODE
+    element_node_type = Node.ELEMENT_NODE
 
     def process_node(node):
         nonlocal flat_text
         nt = node.nodeType
-        if nt is Node.TEXT_NODE:
+        if nt is text_node_type:
             text = node.nodeValue
             if text and text.length:
                 node_list.push(v"{node: node, offset: flat_text.length, length: text.length}")
                 flat_text += text
-        elif nt is Node.ELEMENT_NODE:
+        elif nt is element_node_type:
             if not node.hasChildNodes():
                 return
             tag = node.tagName.toLowerCase()
@@ -36,6 +38,48 @@ def build_text_map():
     return {'timestamp': window.performance.now(), 'flat_text': flat_text, 'node_list': node_list}
 
 
+def index_for_node(node, node_list):
+    for entry in node_list:
+        if entry.node.isSameNode(node):
+            return entry.offset
+
+
+def tts_data(text_node, offset):
+    # Returns a list in which strings are text to be spoken and integers are
+    # marks: the index in the flat text of the word that follows the mark.
+    offset_in_flat_text = offset or 0
+    if not cache.text_map:
+        cache.text_map = build_text_map()
+    if text_node:
+        offset_in_flat_text += index_for_node(text_node, cache.text_map.node_list) or 0
+    match = None
+    first = True
+    last = None
+    marked_text = v'[]'
+    text = cache.text_map.flat_text[offset_in_flat_text:]
+    for v'match of text.matchAll(/\w+/g)':
+        start = match.index
+        if first:
+            first = False
+            if start:
+                marked_text.push(text[:start])
+        elif start > last:
+            # Preserve the separators between words, otherwise they are run together
+            marked_text.push(text[last:start])
+        marked_text.push(start + offset_in_flat_text)
+        marked_text.push(match[0])
+        # match.length is the size of the match array (1 here), not of the word
+        last = start + match[0].length
+    if last is None:
+        marked_text.push(text)
+    else:
+        trailer = text[last:]
+        if trailer:
+            marked_text.push(trailer)
+    return marked_text
+
+
+
 def find_node_for_index_binary(node_list, idx_in_flat_text, start):
     # Do a binary search for idx
     start = start or 0
@@ -55,24 +99,22 @@ def find_node_for_index_binary(node_list, idx_in_flat_text, start):
     return None, None, None
 
 
-def find_node_for_index_linear(node_list, idx_in_flat_text, start):
-    start = start or 0
-    for i in range(start, node_list.length):
-        q = node_list[i]
-        limit = q.offset + q.length
-        if q.offset <= idx_in_flat_text and limit > idx_in_flat_text:
-            start_node = q.node
-            start_offset = idx_in_flat_text - q.offset
-            return start_node, start_offset, i
-    return None, None, None
+def get_occurrence_data(node_list, start, end):
+    start_node, start_offset, start_pos = find_node_for_index_binary(node_list, start)
+    if start_node is not None:
+        end_node, end_offset, node_pos = find_node_for_index_binary(node_list, end, start_pos)
+        if end_node is not None:
+            return {
+                'start_node': start_node, 'start_offset': start_offset, 'start_pos': start_pos,
+                'end_node': end_node, 'end_offset': end_offset, 'end_pos': node_pos,
+            }
 
 
-def find_specific_occurrence(q, num, before_len, after_len, text_map):
+def find_specific_occurrence(q, num, before_len, after_len, text_map, from_offset):
     if not q or not q.length:
         return
-    from_idx = 0
+    from_idx = from_offset or 0
     flat_text = text_map.flat_text
-    pos = 0
     match_num = -1
     while True:
         idx = flat_text.indexOf(q, from_idx)
@@ -82,17 +124,7 @@ def find_specific_occurrence(q, num, before_len, after_len, text_map):
         from_idx = idx + 1
         if match_num < num:
             continue
-        start_node, start_offset, node_pos = find_node_for_index_binary(text_map.node_list, idx + before_len, pos)
-        if start_node is not None:
-            pos = node_pos
-            end_node, end_offset, node_pos = find_node_for_index_linear(text_map.node_list, idx + q.length - after_len, pos)
-            if end_node is not None:
-                return {
-                    'start_node': start_node, 'start_offset': start_offset, 'start_pos': pos,
-                    'end_node': end_node, 'end_offset': end_offset, 'end_pos': node_pos,
-                    'idx_in_flat_text': idx
-                }
-        break
+        return get_occurrence_data(text_map.node_list, idx + before_len, idx + q.length - after_len)
 
 
 cache = {}
@@ -122,7 +154,23 @@ def select_search_result(sr):
         after = sr.after[:5]
         after_len = after.length
         q += after
-    match = find_specific_occurrence(q, int(sr.index), before_len, after_len, cache.text_map)
+    match = find_specific_occurrence(q, int(sr.index), before_len, after_len, cache.text_map, sr.from_offset)
+    if not match:
+        return False
+    select_find_result(match)
+    return True
+
+
+def select_tts_mark(idx_in_flat_text):
+    window.getSelection().removeAllRanges()
+    if not cache.text_map:
+        cache.text_map = build_text_map()
+    r = /\w+/g
+    r.lastIndex = idx_in_flat_text
+    match = v'r.exec(cache.text_map.flat_text)'
+    # exec() returns null when there is no word at or after the mark
+    word_length = match[0].length if match else 5
+    match = get_occurrence_data(cache.text_map.node_list, idx_in_flat_text, idx_in_flat_text + word_length)
     if not match:
         return False
     select_find_result(match)
diff --git a/src/pyj/read_book/iframe.pyj b/src/pyj/read_book/iframe.pyj
index 37a58a6480..6c192f13d4 100644
--- a/src/pyj/read_book/iframe.pyj
+++ b/src/pyj/read_book/iframe.pyj
@@ -3,18 +3,19 @@
 from __python__ import bound_methods, hash_literals
 
 import traceback
-from gettext import gettext as _
-from select import move_end_of_selection, selection_extents, word_at_point, range_for_tts
-
 from fs_images import fix_fullscreen_svg_images
+from gettext import gettext as _
 from iframe_comm import IframeClient
 from range_utils import (
-    highlight_associated_with_selection, last_span_for_crw, reset_highlight_counter,
-    select_crw, unwrap_all_crw, unwrap_crw, wrap_text_in_range
+    first_non_empty_text_node_in_range, highlight_associated_with_selection,
+    last_span_for_crw, reset_highlight_counter, select_crw, unwrap_all_crw,
+    unwrap_crw, wrap_text_in_range
 )
 from read_book.cfi import cfi_for_selection, range_from_cfi
 from read_book.extract import get_elements
-from read_book.find import reset_find_caches, select_search_result
+from read_book.find import (
+    reset_find_caches, select_search_result, select_tts_mark, tts_data
+)
 from read_book.flow_mode import (
     anchor_funcs as flow_anchor_funcs, auto_scroll_action as flow_auto_scroll_action,
     cancel_drag_scroll as cancel_drag_scroll_flow, ensure_selection_visible,
@@ -38,8 +39,8 @@ from read_book.paged_mode import (
     cancel_drag_scroll as cancel_drag_scroll_paged, current_cfi,
     get_columns_per_screen_data, handle_gesture as paged_handle_gesture,
     handle_shortcut as paged_handle_shortcut, jump_to_cfi as paged_jump_to_cfi,
-    layout as paged_layout, onwheel as paged_onwheel,
-    prepare_for_resize as paged_prepare_for_resize, progress_frac, page_counts,
+    layout as paged_layout, onwheel as paged_onwheel, page_counts,
+    prepare_for_resize as paged_prepare_for_resize, progress_frac,
     reset_paged_mode_globals, resize_done as paged_resize_done,
     scroll_by_page as paged_scroll_by_page, scroll_to_elem,
     scroll_to_extend_annotation as paged_annotation_scroll,
@@ -57,11 +58,14 @@ from read_book.settings import (
 from read_book.shortcuts import (
     create_shortcut_map, keyevent_as_shortcut, shortcut_for_key_event
 )
-from read_book.toc import update_visible_toc_anchors, find_anchor_before_range
+from read_book.toc import find_anchor_before_range, update_visible_toc_anchors
 from read_book.touch import (
     create_handlers as create_touch_handlers, reset_handlers as reset_touch_handlers
 )
 from read_book.viewport import scroll_viewport
+from select import (
+    move_end_of_selection, range_for_tts, selection_extents, word_at_point
+)
 from utils import debounce, is_ios
 
 FORCE_FLOW_MODE = False
@@ -893,10 +897,23 @@ class IframeBoss:
         self.send_message('copy_text_to_clipboard', text=text, html=container.innerHTML)
 
     def tts_msg_received(self, data):
-        if data.type is 'play':
-            r = range_for_tts()
-            text = r.toString()
-            self.send_message('tts', type='text-extracted', text=text)
+        if data.type is 'mark':
+            self.mark_word_being_spoken(data.num)
+        elif data.type is 'play':
+            if data.x? and data.y?:
+                r = range_for_tts(data.x, data.y)
+                text_node, offset = first_non_empty_text_node_in_range(r)
+            else:
+                text_node, offset = None, 0
+            marked_text = tts_data(text_node, offset)
+            sel = window.getSelection()
+            sel.removeAllRanges()
+            self.send_message('tts', type='text-extracted', marked_text=marked_text)
+
+    def mark_word_being_spoken(self, occurrence_number):
+        self.last_search_at = window.performance.now()
+        if select_tts_mark(occurrence_number):
+            self.ensure_selection_visible()
 
 def main():
     main.boss = IframeBoss()
diff --git a/src/pyj/read_book/read_aloud.pyj b/src/pyj/read_book/read_aloud.pyj
index 020917c222..84cf7cfdb2 100644
--- a/src/pyj/read_book/read_aloud.pyj
+++ b/src/pyj/read_book/read_aloud.pyj
@@ -168,9 +168,15 @@ class ReadAloud:
         elif sc_name in ('up', 'down', 'pageup', 'pagedown', 'left', 'right'):
             self.send_message('trigger-shortcut', name=sc_name)
 
+    def handle_tts_event(self, which, data):
+        if which is 'mark':
+            self.send_message('mark', num=data)
+        elif which is 'begin':
+            self.state = PLAYING
+
     def send_message(self, type, **kw):
         self.view.iframe_wrapper.send_message('tts', type=type, **kw)
 
     def handle_message(self, msg):
         if msg.type is 'text-extracted':
-            ui_operations.tts('play', {'text': msg.text})
+            ui_operations.tts('play', {'marked_text': msg.marked_text})
diff --git a/src/pyj/viewer-main.pyj b/src/pyj/viewer-main.pyj
index fc1724925d..997d0f6fb3 100644
--- a/src/pyj/viewer-main.pyj
+++ b/src/pyj/viewer-main.pyj
@@ -205,6 +205,11 @@ def generic_action(which, data):
         view.set_notes_for_highlight(data.uuid, data.notes or '')
 
 
+@from_python
+def tts_event(which, data):
+    view.read_aloud.handle_tts_event(which, data)
+
+
 @from_python
 def show_home_page():
     view.overlay.open_book(False)