mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
Allow specifying the voice for a tag
This commit is contained in:
parent
871f262cac
commit
459d09eda1
@ -5,6 +5,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
from contextlib import suppress
|
||||||
from threading import Lock
|
from threading import Lock
|
||||||
from typing import NamedTuple
|
from typing import NamedTuple
|
||||||
|
|
||||||
@ -133,9 +134,12 @@ class Sentence(NamedTuple):
|
|||||||
elem_id: str
|
elem_id: str
|
||||||
text: str
|
text: str
|
||||||
lang: str
|
lang: str
|
||||||
|
voice : str
|
||||||
|
|
||||||
|
|
||||||
def mark_sentences_in_html(root, lang: str = '') -> list[Sentence]:
|
def mark_sentences_in_html(root, lang: str = '', voice: str = '') -> list[Sentence]:
|
||||||
|
import json
|
||||||
|
|
||||||
from lxml.etree import ElementBase as Element
|
from lxml.etree import ElementBase as Element
|
||||||
from lxml.etree import tostring as _tostring
|
from lxml.etree import tostring as _tostring
|
||||||
|
|
||||||
@ -162,7 +166,8 @@ def mark_sentences_in_html(root, lang: str = '') -> list[Sentence]:
|
|||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
root_lang = lang_for_elem(root, canonicalize_lang(lang or get_lang())) or 'eng'
|
root_lang = canonicalize_lang(lang_for_elem(root, canonicalize_lang(lang or get_lang())) or 'en')
|
||||||
|
root_voice = voice
|
||||||
seen_ids = set(root.xpath('//*/@id'))
|
seen_ids = set(root.xpath('//*/@id'))
|
||||||
id_counter = 1
|
id_counter = 1
|
||||||
ans = []
|
ans = []
|
||||||
@ -177,10 +182,18 @@ def mark_sentences_in_html(root, lang: str = '') -> list[Sentence]:
|
|||||||
|
|
||||||
class Parent:
|
class Parent:
|
||||||
|
|
||||||
def __init__(self, elem, tag_name, parent_lang, child_lang=''):
|
def __init__(self, elem, tag_name, parent_lang, parent_voice, child_lang=''):
|
||||||
self.elem = elem
|
self.elem = elem
|
||||||
self.tag_name = tag_name
|
self.tag_name = tag_name
|
||||||
self.lang = child_lang or lang_for_elem(elem, parent_lang)
|
self.lang = child_lang or lang_for_elem(elem, parent_lang)
|
||||||
|
q = elem.get('data-calibre-tts', '')
|
||||||
|
self.voice = parent_voice
|
||||||
|
if q.startswith('{'): # }
|
||||||
|
with suppress(Exception):
|
||||||
|
q = json.loads(q)
|
||||||
|
self.voice = q.get('voice') or parent_voice
|
||||||
|
else:
|
||||||
|
self.voice = q or parent_voice
|
||||||
self.pos = 0
|
self.pos = 0
|
||||||
self.texts = []
|
self.texts = []
|
||||||
if elem.text and elem.text.strip():
|
if elem.text and elem.text.strip():
|
||||||
@ -205,7 +218,7 @@ def mark_sentences_in_html(root, lang: str = '') -> list[Sentence]:
|
|||||||
self.pos = 0
|
self.pos = 0
|
||||||
for start, length in sentence_positions(text, self.lang):
|
for start, length in sentence_positions(text, self.lang):
|
||||||
elem_id = self.wrap_sentence(start, length)
|
elem_id = self.wrap_sentence(start, length)
|
||||||
ans.append(Sentence(elem_id, text[start:start+length], lang))
|
ans.append(Sentence(elem_id, text[start:start+length], self.lang, self.voice))
|
||||||
self.texts = []
|
self.texts = []
|
||||||
self.pos = 0
|
self.pos = 0
|
||||||
|
|
||||||
@ -413,22 +426,23 @@ def mark_sentences_in_html(root, lang: str = '') -> list[Sentence]:
|
|||||||
self.pos += 1
|
self.pos += 1
|
||||||
return w.get('id')
|
return w.get('id')
|
||||||
|
|
||||||
stack_of_parents = [Parent(elem, 'body', root_lang) for elem in root.iterchildren('*') if barename(elem.tag).lower() == 'body']
|
stack_of_parents = [Parent(elem, 'body', root_lang, root_voice) for elem in root.iterchildren('*') if barename(elem.tag).lower() == 'body']
|
||||||
while stack_of_parents:
|
while stack_of_parents:
|
||||||
p = stack_of_parents.pop()
|
p = stack_of_parents.pop()
|
||||||
if len(p.elem) == 1 and not has_text(p.elem): # wrapper
|
if len(p.elem) == 1 and not has_text(p.elem): # wrapper
|
||||||
c = p.elem[0]
|
c = p.elem[0]
|
||||||
if isinstance(c.tag, str):
|
if isinstance(c.tag, str):
|
||||||
stack_of_parents.append(Parent(c, barename(c.tag).lower(), p.lang))
|
stack_of_parents.append(Parent(c, barename(c.tag).lower(), p.lang, p.voice))
|
||||||
continue
|
continue
|
||||||
for i in range(p.child_pos, len(p.children)):
|
for i in range(p.child_pos, len(p.children)):
|
||||||
child = p.children[i]
|
child = p.children[i]
|
||||||
|
child_voice = child.get('data-calibre-tts', '')
|
||||||
child_lang = lang_for_elem(child, p.lang)
|
child_lang = lang_for_elem(child, p.lang)
|
||||||
child_tag_name = barename(child.tag).lower() if isinstance(child.tag, str) else ''
|
child_tag_name = barename(child.tag).lower() if isinstance(child.tag, str) else ''
|
||||||
if child_lang == p.lang and child_tag_name in continued_tag_names and len(child) == 0:
|
if child_lang == p.lang and child_voice == p.voice and child_tag_name in continued_tag_names and len(child) == 0:
|
||||||
p.add_simple_child(child)
|
p.add_simple_child(child)
|
||||||
elif child_tag_name not in ignored_tag_names:
|
elif child_tag_name not in ignored_tag_names:
|
||||||
stack_of_parents.append(Parent(child, child_tag_name, p.lang, child_lang))
|
stack_of_parents.append(Parent(child, child_tag_name, p.lang, p.voice, child_lang=child_lang))
|
||||||
p.commit()
|
p.commit()
|
||||||
p.child_pos = i + 1
|
p.child_pos = i + 1
|
||||||
stack_of_parents.append(p)
|
stack_of_parents.append(p)
|
||||||
|
@ -300,6 +300,12 @@ class TestICU(unittest.TestCase):
|
|||||||
|
|
||||||
'<p><i>Hello, </i><b>world</b>! Good day to you':
|
'<p><i>Hello, </i><b>world</b>! Good day to you':
|
||||||
'<body><p><span id="1"><i>Hello, </i><b>world</b>! </span><span id="2">Good day to you</span></p>',
|
'<body><p><span id="1"><i>Hello, </i><b>world</b>! </span><span id="2">Good day to you</span></p>',
|
||||||
|
|
||||||
|
'<p>Hello, <span lang="fr">world!':
|
||||||
|
'<body><p><span id="1">Hello, </span><span lang="fr"><span id="2">world!</span></span></p>',
|
||||||
|
|
||||||
|
'<p>Hello, <span data-calibre-tts="moose">world!':
|
||||||
|
'<body><p><span id="1">Hello, </span><span data-calibre-tts="moose"><span id="2">world!</span></span></p>',
|
||||||
}.items()):
|
}.items()):
|
||||||
root = parse(text, namespace_elements=True)
|
root = parse(text, namespace_elements=True)
|
||||||
mark_sentences_in_html(root)
|
mark_sentences_in_html(root)
|
||||||
@ -308,6 +314,8 @@ class TestICU(unittest.TestCase):
|
|||||||
actual = actual[:actual.rfind('</body>')]
|
actual = actual[:actual.rfind('</body>')]
|
||||||
actual = actual.replace('cttsw-', '')
|
actual = actual.replace('cttsw-', '')
|
||||||
self.ae(expected, actual)
|
self.ae(expected, actual)
|
||||||
|
sentences = mark_sentences_in_html(parse('<p lang="en">Hello, <span lang="fr">world!'))
|
||||||
|
self.ae(tuple(s.lang for s in sentences), ('eng', 'fra'))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user