mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
E-book viewer: Fix a regression that broke searching in Japanese books that use <ruby> text. Fixes #2071348 [Error when trying to jump to in-book result location if the displayed result sentence has counterpart in book that has ruby text](https://bugs.launchpad.net/calibre/+bug/2071348)
This commit is contained in:
parent
7597538345
commit
6170e8c560
@ -233,14 +233,16 @@ def searchable_text_for_name(name):
|
||||
ignore_text = frozenset({'img', 'math', 'rt', 'rp', 'rtc'})
|
||||
for child in serialized_data['tree']['c']:
|
||||
if child.get('n') == 'body':
|
||||
a((child, False))
|
||||
a((child, False, False))
|
||||
# the JS code does not add the tail of body tags to flat text
|
||||
removed_tails.append((child.pop('l', None), child))
|
||||
text_pos = 0
|
||||
anchor_offset_map = OrderedDict()
|
||||
while stack:
|
||||
node, text_ignored_in_parent = stack.pop()
|
||||
node, text_ignored_in_parent, in_ruby = stack.pop()
|
||||
if isinstance(node, str):
|
||||
if in_ruby:
|
||||
node = node.strip()
|
||||
add_text(node)
|
||||
text_pos += len(node)
|
||||
continue
|
||||
@ -258,16 +260,21 @@ def searchable_text_for_name(name):
|
||||
anchor_offset_map[aid] = text_pos
|
||||
if name in no_visit:
|
||||
continue
|
||||
node_in_ruby = in_ruby
|
||||
if not in_ruby and name == 'ruby':
|
||||
in_ruby = True
|
||||
ignore_text_in_node_and_children = text_ignored_in_parent or name in ignore_text
|
||||
|
||||
if text and not ignore_text_in_node_and_children:
|
||||
if in_ruby:
|
||||
text = text.strip()
|
||||
add_text(text)
|
||||
text_pos += len(text)
|
||||
if tail and not text_ignored_in_parent:
|
||||
a((tail, ignore_text_in_node_and_children))
|
||||
a((tail, ignore_text_in_node_and_children, node_in_ruby))
|
||||
if children:
|
||||
for child in reversed(children):
|
||||
a((child, ignore_text_in_node_and_children))
|
||||
a((child, ignore_text_in_node_and_children, in_ruby))
|
||||
for (tail, body) in removed_tails:
|
||||
if tail is not None:
|
||||
body['l'] = tail
|
||||
|
@ -4,7 +4,7 @@ from __python__ import bound_methods, hash_literals
|
||||
|
||||
ignored_tags = {
|
||||
'style': True, 'script': True, 'noscript': True, 'title': True, 'meta': True, 'head': True, 'link': True, 'html': True,
|
||||
'img': True
|
||||
'img': True, 'rt': True, 'rp': True, 'rtc': True,
|
||||
}
|
||||
|
||||
block_tags_for_tts = {
|
||||
@ -16,13 +16,20 @@ def build_text_map(for_tts):
|
||||
flat_text = ''
|
||||
text_node_type = Node.TEXT_NODE
|
||||
element_node_type = Node.ELEMENT_NODE
|
||||
in_ruby = 0
|
||||
|
||||
def process_node(node):
|
||||
nonlocal flat_text
|
||||
nonlocal flat_text, in_ruby
|
||||
nt = node.nodeType
|
||||
if nt is text_node_type:
|
||||
text = node.nodeValue
|
||||
if text and text.length:
|
||||
if in_ruby:
|
||||
rtext = text.trim()
|
||||
if rtext.length:
|
||||
node_list.push(v"{node: node, offset: flat_text.length, length: rtext.length, offset_in_node: text.length - text.trimStart().length}")
|
||||
flat_text += rtext
|
||||
else:
|
||||
node_list.push(v"{node: node, offset: flat_text.length, length: text.length}")
|
||||
flat_text += text
|
||||
elif nt is element_node_type:
|
||||
@ -31,9 +38,14 @@ def build_text_map(for_tts):
|
||||
tag = node.tagName.toLowerCase()
|
||||
if ignored_tags[tag]:
|
||||
return
|
||||
is_ruby_tag = tag is 'ruby'
|
||||
if is_ruby_tag:
|
||||
in_ruby += 1
|
||||
children = node.childNodes
|
||||
for i in range(children.length):
|
||||
process_node(v'children[i]')
|
||||
if is_ruby_tag:
|
||||
in_ruby -= 1
|
||||
if for_tts and block_tags_for_tts[tag]:
|
||||
# add a paragraph separator after block tags so that sentence splitting works
|
||||
if flat_text.length and ' \n\t\r'.indexOf(flat_text[-1]) > -1:
|
||||
@ -106,7 +118,7 @@ def find_node_for_index_binary(node_list, idx_in_flat_text, start):
|
||||
if q.offset <= idx_in_flat_text and limit > idx_in_flat_text:
|
||||
start_node = q.node
|
||||
start_offset = idx_in_flat_text - q.offset
|
||||
return start_node, start_offset, mid
|
||||
return start_node, start_offset + (q.offset_in_node or 0), mid
|
||||
if limit <= idx_in_flat_text:
|
||||
start = mid + 1
|
||||
else:
|
||||
|
Loading…
x
Reference in New Issue
Block a user