mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
E-book viewer: Fix a regression that broke searching in Japanese books that use <ruby> text. Fixes #2071348 [Error when trying to jump to in-book result location if the displayed result sentence has counterpart in book that has ruby text](https://bugs.launchpad.net/calibre/+bug/2071348)
This commit is contained in:
parent
7597538345
commit
6170e8c560
@ -233,14 +233,16 @@ def searchable_text_for_name(name):
|
|||||||
ignore_text = frozenset({'img', 'math', 'rt', 'rp', 'rtc'})
|
ignore_text = frozenset({'img', 'math', 'rt', 'rp', 'rtc'})
|
||||||
for child in serialized_data['tree']['c']:
|
for child in serialized_data['tree']['c']:
|
||||||
if child.get('n') == 'body':
|
if child.get('n') == 'body':
|
||||||
a((child, False))
|
a((child, False, False))
|
||||||
# the JS code does not add the tail of body tags to flat text
|
# the JS code does not add the tail of body tags to flat text
|
||||||
removed_tails.append((child.pop('l', None), child))
|
removed_tails.append((child.pop('l', None), child))
|
||||||
text_pos = 0
|
text_pos = 0
|
||||||
anchor_offset_map = OrderedDict()
|
anchor_offset_map = OrderedDict()
|
||||||
while stack:
|
while stack:
|
||||||
node, text_ignored_in_parent = stack.pop()
|
node, text_ignored_in_parent, in_ruby = stack.pop()
|
||||||
if isinstance(node, str):
|
if isinstance(node, str):
|
||||||
|
if in_ruby:
|
||||||
|
node = node.strip()
|
||||||
add_text(node)
|
add_text(node)
|
||||||
text_pos += len(node)
|
text_pos += len(node)
|
||||||
continue
|
continue
|
||||||
@ -258,16 +260,21 @@ def searchable_text_for_name(name):
|
|||||||
anchor_offset_map[aid] = text_pos
|
anchor_offset_map[aid] = text_pos
|
||||||
if name in no_visit:
|
if name in no_visit:
|
||||||
continue
|
continue
|
||||||
|
node_in_ruby = in_ruby
|
||||||
|
if not in_ruby and name == 'ruby':
|
||||||
|
in_ruby = True
|
||||||
ignore_text_in_node_and_children = text_ignored_in_parent or name in ignore_text
|
ignore_text_in_node_and_children = text_ignored_in_parent or name in ignore_text
|
||||||
|
|
||||||
if text and not ignore_text_in_node_and_children:
|
if text and not ignore_text_in_node_and_children:
|
||||||
|
if in_ruby:
|
||||||
|
text = text.strip()
|
||||||
add_text(text)
|
add_text(text)
|
||||||
text_pos += len(text)
|
text_pos += len(text)
|
||||||
if tail and not text_ignored_in_parent:
|
if tail and not text_ignored_in_parent:
|
||||||
a((tail, ignore_text_in_node_and_children))
|
a((tail, ignore_text_in_node_and_children, node_in_ruby))
|
||||||
if children:
|
if children:
|
||||||
for child in reversed(children):
|
for child in reversed(children):
|
||||||
a((child, ignore_text_in_node_and_children))
|
a((child, ignore_text_in_node_and_children, in_ruby))
|
||||||
for (tail, body) in removed_tails:
|
for (tail, body) in removed_tails:
|
||||||
if tail is not None:
|
if tail is not None:
|
||||||
body['l'] = tail
|
body['l'] = tail
|
||||||
|
@ -4,7 +4,7 @@ from __python__ import bound_methods, hash_literals
|
|||||||
|
|
||||||
ignored_tags = {
|
ignored_tags = {
|
||||||
'style': True, 'script': True, 'noscript': True, 'title': True, 'meta': True, 'head': True, 'link': True, 'html': True,
|
'style': True, 'script': True, 'noscript': True, 'title': True, 'meta': True, 'head': True, 'link': True, 'html': True,
|
||||||
'img': True
|
'img': True, 'rt': True, 'rp': True, 'rtc': True,
|
||||||
}
|
}
|
||||||
|
|
||||||
block_tags_for_tts = {
|
block_tags_for_tts = {
|
||||||
@ -16,24 +16,36 @@ def build_text_map(for_tts):
|
|||||||
flat_text = ''
|
flat_text = ''
|
||||||
text_node_type = Node.TEXT_NODE
|
text_node_type = Node.TEXT_NODE
|
||||||
element_node_type = Node.ELEMENT_NODE
|
element_node_type = Node.ELEMENT_NODE
|
||||||
|
in_ruby = 0
|
||||||
|
|
||||||
def process_node(node):
|
def process_node(node):
|
||||||
nonlocal flat_text
|
nonlocal flat_text, in_ruby
|
||||||
nt = node.nodeType
|
nt = node.nodeType
|
||||||
if nt is text_node_type:
|
if nt is text_node_type:
|
||||||
text = node.nodeValue
|
text = node.nodeValue
|
||||||
if text and text.length:
|
if text and text.length:
|
||||||
node_list.push(v"{node: node, offset: flat_text.length, length: text.length}")
|
if in_ruby:
|
||||||
flat_text += text
|
rtext = text.trim()
|
||||||
|
if rtext.length:
|
||||||
|
node_list.push(v"{node: node, offset: flat_text.length, length: rtext.length, offset_in_node: text.length - text.trimStart().length}")
|
||||||
|
flat_text += rtext
|
||||||
|
else:
|
||||||
|
node_list.push(v"{node: node, offset: flat_text.length, length: text.length}")
|
||||||
|
flat_text += text
|
||||||
elif nt is element_node_type:
|
elif nt is element_node_type:
|
||||||
if not node.hasChildNodes():
|
if not node.hasChildNodes():
|
||||||
return
|
return
|
||||||
tag = node.tagName.toLowerCase()
|
tag = node.tagName.toLowerCase()
|
||||||
if ignored_tags[tag]:
|
if ignored_tags[tag]:
|
||||||
return
|
return
|
||||||
|
is_ruby_tag = tag is 'ruby'
|
||||||
|
if is_ruby_tag:
|
||||||
|
in_ruby += 1
|
||||||
children = node.childNodes
|
children = node.childNodes
|
||||||
for i in range(children.length):
|
for i in range(children.length):
|
||||||
process_node(v'children[i]')
|
process_node(v'children[i]')
|
||||||
|
if is_ruby_tag:
|
||||||
|
in_ruby -= 1
|
||||||
if for_tts and block_tags_for_tts[tag]:
|
if for_tts and block_tags_for_tts[tag]:
|
||||||
# add a paragraph separator after block tags so that sentence splitting works
|
# add a paragraph separator after block tags so that sentence splitting works
|
||||||
if flat_text.length and ' \n\t\r'.indexOf(flat_text[-1]) > -1:
|
if flat_text.length and ' \n\t\r'.indexOf(flat_text[-1]) > -1:
|
||||||
@ -106,7 +118,7 @@ def find_node_for_index_binary(node_list, idx_in_flat_text, start):
|
|||||||
if q.offset <= idx_in_flat_text and limit > idx_in_flat_text:
|
if q.offset <= idx_in_flat_text and limit > idx_in_flat_text:
|
||||||
start_node = q.node
|
start_node = q.node
|
||||||
start_offset = idx_in_flat_text - q.offset
|
start_offset = idx_in_flat_text - q.offset
|
||||||
return start_node, start_offset, mid
|
return start_node, start_offset + (q.offset_in_node or 0), mid
|
||||||
if limit <= idx_in_flat_text:
|
if limit <= idx_in_flat_text:
|
||||||
start = mid + 1
|
start = mid + 1
|
||||||
else:
|
else:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user