Add code to get the anchor offset map

This commit is contained in:
Kovid Goyal 2021-05-18 08:48:50 +05:30
parent 267ab37964
commit d4f9c559c3
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 20 additions and 4 deletions

View File

@@ -403,7 +403,8 @@ def unserialize_html_legacy(serialized_data, proceed, postprocess_dom, root_name
proceeded = True
proceed()
def text_from_serialized_html(data):
def text_from_serialized_html(data, get_anchor_offset_map):
serialized_data = JSON.parse(data)
tag_map = serialized_data.tag_map
ans = v'[]'
@@ -415,17 +416,29 @@ def text_from_serialized_html(data):
if child.n is 'body':
stack.push(child)
ignore_text = {'script':True, 'style':True, 'title': True}
anchor_offset_map = {}
text_pos = 0
while stack.length:
node = stack.pop()
if jstype(node) is 'string':
ans.push(node)
text_pos += node.length
continue
if tag_map:
src = tag_map[node[0]]
else:
src = node
if get_anchor_offset_map and src.a:
for v'var i = 0; i < src.a.length; i++':
x = src.a[i]
if x[0] is 'id':
aid = x[1]
if jstype(anchor_offset_map[aid]) is not 'number':
anchor_offset_map[aid] = text_pos
if src.n and not ignore_text[src.n] and src.x:
ans.push(src.x)
text_pos += src.x.length
if src.l:
stack.push(src.l)
if tag_map:
@@ -435,4 +448,7 @@ def text_from_serialized_html(data):
if src.c:
for v'var i = src.c.length; i-- > 0;':
stack.push(v'src.c[i]')
return ans.join('')
ans = ans.join('')
if get_anchor_offset_map:
return ans, anchor_offset_map
return ans

View File

@@ -84,7 +84,7 @@ def search_in_text_of(name):
ctx_size = 75
r = wc.regex
r.lastIndex = 0
haystack = wc.text_cache[name] or ''
haystack = wc.text_cache[name][0] or ''
match_counts = {}
spine_idx = wc.current_query.spine.indexOf(name)
while True:
@@ -126,7 +126,7 @@ def got_spine_item(query_id, spine_idx, result):
return
if result.ok:
name = wc.current_query.spine[spine_idx]
wc.text_cache[name] = text_from_serialized_html(result.result)
wc.text_cache[name] = text_from_serialized_html(result.result, True)
search_in_text_of(name)
setTimeout(queue_next_spine_item.bind(None, spine_idx + 1), 0)
else: