diff --git a/src/pyj/read_book/resources.pyj b/src/pyj/read_book/resources.pyj index 0e713715fd..4fcacb080b 100644 --- a/src/pyj/read_book/resources.pyj +++ b/src/pyj/read_book/resources.pyj @@ -403,7 +403,8 @@ def unserialize_html_legacy(serialized_data, proceed, postprocess_dom, root_name proceeded = True proceed() -def text_from_serialized_html(data): + +def text_from_serialized_html(data, get_anchor_offset_map): serialized_data = JSON.parse(data) tag_map = serialized_data.tag_map ans = v'[]' @@ -415,17 +416,29 @@ def text_from_serialized_html(data): if child.n is 'body': stack.push(child) ignore_text = {'script':True, 'style':True, 'title': True} + anchor_offset_map = {} + text_pos = 0 while stack.length: node = stack.pop() if jstype(node) is 'string': ans.push(node) + text_pos += node.length continue if tag_map: src = tag_map[node[0]] else: src = node + if get_anchor_offset_map and src.a: + for v'var i = 0; i < src.a.length; i++': + x = src.a[i] + if x[0] is 'id': + aid = x[1] + if jstype(anchor_offset_map[aid]) is not 'number': + anchor_offset_map[aid] = text_pos + if src.n and not ignore_text[src.n] and src.x: ans.push(src.x) + text_pos += src.x.length if src.l: stack.push(src.l) if tag_map: @@ -435,4 +448,7 @@ def text_from_serialized_html(data): if src.c: for v'var i = src.c.length; i-- > 0;': stack.push(v'src.c[i]') - return ans.join('') + ans = ans.join('') + if get_anchor_offset_map: + return ans, anchor_offset_map + return ans diff --git a/src/pyj/read_book/search_worker.pyj b/src/pyj/read_book/search_worker.pyj index c47df4ce75..b2963c2c45 100644 --- a/src/pyj/read_book/search_worker.pyj +++ b/src/pyj/read_book/search_worker.pyj @@ -84,7 +84,7 @@ def search_in_text_of(name): ctx_size = 75 r = wc.regex r.lastIndex = 0 - haystack = wc.text_cache[name] or '' + haystack = wc.text_cache[name][0] or '' match_counts = {} spine_idx = wc.current_query.spine.indexOf(name) while True: @@ -126,7 +126,7 @@ def got_spine_item(query_id, spine_idx, result): return if result.ok: name = wc.current_query.spine[spine_idx] - wc.text_cache[name] = text_from_serialized_html(result.result) + wc.text_cache[name] = text_from_serialized_html(result.result, True) search_in_text_of(name) setTimeout(queue_next_spine_item.bind(None, spine_idx + 1), 0) else: