mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Add code to get the anchor offset map
This commit is contained in:
parent
267ab37964
commit
d4f9c559c3
@ -403,7 +403,8 @@ def unserialize_html_legacy(serialized_data, proceed, postprocess_dom, root_name
|
|||||||
proceeded = True
|
proceeded = True
|
||||||
proceed()
|
proceed()
|
||||||
|
|
||||||
def text_from_serialized_html(data):
|
|
||||||
|
def text_from_serialized_html(data, get_anchor_offset_map):
|
||||||
serialized_data = JSON.parse(data)
|
serialized_data = JSON.parse(data)
|
||||||
tag_map = serialized_data.tag_map
|
tag_map = serialized_data.tag_map
|
||||||
ans = v'[]'
|
ans = v'[]'
|
||||||
@ -415,17 +416,29 @@ def text_from_serialized_html(data):
|
|||||||
if child.n is 'body':
|
if child.n is 'body':
|
||||||
stack.push(child)
|
stack.push(child)
|
||||||
ignore_text = {'script':True, 'style':True, 'title': True}
|
ignore_text = {'script':True, 'style':True, 'title': True}
|
||||||
|
anchor_offset_map = {}
|
||||||
|
text_pos = 0
|
||||||
while stack.length:
|
while stack.length:
|
||||||
node = stack.pop()
|
node = stack.pop()
|
||||||
if jstype(node) is 'string':
|
if jstype(node) is 'string':
|
||||||
ans.push(node)
|
ans.push(node)
|
||||||
|
text_pos += node.length
|
||||||
continue
|
continue
|
||||||
if tag_map:
|
if tag_map:
|
||||||
src = tag_map[node[0]]
|
src = tag_map[node[0]]
|
||||||
else:
|
else:
|
||||||
src = node
|
src = node
|
||||||
|
if get_anchor_offset_map and src.a:
|
||||||
|
for v'var i = 0; i < src.a.length; i++':
|
||||||
|
x = src.a[i]
|
||||||
|
if x[0] is 'id':
|
||||||
|
aid = x[1]
|
||||||
|
if jstype(anchor_offset_map[aid]) is not 'number':
|
||||||
|
anchor_offset_map[aid] = text_pos
|
||||||
|
|
||||||
if src.n and not ignore_text[src.n] and src.x:
|
if src.n and not ignore_text[src.n] and src.x:
|
||||||
ans.push(src.x)
|
ans.push(src.x)
|
||||||
|
text_pos += src.x.length
|
||||||
if src.l:
|
if src.l:
|
||||||
stack.push(src.l)
|
stack.push(src.l)
|
||||||
if tag_map:
|
if tag_map:
|
||||||
@ -435,4 +448,7 @@ def text_from_serialized_html(data):
|
|||||||
if src.c:
|
if src.c:
|
||||||
for v'var i = src.c.length; i-- > 0;':
|
for v'var i = src.c.length; i-- > 0;':
|
||||||
stack.push(v'src.c[i]')
|
stack.push(v'src.c[i]')
|
||||||
return ans.join('')
|
ans = ans.join('')
|
||||||
|
if get_anchor_offset_map:
|
||||||
|
return ans, anchor_offset_map
|
||||||
|
return ans
|
||||||
|
@ -84,7 +84,7 @@ def search_in_text_of(name):
|
|||||||
ctx_size = 75
|
ctx_size = 75
|
||||||
r = wc.regex
|
r = wc.regex
|
||||||
r.lastIndex = 0
|
r.lastIndex = 0
|
||||||
haystack = wc.text_cache[name] or ''
|
haystack = wc.text_cache[name][0] or ''
|
||||||
match_counts = {}
|
match_counts = {}
|
||||||
spine_idx = wc.current_query.spine.indexOf(name)
|
spine_idx = wc.current_query.spine.indexOf(name)
|
||||||
while True:
|
while True:
|
||||||
@ -126,7 +126,7 @@ def got_spine_item(query_id, spine_idx, result):
|
|||||||
return
|
return
|
||||||
if result.ok:
|
if result.ok:
|
||||||
name = wc.current_query.spine[spine_idx]
|
name = wc.current_query.spine[spine_idx]
|
||||||
wc.text_cache[name] = text_from_serialized_html(result.result)
|
wc.text_cache[name] = text_from_serialized_html(result.result, True)
|
||||||
search_in_text_of(name)
|
search_in_text_of(name)
|
||||||
setTimeout(queue_next_spine_item.bind(None, spine_idx + 1), 0)
|
setTimeout(queue_next_spine_item.bind(None, spine_idx + 1), 0)
|
||||||
else:
|
else:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user