# def decode(cfi, doc): {{{

def node_at_index(nodes, target, index, iter_text_nodes):
    # Walk `nodes` (a NodeList-like object exposing .length and integer
    # indexing) and return the node whose running position equals `target`.
    #
    # nodes:           the candidate nodes, in document order
    # target:          zero-based position of the wanted node among the
    #                  nodes that are counted
    # index:           position reached so far; top-level callers pass 0,
    #                  recursive calls pass the count accumulated so far
    # iter_text_nodes: truthy -> only nodes accepted by is_text_node() are
    #                  counted; falsy -> only element nodes are counted
    #
    # Elements flagged with data-calibre-range-wrapper are never counted
    # themselves: the nodes returned by unwrapped_nodes() for them are
    # scanned in their place, continuing the same running index.
    #
    # Returns (node, index). node is None when nothing matched, in which
    # case index is the number of counted nodes seen so far.
    for i in range(nodes.length):
        node = nodes[i]
        is_element = node.nodeType is Node.ELEMENT_NODE
        # `dataset` is provided by the HTML/SVG element interfaces, not by
        # the generic Element interface, so elements from other namespaces
        # may not have it. Guard the access instead of throwing.
        if is_element and node.dataset and node.dataset.calibreRangeWrapper:
            q, index = node_at_index(unwrapped_nodes(node), target, index, iter_text_nodes)
            if q:
                return q, index
            continue
        if (iter_text_nodes and not is_text_node(node)) or (not iter_text_nodes and not is_element):
            continue
        if index is target:
            return node, index
        index += 1
    return None, index


def node_for_path_step(parent, target, assertion):
    # Resolve a single "/N[assertion]" step of a CFI path relative to `parent`.
    #
    # parent:    the node whose childNodes are searched
    # target:    the integer N from the step. Even values select an
    #            element, odd values select a text node.
    # assertion: optional id. When an element with that id exists it is
    #            returned directly, without consulting `target`.
    #
    # Returns the matching node, or None when there is no match.
    if assertion:
        # Look the id up in the document that owns `parent` rather than in
        # the global document, so that decoding against another document
        # (or inside an embedded content document) finds the right element.
        # ownerDocument is null when `parent` is itself a document.
        owner = parent.ownerDocument or parent
        if not owner.getElementById:
            owner = document
        q = owner.getElementById(assertion)
        if q:
            return q
    is_element = target % 2 == 0
    target //= 2
    # Element steps are 2, 4, 6, ... so halving yields a one-based count,
    # while text steps 1, 3, 5, ... already halve to a zero-based count.
    if is_element and target > 0:
        target -= 1
    return node_at_index(parent.childNodes, target, 0, not is_element)[0]


def node_for_text_offset(nodes, offset, forward, first_node):
    # Locate the text node that contains character position `offset`, where
    # the offset is measured across consecutive text nodes.
    #
    # nodes:      NodeList-like sequence to scan
    # offset:     character offset, counted from the start of `first_node`
    #             (or from the first node when first_node is not given)
    # forward:    when falsy, an offset exactly equal to a text node's length
    #             is accepted as lying at the end of that node; when truthy
    #             the search moves on to the following text node
    # first_node: optional node at which counting starts; nodes before it
    #             are skipped. Omitted by the recursive call.
    #
    # Returns (node, offset, ok). When ok is True, offset is relative to the
    # returned node. When ok is False the offset was out of range: node is
    # the last text node scanned (or None) and offset is the amount left
    # over after subtracting the length of every text node scanned, NOT a
    # position inside the returned node.
    last_text_node = None
    seen_first = False
    for i in range(nodes.length):
        node = nodes[i]
        if not seen_first:
            if not first_node or node.isSameNode(first_node):
                seen_first = True
            else:
                continue
        if is_text_node(node):
            l = node.nodeValue.length
            if offset < l or (not forward and offset is l):
                return node, offset, True
            last_text_node = node
            offset -= l
        # See node_at_index() for why `dataset` is checked before use
        elif node.nodeType is Node.ELEMENT_NODE and node.dataset and node.dataset.calibreRangeWrapper:
            qn, offset, ok = node_for_text_offset(unwrapped_nodes(node), offset, forward)
            if ok:
                return qn, offset, True
            if qn:
                # Text inside the wrapper comes later in document order than
                # anything seen so far, so it is the better fallback node
                last_text_node = qn
    return last_text_node, offset, False
unescape_from_cfi(assertion.slice(1, assertion.length-1)) - index = 0 - child = node.firstChild - - while True: - if not child: - if assertion: # Try to use the assertion to find the node - child = doc.getElementById(assertion) - if child: - node = child - if not child: - error = "No matching child found for CFI: " + cfi - cfi = cfi.substr(r[0].length) - break - index |= 1 # Increment index by 1 if it is even - if child.nodeType is Node.ELEMENT_NODE: - index += 1 - if index is target: - cfi = cfi.substr(r[0].length) - node = child - if assertion and node.id is not assertion: - # The found child does not match the id assertion, - # trust the id assertion if an element with that id - # exists - child = doc.getElementById(assertion) - if child: - node = child - break - child = child.nextSibling - + q = node_for_path_step(node, target, assertion) + if q: + node = q + cfi = cfi.substr(r[0].length) + else: + error = "No matching child found for CFI: " + cfi + break else if cfi[0] is '!': # Indirection if node.contentDocument: node = node.contentDocument @@ -350,26 +383,10 @@ def decode(cfi, doc): # {{{ # Find the text node that contains the offset if offset is not None: - while True: - l = node.nodeValue.length - if offset < l or (not point.forward and offset is l): - break - next = False - while True: - nn = node.nextSibling - if not nn: - break - if Node.TEXT_NODE <= nn.nodeType <= Node.ENTITY_NODE and nn.nodeValue and nn.nodeValue.length: - next = nn - break - node = nn - if not next: - if offset > l: - error = "Offset out of range: " + offset - offset = l - break - node = next - offset -= l + orig_offset = offset + node, offset, ok = node_for_text_offset(node.parentNode.childNodes, offset, point.forward, node) + if not ok: + error = "Offset out of range: " + orig_offset point.offset = offset point.node = node diff --git a/src/pyj/read_book/test_cfi.pyj b/src/pyj/read_book/test_cfi.pyj index a60a8b15b7..e1871d1998 100644 --- a/src/pyj/read_book/test_cfi.pyj +++ 
b/src/pyj/read_book/test_cfi.pyj @@ -62,11 +62,26 @@ def cfi_with_range_wrappers(): rw1 = p.lastChild p.appendChild(document.createTextNode('123')) assert_equal(encode(document, p.firstChild, 1), f'{path_to_p}/1:1') + assert_equal(decode(f'{path_to_p}/1:1'), {'node': p.firstChild, 'offset': 1}) assert_equal(encode(document, rw1), f'{path_to_p}/1:3') + assert_equal(decode(f'{path_to_p}/1:3'), {'node': p.firstChild, 'offset': 3}) assert_equal(encode(document, rw1.firstChild, 1), f'{path_to_p}/1:4') + assert_equal(decode(f'{path_to_p}/1:4'), {'node': rw1.firstChild, 'offset': 1}) assert_equal(encode(document, p.lastChild, 1), f'{path_to_p}/1:7') + assert_equal(decode(f'{path_to_p}/1:7'), {'node': p.lastChild, 'offset': 1}) + p.appendChild(E.span('456', E.i('789'), data_calibre_range_wrapper='2')) - assert_equal(encode(document, p.lastChild.firstChild, 1), f'{path_to_p}/1:10') itag = p.querySelector('i') + assert_equal(encode(document, p.lastChild.firstChild, 1), f'{path_to_p}/1:10') + assert_equal(decode(f'{path_to_p}/1:10'), {'node': p.lastChild.firstChild, 'offset': 1}) assert_equal(encode(document, itag), f'{path_to_p}/2') + assert_equal(decode(f'{path_to_p}/2'), {'node': itag}) assert_equal(encode(document, itag.firstChild, 2), f'{path_to_p}/2/1:2') + assert_equal(decode(f'{path_to_p}/2/1:2'), {'node': itag.firstChild, 'offset': 2}) + + document.body.appendChild(E.p('abc')) + p = document.body.lastChild + path_to_p = encode(document, p) + p.appendChild(document.createTextNode('def')) + assert_equal(decode(f'{path_to_p}/1:2'), {'node': p.firstChild, 'offset': 2}) + assert_equal(decode(f'{path_to_p}/3:2'), {'node': p.lastChild, 'offset': 2}) diff --git a/src/pyj/test.pyj b/src/pyj/test.pyj index 627f273b1b..d3751b00a6 100644 --- a/src/pyj/test.pyj +++ b/src/pyj/test.pyj @@ -21,7 +21,7 @@ def get_matching_tests_for_name(name): return ans -def get_traceback(lines): +def get_traceback(): lines = traceback.format_exception() last_line = lines[-1] final_lines = v'[]' 
diff --git a/src/pyj/testing.pyj b/src/pyj/testing.pyj index e283efafb6..f218a4b75f 100644 --- a/src/pyj/testing.pyj +++ b/src/pyj/testing.pyj @@ -20,7 +20,9 @@ def repr_of(a): q = a.outerHTML if q: return q.split('>')[0] + '>' - return a + if a.nodeType is Node.TEXT_NODE: + return repr(a.nodeValue) + return str(a) def assert_equal(a, b, msg, call_site=None):