Ignore range wrappers when decoding CFI as well

2025-07-09 03:04:10 -04:00 · 2020-04-08 14:19:14 +05:30 · 2020-04-08 14:19:14 +05:30 · be78740efe
commit be78740efe
parent 5f0440364a
4 changed files with 87 additions and 53 deletions
--- a/src/pyj/read_book/cfi.pyj
+++ b/src/pyj/read_book/cfi.pyj
@ -255,7 +255,62 @@ def encode(doc, node, offset, tail):
    return cfi
 # }}}
-def decode(cfi, doc): # {{{
+
 # def decode(cfi, doc):  {{{
 def node_at_index(nodes, target, index, iter_text_nodes):
    # Find the candidate node whose running position equals target.
    #
    # nodes:            array-like with .length (e.g. a childNodes list)
    # target:           zero-based position of the wanted candidate
    # index:            position counter to start from; threaded through
    #                   recursive calls so counting continues across wrappers
    # iter_text_nodes:  when truthy only nodes accepted by is_text_node() are
    #                   counted, otherwise only element nodes are counted
    #
    # Returns (node, index) on success and (None, index) when the list is
    # exhausted, where index is the counter value reached so far.
    for i in range(nodes.length):
        node = nodes[i]
        is_element = node.nodeType is Node.ELEMENT_NODE
        # Not every element exposes a dataset (MathML elements in older
        # engines do not), so check it exists before reading the marker.
        if is_element and node.dataset and node.dataset.calibreRangeWrapper:
            # A range wrapper is never counted itself; the nodes returned by
            # unwrapped_nodes() are scanned in its place with the same counter.
            q, index = node_at_index(unwrapped_nodes(node), target, index, iter_text_nodes)
            if q:
                return q, index
            continue
        is_candidate = is_text_node(node) if iter_text_nodes else is_element
        if not is_candidate:
            continue
        if index is target:
            return node, index
        index += 1
    return None, index
 def node_for_path_step(parent, target, assertion):
    # Resolve a single numeric CFI path step below parent.
    #
    # If an id assertion is supplied and an element with that id exists in
    # the global document, that element is returned and the step number is
    # not consulted. Otherwise an even step selects among element children
    # and an odd step among text nodes, with range wrappers looked through
    # by node_at_index(). Returns the matching node, or None if none matches.
    if assertion:
        by_id = document.getElementById(assertion)
        if by_id:
            return by_id
    wants_element = target % 2 == 0
    position = target // 2
    # Element steps start at 2, so halving leaves them one-based; shift to a
    # zero-based position. Odd (text) steps are already zero-based.
    if wants_element and position > 0:
        position -= 1
    found = node_at_index(parent.childNodes, position, 0, not wants_element)
    return found[0]
 def node_for_text_offset(nodes, offset, forward, first_node):
    # Locate the text node that contains a character offset counted across
    # consecutive text nodes, looking through range wrappers.
    #
    # nodes:       array-like with .length (e.g. a childNodes list)
    # offset:      character offset measured from the start of first_node
    # forward:     when falsy, an offset equal to a text node's length is
    #              accepted as the end of that node; when truthy such an
    #              offset moves on to the following text node
    # first_node:  node to start counting at; everything before it is
    #              skipped. When omitted counting starts at nodes[0].
    #
    # Returns (node, offset_in_node, True) on success. If the nodes run out
    # first, returns (last_text_node, remaining_offset, False), where
    # last_text_node is the last text node visited (None if there was none).
    last_text_node = None
    seen_first = False
    for i in range(nodes.length):
        node = nodes[i]
        if not seen_first:
            if not first_node or node.isSameNode(first_node):
                seen_first = True
            else:
                continue
        if is_text_node(node):
            l = node.nodeValue.length
            if offset < l or (not forward and offset is l):
                return node, offset, True
            last_text_node = node
            offset -= l
        # Not every element exposes a dataset (MathML elements in older
        # engines do not), so check it exists before reading the marker.
        elif node.nodeType is Node.ELEMENT_NODE and node.dataset and node.dataset.calibreRangeWrapper:
            qn, offset, ok = node_for_text_offset(unwrapped_nodes(node), offset, forward)
            if ok:
                return qn, offset, True
            # Remember text seen inside the wrapper so the fallback result
            # really is the last text node visited.
            if qn:
                last_text_node = qn
    return last_text_node, offset, False
 def decode(cfi, doc):
    doc = doc or window.document
    simple_node_regex = ///
        ^/(\d+)          # The node count
@ -271,35 +326,13 @@ def decode(cfi, doc): # {{{
            assertion = r[2]
            if assertion:
                assertion = unescape_from_cfi(assertion.slice(1, assertion.length-1))
-            index = 0
+            q = node_for_path_step(node, target, assertion)
-            child = node.firstChild
+            if q:
-
+                node = q
-            while True:
+                cfi = cfi.substr(r[0].length)
-                if not child:
+            else:
-                    if assertion: # Try to use the assertion to find the node
+                error = "No matching child found for CFI: " + cfi
-                        child = doc.getElementById(assertion)
+                break
                        if child:
                            node = child
                    if not child:
                        error = "No matching child found for CFI: " + cfi
                    cfi = cfi.substr(r[0].length)
                    break
                index |= 1 # Increment index by 1 if it is even
                if child.nodeType is Node.ELEMENT_NODE:
                    index += 1
                if index is target:
                    cfi = cfi.substr(r[0].length)
                    node = child
                    if assertion and node.id is not assertion:
                        # The found child does not match the id assertion,
                        # trust the id assertion if an element with that id
                        # exists
                        child = doc.getElementById(assertion)
                        if child:
                            node = child
                    break
                child = child.nextSibling
        else if cfi[0] is '!': # Indirection
            if node.contentDocument:
                node = node.contentDocument
@ -350,26 +383,10 @@ def decode(cfi, doc): # {{{
    # Find the text node that contains the offset
    if offset is not None:
-        while True:
+        orig_offset = offset
-            l = node.nodeValue.length
+        node, offset, ok = node_for_text_offset(node.parentNode.childNodes, offset, point.forward, node)
-            if offset < l or (not point.forward and offset is l):
+        if not ok:
-                break
+            error = "Offset out of range: " + orig_offset
            next = False
            while True:
                nn = node.nextSibling
                if not nn:
                    break
                if Node.TEXT_NODE <= nn.nodeType <= Node.ENTITY_NODE and nn.nodeValue and nn.nodeValue.length:
                    next = nn
                    break
                node = nn
            if not next:
                if offset > l:
                    error = "Offset out of range: " + offset
                    offset = l
                break
            node = next
            offset -= l
        point.offset = offset
    point.node = node
--- a/src/pyj/read_book/test_cfi.pyj
+++ b/src/pyj/read_book/test_cfi.pyj
@ -62,11 +62,26 @@ def cfi_with_range_wrappers():
    rw1 = p.lastChild
    p.appendChild(document.createTextNode('123'))
    assert_equal(encode(document, p.firstChild, 1), f'{path_to_p}/1:1')
    assert_equal(decode(f'{path_to_p}/1:1'), {'node': p.firstChild, 'offset': 1})
    assert_equal(encode(document, rw1), f'{path_to_p}/1:3')
    assert_equal(decode(f'{path_to_p}/1:3'), {'node': p.firstChild, 'offset': 3})
    assert_equal(encode(document, rw1.firstChild, 1), f'{path_to_p}/1:4')
    assert_equal(decode(f'{path_to_p}/1:4'), {'node': rw1.firstChild, 'offset': 1})
    assert_equal(encode(document, p.lastChild, 1), f'{path_to_p}/1:7')
    assert_equal(decode(f'{path_to_p}/1:7'), {'node': p.lastChild, 'offset': 1})
    p.appendChild(E.span('456', E.i('789'), data_calibre_range_wrapper='2'))
    assert_equal(encode(document, p.lastChild.firstChild, 1), f'{path_to_p}/1:10')
    itag = p.querySelector('i')
    assert_equal(encode(document, p.lastChild.firstChild, 1), f'{path_to_p}/1:10')
    assert_equal(decode(f'{path_to_p}/1:10'), {'node': p.lastChild.firstChild, 'offset': 1})
    assert_equal(encode(document, itag), f'{path_to_p}/2')
    assert_equal(decode(f'{path_to_p}/2'), {'node': itag})
    assert_equal(encode(document, itag.firstChild, 2), f'{path_to_p}/2/1:2')
    assert_equal(decode(f'{path_to_p}/2/1:2'), {'node': itag.firstChild, 'offset': 2})
    document.body.appendChild(E.p('abc'))
    p = document.body.lastChild
    path_to_p = encode(document, p)
    p.appendChild(document.createTextNode('def'))
    assert_equal(decode(f'{path_to_p}/1:2'), {'node': p.firstChild, 'offset': 2})
    assert_equal(decode(f'{path_to_p}/3:2'), {'node': p.lastChild, 'offset': 2})
--- a/src/pyj/test.pyj
+++ b/src/pyj/test.pyj
@ -21,7 +21,7 @@ def get_matching_tests_for_name(name):
    return ans
-def get_traceback(lines):
+def get_traceback():
    lines = traceback.format_exception()
    last_line = lines[-1]
    final_lines = v'[]'
--- a/src/pyj/testing.pyj
+++ b/src/pyj/testing.pyj
@ -20,7 +20,9 @@ def repr_of(a):
    q = a.outerHTML
    if q:
        return q.split('>')[0] + '>'
-    return a
+    if a.nodeType is Node.TEXT_NODE:
        return repr(a.nodeValue)
    return str(a)
 def assert_equal(a, b, msg, call_site=None):