Ignore range wrappers when decoding CFI as well

2025-07-09 03:04:10 -04:00 · 2020-04-08 14:19:14 +05:30 · 2020-04-08 14:19:14 +05:30 · be78740efe
commit be78740efe
parent 5f0440364a
4 changed files with 87 additions and 53 deletions
--- a/src/pyj/read_book/cfi.pyj
+++ b/src/pyj/read_book/cfi.pyj
@ -255,7 +255,62 @@ def encode(doc, node, offset, tail):
    return cfi
 # }}}

-def decode(cfi, doc): # {{{
+
+# def decode(cfi, doc):  {{{
+
+def node_at_index(nodes, target, index, iter_text_nodes):
+    '''
+    Find the node at position target among nodes, counting only text nodes
+    (iter_text_nodes truthy) or only elements (iter_text_nodes falsy).
+
+    Elements flagged as range wrappers (dataset.calibreRangeWrapper) are not
+    counted themselves; the nodes returned by unwrapped_nodes() for them are
+    searched in their place, continuing the same running count.
+
+    index is the count accumulated so far (0 on the initial call). Returns a
+    (node, index) pair. node is None when target was not reached, in which
+    case index is the updated running count so a caller can keep counting.
+    '''
+    for i in range(nodes.length):
+        node = nodes[i]
+        is_element = node.nodeType is Node.ELEMENT_NODE
+        # Not every element exposes dataset (e.g. MathML elements in some
+        # browser engines), so check it exists before reading the wrapper flag
+        if is_element and node.dataset and node.dataset.calibreRangeWrapper:
+            q, index = node_at_index(unwrapped_nodes(node), target, index, iter_text_nodes)
+            if q:
+                return q, index
+            continue
+        # Skip nodes that are not of the kind being counted
+        if (iter_text_nodes and not is_text_node(node)) or (not iter_text_nodes and not is_element):
+            continue
+        if index is target:
+            return node, index
+        index += 1
+    return None, index
+
+
+def node_for_path_step(parent, target, assertion):
+    '''
+    Resolve a single CFI path step relative to parent.
+
+    target is the numeric step: even values select an element child, odd
+    values select a text node child. assertion is the optional id assertion
+    attached to the step; when an element with that id exists it is returned
+    in preference to the positional lookup.
+
+    Returns the matching node or None if there is no such child.
+    '''
+    if assertion:
+        # Resolve the id in the document that actually contains parent, which
+        # is not necessarily the global document (for example after stepping
+        # into an iframe). parent.ownerDocument is null when parent is itself
+        # a document, so fall back to parent and finally to the global one.
+        owner = parent.ownerDocument or parent
+        if not owner.getElementById:
+            owner = document
+        q = owner.getElementById(assertion)
+        if q:
+            return q
+    is_element = target % 2 == 0
+    # Convert the step number into a zero based index among children of the
+    # selected kind: steps 2, 4, 6... are elements 0, 1, 2... and steps
+    # 1, 3, 5... are text nodes 0, 1, 2...
+    target //= 2
+    if is_element and target > 0:
+        target -= 1
+    return node_at_index(parent.childNodes, target, 0, not is_element)[0]
+
+
+def node_for_text_offset(nodes, offset, forward, first_node):
+    '''
+    Locate the text node that contains the character position offset,
+    counting characters across consecutive text nodes in nodes.
+
+    Counting starts at first_node when it is given (nodes before it are
+    skipped), otherwise at the first node. Range wrapper elements are
+    transparent: the nodes returned by unwrapped_nodes() for them are searched
+    as if they were inline. Other non-text nodes are skipped.
+
+    An offset exactly at the end of a text node is accepted for that node
+    only when forward is falsy; otherwise the search moves on to the next
+    text node.
+
+    Returns (node, offset, ok). When ok is True, offset is relative to the
+    start of node. When ok is False the offset was out of range: node is the
+    last text node seen (or None if there was none) and offset is the amount
+    left over after subtracting the lengths of all text nodes visited.
+    '''
+    last_text_node = None
+    seen_first = False
+    for i in range(nodes.length):
+        node = nodes[i]
+        if not seen_first:
+            if not first_node or node.isSameNode(first_node):
+                seen_first = True
+            else:
+                continue
+        if is_text_node(node):
+            l = node.nodeValue.length
+            if offset < l or (not forward and offset is l):
+                return node, offset, True
+            last_text_node = node
+            offset -= l
+        # Not every element exposes dataset (e.g. MathML elements in some
+        # browser engines), so check it exists before reading the wrapper flag
+        elif node.nodeType is Node.ELEMENT_NODE and node.dataset and node.dataset.calibreRangeWrapper:
+            # first_node is deliberately not passed: everything inside the
+            # wrapper comes after the starting node
+            qn, offset, ok = node_for_text_offset(unwrapped_nodes(node), offset, forward)
+            if ok:
+                return qn, offset, True
+            if qn:
+                # Remember text seen inside the wrapper so that an out of
+                # range offset still reports the true last text node
+                last_text_node = qn
+    return last_text_node, offset, False
+
+
+def decode(cfi, doc):
    doc = doc or window.document
    simple_node_regex = ///
        ^/(\d+)          # The node count
@ -271,35 +326,13 @@ def decode(cfi, doc): # {{{
            assertion = r[2]
            if assertion:
                assertion = unescape_from_cfi(assertion.slice(1, assertion.length-1))
-            index = 0
-            child = node.firstChild
-
-            while True:
-                if not child:
-                    if assertion: # Try to use the assertion to find the node
-                        child = doc.getElementById(assertion)
-                        if child:
-                            node = child
-                    if not child:
-                        error = "No matching child found for CFI: " + cfi
-                    cfi = cfi.substr(r[0].length)
-                    break
-                index |= 1 # Increment index by 1 if it is even
-                if child.nodeType is Node.ELEMENT_NODE:
-                    index += 1
-                if index is target:
-                    cfi = cfi.substr(r[0].length)
-                    node = child
-                    if assertion and node.id is not assertion:
-                        # The found child does not match the id assertion,
-                        # trust the id assertion if an element with that id
-                        # exists
-                        child = doc.getElementById(assertion)
-                        if child:
-                            node = child
-                    break
-                child = child.nextSibling
-
+            q = node_for_path_step(node, target, assertion)
+            if q:
+                node = q
+                cfi = cfi.substr(r[0].length)
+            else:
+                error = "No matching child found for CFI: " + cfi
+                break
        else if cfi[0] is '!': # Indirection
            if node.contentDocument:
                node = node.contentDocument
@ -350,26 +383,10 @@ def decode(cfi, doc): # {{{

    # Find the text node that contains the offset
    if offset is not None:
-        while True:
-            l = node.nodeValue.length
-            if offset < l or (not point.forward and offset is l):
-                break
-            next = False
-            while True:
-                nn = node.nextSibling
-                if not nn:
-                    break
-                if Node.TEXT_NODE <= nn.nodeType <= Node.ENTITY_NODE and nn.nodeValue and nn.nodeValue.length:
-                    next = nn
-                    break
-                node = nn
-            if not next:
-                if offset > l:
-                    error = "Offset out of range: " + offset
-                    offset = l
-                break
-            node = next
-            offset -= l
+        orig_offset = offset
+        node, offset, ok = node_for_text_offset(node.parentNode.childNodes, offset, point.forward, node)
+        if not ok:
+            error = "Offset out of range: " + orig_offset
        point.offset = offset

    point.node = node
--- a/src/pyj/read_book/test_cfi.pyj
+++ b/src/pyj/read_book/test_cfi.pyj
@ -62,11 +62,26 @@ def cfi_with_range_wrappers():
    rw1 = p.lastChild
    p.appendChild(document.createTextNode('123'))
    assert_equal(encode(document, p.firstChild, 1), f'{path_to_p}/1:1')
+    assert_equal(decode(f'{path_to_p}/1:1'), {'node': p.firstChild, 'offset': 1})
    assert_equal(encode(document, rw1), f'{path_to_p}/1:3')
+    assert_equal(decode(f'{path_to_p}/1:3'), {'node': p.firstChild, 'offset': 3})
    assert_equal(encode(document, rw1.firstChild, 1), f'{path_to_p}/1:4')
+    assert_equal(decode(f'{path_to_p}/1:4'), {'node': rw1.firstChild, 'offset': 1})
    assert_equal(encode(document, p.lastChild, 1), f'{path_to_p}/1:7')
+    assert_equal(decode(f'{path_to_p}/1:7'), {'node': p.lastChild, 'offset': 1})
+
    p.appendChild(E.span('456', E.i('789'), data_calibre_range_wrapper='2'))
-    assert_equal(encode(document, p.lastChild.firstChild, 1), f'{path_to_p}/1:10')
    itag = p.querySelector('i')
+    assert_equal(encode(document, p.lastChild.firstChild, 1), f'{path_to_p}/1:10')
+    assert_equal(decode(f'{path_to_p}/1:10'), {'node': p.lastChild.firstChild, 'offset': 1})
    assert_equal(encode(document, itag), f'{path_to_p}/2')
+    assert_equal(decode(f'{path_to_p}/2'), {'node': itag})
    assert_equal(encode(document, itag.firstChild, 2), f'{path_to_p}/2/1:2')
+    assert_equal(decode(f'{path_to_p}/2/1:2'), {'node': itag.firstChild, 'offset': 2})
+
+    document.body.appendChild(E.p('abc'))
+    p = document.body.lastChild
+    path_to_p = encode(document, p)
+    p.appendChild(document.createTextNode('def'))
+    assert_equal(decode(f'{path_to_p}/1:2'), {'node': p.firstChild, 'offset': 2})
+    assert_equal(decode(f'{path_to_p}/3:2'), {'node': p.lastChild, 'offset': 2})
--- a/src/pyj/test.pyj
+++ b/src/pyj/test.pyj
@ -21,7 +21,7 @@ def get_matching_tests_for_name(name):
    return ans


-def get_traceback(lines):
+def get_traceback():
    lines = traceback.format_exception()
    last_line = lines[-1]
    final_lines = v'[]'
--- a/src/pyj/testing.pyj
+++ b/src/pyj/testing.pyj
@ -20,7 +20,9 @@ def repr_of(a):
    q = a.outerHTML
    if q:
        return q.split('>')[0] + '>'
-    return a
+    if a.nodeType is Node.TEXT_NODE:
+        return repr(a.nodeValue)
+    return str(a)


 def assert_equal(a, b, msg, call_site=None):