When encoding CFI ignore range wrappers

2025-07-09 03:04:10 -04:00 · 2020-04-06 20:21:31 +05:30 · 2020-04-06 20:21:31 +05:30 · 524f891f95
commit 524f891f95
parent 106f8900a9
1 changed files with 93 additions and 26 deletions
--- a/src/pyj/read_book/cfi.pyj
+++ b/src/pyj/read_book/cfi.pyj
@ -130,32 +130,103 @@ def set_current_time(target, val):  # {{{
        target.addEventListener("canplay", def(): target.currentTime = val;, False)
 # }}}
-def encode(doc, node, offset, tail):  # {{{
+# def encode(doc, node, offset, tail): {{{
    cfi = tail or ""
 def is_text_node(node):
    # True when node is character data that counts towards text offsets:
    # either a plain text node or a CDATA section.
    kind = node.nodeType
    return kind is Node.TEXT_NODE or kind is Node.CDATA_SECTION_NODE
 def text_length_in_range_wrapper(node):
    # Total length of the text held directly inside node, descending into
    # nested range wrappers (elements whose dataset has calibreRangeWrapper
    # set). Other child elements contribute nothing.
    total = 0
    child = node.firstChild
    while child:
        if is_text_node(child):
            total += child.nodeValue.length
        elif child.nodeType is Node.ELEMENT_NODE and child.dataset.calibreRangeWrapper:
            # A wrapper inside a wrapper: count its text recursively
            total += text_length_in_range_wrapper(child)
        child = child.nextSibling
    return total
 def adjust_node_for_text_offset(node):
    # Walk backwards through the previous siblings of node, totalling the
    # length of the text that precedes it. Returns a pair: the earliest
    # sibling reached by the walk and the accumulated character count.
    offset = 0
    while True:
        p = node.previousSibling
        # Stop when there is no previous sibling, or when its nodeType is
        # numerically greater than Node.COMMENT_NODE
        if not p or p.nodeType > Node.COMMENT_NODE:
            break
        if is_text_node(p):
            offset += p.nodeValue.length
        elif p.nodeType is Node.ELEMENT_NODE and p.dataset.calibreRangeWrapper:
            # A range wrapper counts as the text it contains
            offset += text_length_in_range_wrapper(p)
        # Any other sibling that passed the check above is stepped over
        # without changing the offset
        node = p
    return node, offset
 def unwrapped_nodes(range_wrapper):
    # Flatten range_wrapper into an array of its child nodes in document
    # order, replacing every nested range wrapper by its own (recursively
    # flattened) children.
    flat = v'[]'
    for kid in range_wrapper.childNodes:
        is_nested_wrapper = kid.nodeType is Node.ELEMENT_NODE and kid.dataset.calibreRangeWrapper
        if is_nested_wrapper:
            flat = flat.concat(unwrapped_nodes(kid))
        else:
            flat.push(kid)
    return flat
 def increment_index_for_child(child, index, sentinel):
    # Advance the running index past a single child and return the new value.
    # Range wrappers add no step of their own: their unwrapped contents are
    # counted instead. sentinel is only forwarded to that nested count.
    if child.nodeType is not Node.ELEMENT_NODE:
        # Non-element node: round an even index up to the next odd number
        return index | 1
    if child.dataset.calibreRangeWrapper:
        return increment_index_for_children(unwrapped_nodes(child), index, sentinel)
    # Ordinary element: round up to odd, then step once more
    return (index | 1) + 1
 def increment_index_for_children(children, index, sentinel):
    # Apply increment_index_for_child() to each entry of children in order,
    # starting from index, and return the resulting index. Iteration stops
    # after the entry that is sentinel has been counted.
    for i in range(children.length):
        child = children[i]
        index = increment_index_for_child(child, index, sentinel)
        # NOTE(review): only a direct member of children triggers this break.
        # If sentinel was reached inside a range wrapper during the call
        # above, this loop carries on with the remaining entries - confirm
        # that callers never pass a sentinel that sits inside a wrapper.
        if child is sentinel:
            break
    return index
 def non_range_wrapper_parent(node):
    # Return the closest ancestor of node that is not a range wrapper (an
    # element whose dataset has calibreRangeWrapper set). If node has no
    # parent, or the ancestor chain ends while skipping wrappers, the empty
    # parentNode value is returned so that callers can test it with `not`.
    p = node.parentNode
    # p must be checked before it is dereferenced: the ?. operator only
    # protects against a missing dataset, not against p itself being null
    # (as it is for a node without a parent, e.g. a Document).
    while p and p.dataset?.calibreRangeWrapper:
        p = p.parentNode
    return p
 def encode(doc, node, offset, tail):
    cfi = tail or ""
    # Handle the offset, if any
    if node.nodeType is Node.ELEMENT_NODE:
        if jstype(offset) is 'number':
            q = node.childNodes.item(offset)
-            if q and q.nodeType is Node.ELEMENT_NODE:
+            if q:
-                node = q
+                if q.nodeType is Node.ELEMENT_NODE:
-    elif Node.TEXT_NODE <= node.nodeType <= Node.ENTITY_NODE:
+                    node = q
                    if node.dataset.calibreRangeWrapper:
                        if not node.firstChild:
                            node.appendChild(document.createTextNode(''))
                        node = node.firstChild
    if is_text_node(node):
        offset = offset or 0
-        while True:
+        if node.parentNode and node.parentNode.dataset.calibreRangeWrapper:
-            p = node.previousSibling
+            node = node.parentNode
-            if not p or p.nodeType > Node.COMMENT_NODE:
+        node, additional_offset = adjust_node_for_text_offset(node)
-                break
+        offset += additional_offset
            # log("previous sibling:"+ p + " " + p?.nodeType + " length: " + p?.nodeValue?.length)
            if p.nodeType not in v'[Node.ATTRIBUTE_NODE, Node.COMMENT_NODE]' and p.nodeValue and p.nodeValue.length:
                offset += p.nodeValue.length
            node = p
        cfi = ":" + offset + cfi
-    else: # Not handled
+    elif node.nodeType is not Node.ELEMENT_NODE: # Not handled
-        print(str.format("Offsets for nodes of type {} are not handled", node.nodeType))
+        print(f"Offsets for nodes of type {node.nodeType} are not handled")
    # Construct the path to node from root
    is_first = True
    while node is not doc:
-        p = node.parentNode
+        p = non_range_wrapper_parent(node)
        if not p:
            if node.nodeType == Node.DOCUMENT_NODE:  # Document node (iframe)
                win = node.defaultView
@ -164,17 +235,13 @@ def encode(doc, node, offset, tail):  # {{{
                    cfi = "!" + cfi
                    continue
            break
        # Find position of node in parent
        index = 0
        child = p.firstChild
        while True:
            index |= 1 # Increment index by 1 if it is even
            if child.nodeType is Node.ELEMENT_NODE:
                index += 1
            if child is node:
                break
            child = child.nextSibling
        # Find position of node in parent
        index = increment_index_for_children(p.childNodes, 0, node)
        if is_first:
            is_first = False
            if node.nodeType is Node.ELEMENT_NODE and node.dataset.calibreRangeWrapper:
                index -= 1
        # Add id assertions for robustness where possible
        id = node.id
        idspec = ('[' + escape_for_cfi(id) + ']') if id else ''