Store page list in book metadata when rendering

2025-07-09 03:04:10 -04:00 · 2024-08-19 12:23:20 +05:30 · 2024-08-19 12:23:20 +05:30 · 92b9566b0a
commit 92b9566b0a
parent 7a50e5cb5c
2 changed files with 22 additions and 4 deletions
--- a/src/calibre/ebooks/oeb/polish/toc.py
+++ b/src/calibre/ebooks/oeb/polish/toc.py
@@ -219,18 +219,33 @@ def parse_nav(container, nav_name):
    root = container.parsed(nav_name)
    toc_root = TOC()
    toc_root.lang = toc_root.uid = None
    seen_toc = seen_pagelist = False
    et = '{%s}type' % EPUB_NS
-    for nav in root.iterdescendants(XHTML('nav')):
+    for nav in XPath('descendant::h:nav[@epub:type]')(root):
-        if nav.get(et) == 'toc':
+        nt = nav.get(et)
+        if nt == 'toc' and not seen_toc:
            ol = first_child(nav, XHTML('ol'))
            if ol is not None:
                seen_toc = True
                process_nav_node(container, ol, toc_root, nav_name)
                for h in nav.iterchildren(*map(XHTML, 'h1 h2 h3 h4 h5 h6'.split())):
                    text = etree.tostring(h, method='text', encoding='unicode', with_tail=False) or h.get('title')
                    if text:
                        toc_root.toc_title = text
                        break
-                break
+        elif nt == 'page-list' and not seen_pagelist:
+            ol = first_child(nav, XHTML('ol'))
+            if ol is not None and not seen_pagelist:
+                seen_pagelist = True
+                for li in ol.iterchildren(XHTML('li')):
+                    for a in li.iterchildren(XHTML('a')):
+                        href = a.get('href')
+                        if href:
+                            text = (etree.tostring(a, method='text', encoding='unicode', with_tail=False) or a.get('title')).strip()
+                            if text:
+                                dest = nav_name if href.startswith('#') else container.href_to_name(href, base=nav_name)
+                                frag = urlparse(href).fragment or None
+                                toc_root.page_list.append({'dest': dest, 'pagenum': text, 'frag': frag})
    return toc_root
--- a/src/calibre/srv/render_book.py
+++ b/src/calibre/srv/render_book.py
@@ -713,7 +713,9 @@ def process_exploded_book(
        name == 'mimetype' or not container.has_name_and_is_not_empty(name)}
    raster_cover_name, titlepage_name = create_cover_page(container, input_fmt.lower(), is_comic, book_metadata)
-    toc = get_toc(container, verify_destinations=False).to_dict(count())
+    tocobj = get_toc(container, verify_destinations=False)
+    page_list = tocobj.page_list or []
+    toc = tocobj.to_dict(count())
    if not toc or not toc.get('children'):
        toc = from_xpaths(container, ['//h:h1', '//h:h2', '//h:h3']).to_dict(count())
    spine = [name for name, is_linear in container.spine_names]
@@ -746,6 +748,7 @@ def process_exploded_book(
        'landmarks': landmarks,
        'link_to_map': {},
        'page_progression_direction': page_progression_direction,
+        'page_list': page_list,
    }
    names = sorted(