From 92b9566b0af2f8af60dfbe17268955d5a3a80b1d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 19 Aug 2024 12:23:20 +0530 Subject: [PATCH] Store page list in book metadata when rendering --- src/calibre/ebooks/oeb/polish/toc.py | 21 ++++++++++++++++++--- src/calibre/srv/render_book.py | 5 ++++- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/oeb/polish/toc.py b/src/calibre/ebooks/oeb/polish/toc.py index 1147f26938..602b278940 100644 --- a/src/calibre/ebooks/oeb/polish/toc.py +++ b/src/calibre/ebooks/oeb/polish/toc.py @@ -219,18 +219,33 @@ def parse_nav(container, nav_name): root = container.parsed(nav_name) toc_root = TOC() toc_root.lang = toc_root.uid = None + seen_toc = seen_pagelist = False et = '{%s}type' % EPUB_NS - for nav in root.iterdescendants(XHTML('nav')): - if nav.get(et) == 'toc': + for nav in XPath('descendant::h:nav[@epub:type]')(root): + nt = nav.get(et) + if nt == 'toc' and not seen_toc: ol = first_child(nav, XHTML('ol')) if ol is not None: + seen_toc = True process_nav_node(container, ol, toc_root, nav_name) for h in nav.iterchildren(*map(XHTML, 'h1 h2 h3 h4 h5 h6'.split())): text = etree.tostring(h, method='text', encoding='unicode', with_tail=False) or h.get('title') if text: toc_root.toc_title = text break - break + elif nt == 'page-list' and not seen_pagelist: + ol = first_child(nav, XHTML('ol')) + if ol is not None and not seen_pagelist: + seen_pagelist = True + for li in ol.iterchildren(XHTML('li')): + for a in li.iterchildren(XHTML('a')): + href = a.get('href') + if href: + text = (etree.tostring(a, method='text', encoding='unicode', with_tail=False) or a.get('title')).strip() + if text: + dest = nav_name if href.startswith('#') else container.href_to_name(href, base=nav_name) + frag = urlparse(href).fragment or None + toc_root.page_list.append({'dest': dest, 'pagenum': text, 'frag': frag}) return toc_root diff --git a/src/calibre/srv/render_book.py b/src/calibre/srv/render_book.py index bce7fb7ac4..8e5c52e289 100644 --- a/src/calibre/srv/render_book.py +++ b/src/calibre/srv/render_book.py @@ -713,7 +713,9 @@ def process_exploded_book( name == 'mimetype' or not container.has_name_and_is_not_empty(name)} raster_cover_name, titlepage_name = create_cover_page(container, input_fmt.lower(), is_comic, book_metadata) - toc = get_toc(container, verify_destinations=False).to_dict(count()) + tocobj = get_toc(container, verify_destinations=False) + page_list = tocobj.page_list or [] + toc = tocobj.to_dict(count()) if not toc or not toc.get('children'): toc = from_xpaths(container, ['//h:h1', '//h:h2', '//h:h3']).to_dict(count()) spine = [name for name, is_linear in container.spine_names] @@ -746,6 +748,7 @@ def process_exploded_book( 'landmarks': landmarks, 'link_to_map': {}, 'page_progression_direction': page_progression_direction, + 'page_list': page_list, } names = sorted(