Work on adding headers/footers

2025-07-09 03:04:10 -04:00 · 2019-07-27 15:02:53 +05:30 · 2019-07-27 15:02:53 +05:30 · 4e3e0f8461
commit 4e3e0f8461
parent 316d0c35a1
4 changed files with 143 additions and 36 deletions
--- a/manual/conversion.rst
+++ b/manual/conversion.rst
@ -834,44 +834,50 @@ code that get rendered in the header and footer locations. For example, to
 display page numbers centered at the bottom of every page, in green, use the following
 footer template::
-    <p style="text-align:center; color:green">Page _PAGENUM_</p>
+    <footer><div style="margin: auto; color: green">_PAGENUM_</div></footer>
-calibre will automatically replace _PAGENUM_ with the current page number. You
+calibre will automatically replace :code:`_PAGENUM_` with the current page number. You
 can even put different content on even and odd pages. For example, the following
 header template will show the title on odd pages and the author on even pages::
-    <p style="text-align:right"><span class="even_page">_AUTHOR_</span><span class="odd_page"><i>_TITLE_</i></span></p>
+    <header style="justify-content: flex-end">
        <div class="even_page">_AUTHOR_</div>
        <div class="odd_page"><i>_TITLE_</i></div>
    </header>
-calibre will automatically replace _TITLE_ and _AUTHOR_ with the title and author
+calibre will automatically replace :code:`_TITLE_` and :code:`_AUTHOR_` with
-of the document being converted.  You can also display text at the left and
+the title and author of the document being converted.  You can also display
-right edges and change the font size, as demonstrated with this header
+text at the left and right edges and change the font size, as demonstrated with
-template::
+this header template::
-    <div style="font-size:x-small"><p style="float:left">_TITLE_</p><p style="float:right;"><i>_AUTHOR_</i></p></div>
+    <header style="justify-content: space-between; font-size: smaller">
        <div>_TITLE_</div>
        <div>_AUTHOR_</div>
    </header>
 This will display the title at the left and the author at the right, in a font
 size smaller than the main text.
 You can also use the current section in templates, as shown below::
-    <p style="text-align:right">_SECTION_</p>
+    <header><div>_SECTION_</div></header>
-_SECTION_ is replaced by whatever the name of the current section is. These
+:code:`_SECTION_` is replaced by whatever the name of the current section is. These
 names are taken from the metadata Table of Contents in the document (the PDF
 Outline). If the document has no table of contents then it will be replaced by
 empty text. If a single PDF page has multiple sections, the first section on
-the page will be used. Similarly, there is a variable named _TOP_LEVEL_SECTION_
+the page will be used. Similarly, there is a variable named :code:`_TOP_LEVEL_SECTION_`
 that can be used to get the name of the current top-level section.
-You can even use javascript inside the header and footer templates, for
+You can even use JavaScript inside the header and footer templates. For
 example, the following template will cause page numbers to start at 4 instead
 of 1::
    <p id="pagenum" style="text-align:center;"></p><script>document.getElementById("pagenum").innerHTML = "" + (_PAGENUM_ + 3)</script>
 .. note:: When adding headers and footers make sure you set the page top and
-    bottom margins to large enough values, under the Page setup section of the
+    bottom margins to large enough values, under the :guilabel:`PDF Output`
-    conversion dialog.
+    section of the conversion dialog.
 Printable Table of Contents
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -104,7 +104,7 @@ _archive_re = re.compile(r'[^ ]+')
 self_closing_bad_tags = {'a', 'abbr', 'address', 'article', 'aside', 'audio', 'b',
 'bdo', 'blockquote', 'body', 'button', 'cite', 'code', 'dd', 'del', 'details',
 'dfn', 'div', 'dl', 'dt', 'em', 'fieldset', 'figcaption', 'figure', 'footer',
-'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'i', 'ins', 'kbd',
+'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'i', 'iframe', 'ins', 'kbd',
 'label', 'legend', 'li', 'map', 'mark', 'meter', 'nav', 'ol', 'output', 'p',
 'pre', 'progress', 'q', 'rp', 'rt', 'samp', 'section', 'select', 'small',
 'span', 'strong', 'sub', 'summary', 'sup', 'textarea', 'time', 'ul', 'var',
@ -400,8 +400,8 @@ def xml2str(root, pretty_print=False, strip_comments=False, with_tail=True):
    return ans
-def xml2text(elem, pretty_print=False):
+def xml2text(elem, pretty_print=False, method='text'):
-    return etree.tostring(elem, method='text', encoding='unicode', with_tail=False, pretty_print=pretty_print)
+    return etree.tostring(elem, method=method, encoding='unicode', with_tail=False, pretty_print=pretty_print)
 def escape_cdata(root):
--- a/src/calibre/ebooks/pdf/develop.py
+++ b/src/calibre/ebooks/pdf/develop.py
@ -18,7 +18,7 @@ OUTPUT = '/t/dev.pdf'
 class Renderer(QWebEnginePage):
    def do_print(self, ok):
-        p = QPageLayout(QPageSize(QPageSize(QPageSize.A6)), QPageLayout.Portrait, QMarginsF(10, 10, 10, 10))
+        p = QPageLayout(QPageSize(QPageSize(QPageSize.A4)), QPageLayout.Portrait, QMarginsF(72, 0, 72, 0))
        self.printToPdf(self.print_finished, p)
    def print_finished(self, pdf_data):
@ -28,8 +28,6 @@ class Renderer(QWebEnginePage):
        podofo = get_podofo()
        doc = podofo.PDFDoc()
        doc.load(pdf_data)
        from pprint import pprint
        pprint(doc.extract_anchors())
 def main():
--- a/src/calibre/ebooks/pdf/html_writer.py
+++ b/src/calibre/ebooks/pdf/html_writer.py
@ -13,6 +13,7 @@ import signal
 import sys
 from collections import namedtuple
 from io import BytesIO
 from itertools import repeat
 from operator import attrgetter, itemgetter
 from PyQt5.Qt import (
@ -20,10 +21,10 @@ from PyQt5.Qt import (
 )
 from PyQt5.QtWebEngineWidgets import QWebEnginePage
-from calibre import detect_ncpus
+from calibre import detect_ncpus, prepare_string_for_xml
 from calibre.constants import iswindows
 from calibre.ebooks.metadata.xmp import metadata_to_xmp_packet
-from calibre.ebooks.oeb.base import XHTML
+from calibre.ebooks.oeb.base import XHTML, xml2text
 from calibre.ebooks.oeb.polish.container import Container as ContainerBase
 from calibre.ebooks.oeb.polish.toc import get_toc
 from calibre.ebooks.pdf.image_writer import (
@ -55,7 +56,7 @@ def data_as_pdf_doc(data):
 def create_skeleton(container):
-    spine_name = next(container.spine_names)[0]
+    spine_name = tuple(container.spine_names)[-1][0]
    root = container.parsed(spine_name)
    root = copy.deepcopy(root)
    body = root[-1]
@ -213,20 +214,23 @@ class RenderManager(QObject):
            QApplication.instance().exit(OK)
 def resolve_margins(margins, page_layout):
    '''
    Return a Margins tuple (left, top, right, bottom) for the given layout.

    For each side, the value of the same-named attribute on ``margins`` is
    used. When that attribute is missing or is None, the value comes from
    calling the same-named method on ``page_layout.marginsPoints()`` instead.
    '''
    fallback = page_layout.marginsPoints()
    resolved = []
    for side in ('left', 'top', 'right', 'bottom'):
        value = getattr(margins, side, None)
        if value is None:
            # No explicit value for this side, use the layout's own margin
            value = getattr(fallback, side)()
        resolved.append(value)
    return Margins(*resolved)
 def job_for_name(container, name, margins, page_layout):
    index_file = container.name_to_abspath(name)
    if margins:
        def m(which):
            ans = getattr(margins, which)
            if ans is None:
                ans = getattr(old_margins, which)()
            return ans
        page_layout = QPageLayout(page_layout)
        page_layout.setUnits(QPageLayout.Point)
-        old_margins = page_layout.marginsPoints()
+        new_margins = QMarginsF(*resolve_margins(margins, page_layout))
        new_margins = QMarginsF(*map(m, 'left top right bottom'.split()))
        page_layout.setMargins(new_margins)
    return index_file, page_layout, name
 # }}}
@ -324,14 +328,14 @@ def make_anchors_unique(container):
        else:
            name = container.href_to_name(href, base)
        if not name:
-            return url
+            return url.rstrip('#')
        if not frag and name in spine_names:
            replacer.replaced = True
            return 'https://calibre-pdf-anchor.n#' + name
        key = name, frag
        new_frag = mapping.get(key)
        if new_frag is None:
-            return url
+            return url.rstrip('#')
        replacer.replaced = True
        return 'https://calibre-pdf-anchor.a#' + new_frag
        if url.startswith('#'):
@ -782,6 +786,98 @@ def test_merge_fonts():
 # }}}
 # Header/footer {{{
 # Footer template used by add_header_footer() when page numbers are requested
 # (opts.pdf_page_numbers) but no footer template has been supplied.
 PAGE_NUMBER_TEMPLATE = '<footer><div style="margin: auto">_PAGENUM_</div></footer>'
 def add_header_footer(manager, opts, pdf_doc, container, page_number_display_map, page_layout, page_margins_map, pdf_metadata, report_progress):
    '''
    Render the header/footer templates for every page of ``pdf_doc`` into a
    single extra HTML file, convert that file to PDF and pass the resulting
    document to ``pdf_doc.append()``.

    The HTML file holds one <div> per page of ``pdf_doc``. Each div contains
    an <iframe> for the header and/or footer whose ``srcdoc`` is the
    serialized skeleton document with the filled-in template inserted just
    before ``</body>``.

    :param manager: object providing ``convert_html_files(jobs, settle_time=...)``
    :param opts: reads ``pdf_header_template``, ``pdf_footer_template`` and
        ``pdf_page_numbers``. If there is no footer template but page numbers
        are requested, PAGE_NUMBER_TEMPLATE is used as the footer.
    :param pdf_doc: document whose ``page_count()`` decides how many divs are
        generated and which receives the rendered header/footer document
    :param container: container in which the skeleton file is created,
        modified and committed
    :param page_number_display_map: looked up with the 1-based page number
        to get the value substituted for ``_PAGENUM_``
    :param page_layout: forwarded to ``job_for_name()``
    :param page_margins_map: indexed by ``page_num - 1``; every entry must
        have ``left``, ``right``, ``top`` and ``bottom`` attributes
    :param pdf_metadata: its ``title`` and ``author`` replace ``_TITLE_`` and
        ``_AUTHOR_``
    :param report_progress: callable taking ``(fraction, message)``

    Returns None, immediately when there is neither a header nor a footer to
    add. Raises ValueError if the skeleton contains no ``</body>`` and
    SystemExit if the conversion result is not bytes.
    '''
    header_template = opts.pdf_header_template
    footer_template = opts.pdf_footer_template
    if not footer_template and opts.pdf_page_numbers:
        footer_template = PAGE_NUMBER_TEMPLATE
    if not (header_template or footer_template):
        return
    report_progress(0.8, _('Adding headers and footers'))
    name = create_skeleton(container)
    root = container.parsed(name)
    body = root[-1]
    body.set('style', 'margin: 0; padding: 0; border-width: 0')
    # Serialize before the first child of root is emptied below: this text is
    # the document that gets loaded into every iframe.
    skeleton = xml2text(root, method='html')
    job = job_for_name(container, name, Margins(0, 0, 0, 0), page_layout)
    def new_element(tag_name, text=None, **attrs):
        # Create an XHTML element owned by the skeleton's tree
        elem = root.makeelement(XHTML(tag_name), **attrs)
        if text is not None:
            elem.text = text
        return elem
    # Vertical placement of the iframes inside each per-page div
    if not header_template:
        alignment = 'flex-end'
    elif footer_template:
        alignment = 'space-between'
    else:
        alignment = 'flex-start'
    head = root[0]
    del head[:]
    head.append(new_element('style', '''
        * {{ margin: 0; padding: 0; border-width: 0; box-sizing: border-box; }}
        div {{
            page-break-inside: avoid;
            page-break-after:always;
            display: flex;
            flex-direction: column;
            height: 100%;
            margin-bottom: 0pt;
            justify-content: {justify}
        }}
    '''.format(justify=alignment)))
    def make_frame(margins, srcdoc, is_footer=False):
        # The iframe is inset by the page's left/right margins and is as tall
        # as the top (header) or bottom (footer) margin.
        props = {}
        props['margin-left'] = '{}pt'.format(margins.left)
        props['margin-right'] = '{}pt'.format(margins.right)
        props['height'] = '{}pt'.format(margins.bottom if is_footer else margins.top)
        css = '; '.join('{}: {}'.format(prop, val) for prop, val in iteritems(props))
        return new_element('iframe', seamless='seamless', style=css, srcdoc=srcdoc)
    def fill_template(template, page_num):
        # TODO: _SECTION_ and _TOP_LEVEL_SECTION_
        filled = template.replace('_PAGENUM_', unicode_type(page_number_display_map[page_num]))
        filled = filled.replace('_TITLE_', prepare_string_for_xml(pdf_metadata.title, True))
        filled = filled.replace('_AUTHOR_', prepare_string_for_xml(pdf_metadata.author, True))
        # On odd pages hide the even_page content and vice versa
        hidden_class = '.even_page' if page_num % 2 else '.odd_page'
        extra_style = (
            'header, footer { margin: 0; padding: 0; border-width: 0; height: 100vh; display: flex; align-items: center }' +
            hidden_class + ' { display: none }')
        filled += '<style>{}</style>'.format(extra_style)
        document = skeleton.replace('</body>', filled + '</body>', 1)
        if document == skeleton:
            raise ValueError('Failed to insert template into skeleton: ' + skeleton)
        return document
    # (template, is_footer) pairs in the order they are placed inside a div
    templates = []
    if header_template:
        templates.append((header_template, False))
    if footer_template:
        templates.append((footer_template, True))
    for page_num in range(1, pdf_doc.page_count() + 1):
        page_div = new_element('div')
        body.append(page_div)
        margins = page_margins_map[page_num - 1]
        for template, is_footer in templates:
            srcdoc = fill_template(template, page_num)
            page_div.append(make_frame(margins, srcdoc, is_footer))
    container.commit()
    results = manager.convert_html_files([job], settle_time=2)
    data = results[name]
    if not isinstance(data, bytes):
        # NOTE(review): a non-bytes result is presumably an error report from
        # the render job -- confirm against convert_html_files()
        raise SystemExit(data)
    pdf_doc.append(data_as_pdf_doc(data))
    report_progress(0.9, _('Headers and footers added'))
 # }}}
 def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, cover_data=None, report_progress=lambda x, y: None):
    container = Container(opf_path, log)
    report_progress(0.05, _('Parsed all content for markup transformation'))
@ -802,6 +898,7 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
        jobs.append(job_for_name(container, margin_file.name, margin_file.margins, page_layout))
    results = manager.convert_html_files(jobs, settle_time=1)
    num_pages = 0
    page_margins_map = []
    for margin_file in margin_files:
        name = margin_file.name
        data = results[name]
@ -809,7 +906,9 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
            raise SystemExit(data)
        doc = data_as_pdf_doc(data)
        anchor_locations.update(get_anchor_locations(doc, num_pages + 1, links_page_uuid))
-        num_pages += doc.page_count()
+        doc_pages = doc.page_count()
        page_margins_map.extend(repeat(resolve_margins(margin_file.margins, page_layout), doc_pages))
        num_pages += doc_pages
        if pdf_doc is None:
            pdf_doc = doc
@ -837,11 +936,15 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
        add_toc(PDFOutlineRoot(pdf_doc), toc)
    report_progress(0.75, _('Added links to PDF content'))
    pdf_metadata = PDFMetadata(metadata)
    add_header_footer(manager, opts, pdf_doc, container, page_number_display_map, page_layout, page_margins_map, pdf_metadata, report_progress)
    merge_fonts(pdf_doc)
    num_removed = dedup_type3_fonts(pdf_doc)
    if num_removed:
        log('Removed', num_removed, 'duplicated Type3 glyphs')
    # TODO: dedup images
    # TODO: Support for mathematics
    num_removed = remove_unused_fonts(pdf_doc)
@ -852,7 +955,7 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
        add_cover(pdf_doc, cover_data, page_layout, opts)
    if metadata is not None:
-        update_metadata(pdf_doc, PDFMetadata(metadata))
+        update_metadata(pdf_doc, pdf_metadata)
    report_progress(1, _('Updated metadata in PDF'))
    if opts.uncompressed_pdf: