From 4e3e0f8461a137c54611a7e136513fd48b2e346c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 27 Jul 2019 15:02:53 +0530 Subject: [PATCH] Work on adding headers/footers --- manual/conversion.rst | 34 ++++--- src/calibre/ebooks/oeb/base.py | 6 +- src/calibre/ebooks/pdf/develop.py | 4 +- src/calibre/ebooks/pdf/html_writer.py | 135 +++++++++++++++++++++++--- 4 files changed, 143 insertions(+), 36 deletions(-) diff --git a/manual/conversion.rst b/manual/conversion.rst index 8bdefe8e68..7fd8bfe237 100644 --- a/manual/conversion.rst +++ b/manual/conversion.rst @@ -834,44 +834,50 @@ code that get rendered in the header and footer locations. For example, to display page numbers centered at the bottom of every page, in green, use the following footer template:: -

<p style="text-align:center; color:green">Page _PAGENUM_</p>

+ -calibre will automatically replace _PAGENUM_ with the current page number. You +calibre will automatically replace :code:`_PAGENUM_` with the current page number. You can even put different content on even and odd pages, for example the following header template will show the title on odd pages and the author on even pages:: -

<p><span class="even_page">_AUTHOR_</span><span class="odd_page">_TITLE_</span></p>

+    <header>
+        <div class="even_page">_AUTHOR_</div>
+        <div class="odd_page">_TITLE_</div>
+    </header>
-calibre will automatically replace _TITLE_ and _AUTHOR_ with the title and author -of the document being converted. You can also display text at the left and -right edges and change the font size, as demonstrated with this header -template:: +calibre will automatically replace :code:`_TITLE_` and :code:`_AUTHOR_` with +the title and author of the document being converted. You can also display +text at the left and right edges and change the font size, as demonstrated with +this header template:: -

_TITLE_

_AUTHOR_

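+    <header style="justify-content: space-between; font-size: smaller">
+        <div>_TITLE_</div>
+        <div>_AUTHOR_</div>
+    </header>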
This will display the title at the left and the author at the right, in a font size smaller than the main text. You can also use the current section in templates, as shown below:: -

_SECTION_

+    <header><div>_SECTION_</div></header>
-_SECTION_ is replaced by whatever the name of the current section is. These +:code:`_SECTION_` is replaced by whatever the name of the current section is. These names are taken from the metadata Table of Contents in the document (the PDF Outline). If the document has no table of contents then it will be replaced by empty text. If a single PDF page has multiple sections, the first section on -the page will be used. Similarly, there is a variable named _TOP_LEVEL_SECTION_ +the page will be used. Similarly, there is a variable named :code:`_TOP_LEVEL_SECTION_` that can be used to get the name of the current top-level section. -You can even use javascript inside the header and footer templates, for +You can even use JavaScript inside the header and footer templates, for example, the following template will cause page numbers to start at 4 instead of 1::

.. note:: When adding headers and footers make sure you set the page top and - bottom margins to large enough values, under the Page setup section of the - conversion dialog. + bottom margins to large enough values, under the :guilabel:`PDF Output` + section of the conversion dialog. Printable Table of Contents ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index e330668a34..0588257e67 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -104,7 +104,7 @@ _archive_re = re.compile(r'[^ ]+') self_closing_bad_tags = {'a', 'abbr', 'address', 'article', 'aside', 'audio', 'b', 'bdo', 'blockquote', 'body', 'button', 'cite', 'code', 'dd', 'del', 'details', 'dfn', 'div', 'dl', 'dt', 'em', 'fieldset', 'figcaption', 'figure', 'footer', -'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'i', 'ins', 'kbd', +'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'i', 'iframe', 'ins', 'kbd', 'label', 'legend', 'li', 'map', 'mark', 'meter', 'nav', 'ol', 'output', 'p', 'pre', 'progress', 'q', 'rp', 'rt', 'samp', 'section', 'select', 'small', 'span', 'strong', 'sub', 'summary', 'sup', 'textarea', 'time', 'ul', 'var', @@ -400,8 +400,8 @@ def xml2str(root, pretty_print=False, strip_comments=False, with_tail=True): return ans -def xml2text(elem, pretty_print=False): - return etree.tostring(elem, method='text', encoding='unicode', with_tail=False, pretty_print=pretty_print) +def xml2text(elem, pretty_print=False, method='text'): + return etree.tostring(elem, method=method, encoding='unicode', with_tail=False, pretty_print=pretty_print) def escape_cdata(root): diff --git a/src/calibre/ebooks/pdf/develop.py b/src/calibre/ebooks/pdf/develop.py index 4376c7eee6..e9d88e61d0 100644 --- a/src/calibre/ebooks/pdf/develop.py +++ b/src/calibre/ebooks/pdf/develop.py @@ -18,7 +18,7 @@ OUTPUT = '/t/dev.pdf' class Renderer(QWebEnginePage): def do_print(self, ok): - p = 
QPageLayout(QPageSize(QPageSize(QPageSize.A6)), QPageLayout.Portrait, QMarginsF(10, 10, 10, 10)) + p = QPageLayout(QPageSize(QPageSize(QPageSize.A4)), QPageLayout.Portrait, QMarginsF(72, 0, 72, 0)) self.printToPdf(self.print_finished, p) def print_finished(self, pdf_data): @@ -28,8 +28,6 @@ class Renderer(QWebEnginePage): podofo = get_podofo() doc = podofo.PDFDoc() doc.load(pdf_data) - from pprint import pprint - pprint(doc.extract_anchors()) def main(): diff --git a/src/calibre/ebooks/pdf/html_writer.py b/src/calibre/ebooks/pdf/html_writer.py index e73964c877..5a6acb041f 100644 --- a/src/calibre/ebooks/pdf/html_writer.py +++ b/src/calibre/ebooks/pdf/html_writer.py @@ -13,6 +13,7 @@ import signal import sys from collections import namedtuple from io import BytesIO +from itertools import repeat from operator import attrgetter, itemgetter from PyQt5.Qt import ( @@ -20,10 +21,10 @@ from PyQt5.Qt import ( ) from PyQt5.QtWebEngineWidgets import QWebEnginePage -from calibre import detect_ncpus +from calibre import detect_ncpus, prepare_string_for_xml from calibre.constants import iswindows from calibre.ebooks.metadata.xmp import metadata_to_xmp_packet -from calibre.ebooks.oeb.base import XHTML +from calibre.ebooks.oeb.base import XHTML, xml2text from calibre.ebooks.oeb.polish.container import Container as ContainerBase from calibre.ebooks.oeb.polish.toc import get_toc from calibre.ebooks.pdf.image_writer import ( @@ -55,7 +56,7 @@ def data_as_pdf_doc(data): def create_skeleton(container): - spine_name = next(container.spine_names)[0] + spine_name = tuple(container.spine_names)[-1][0] root = container.parsed(spine_name) root = copy.deepcopy(root) body = root[-1] @@ -213,20 +214,23 @@ class RenderManager(QObject): QApplication.instance().exit(OK) +def resolve_margins(margins, page_layout): + old_margins = page_layout.marginsPoints() + + def m(which): + ans = getattr(margins, which, None) + if ans is None: + ans = getattr(old_margins, which)() + return ans + return 
Margins(*map(m, 'left top right bottom'.split())) + + def job_for_name(container, name, margins, page_layout): index_file = container.name_to_abspath(name) if margins: - - def m(which): - ans = getattr(margins, which) - if ans is None: - ans = getattr(old_margins, which)() - return ans - page_layout = QPageLayout(page_layout) page_layout.setUnits(QPageLayout.Point) - old_margins = page_layout.marginsPoints() - new_margins = QMarginsF(*map(m, 'left top right bottom'.split())) + new_margins = QMarginsF(*resolve_margins(margins, page_layout)) page_layout.setMargins(new_margins) return index_file, page_layout, name # }}} @@ -324,14 +328,14 @@ def make_anchors_unique(container): else: name = container.href_to_name(href, base) if not name: - return url + return url.rstrip('#') if not frag and name in spine_names: replacer.replaced = True return 'https://calibre-pdf-anchor.n#' + name key = name, frag new_frag = mapping.get(key) if new_frag is None: - return url + return url.rstrip('#') replacer.replaced = True return 'https://calibre-pdf-anchor.a#' + new_frag if url.startswith('#'): @@ -782,6 +786,98 @@ def test_merge_fonts(): # }}} +# Header/footer {{{ + +PAGE_NUMBER_TEMPLATE = '' + + +def add_header_footer(manager, opts, pdf_doc, container, page_number_display_map, page_layout, page_margins_map, pdf_metadata, report_progress): + header_template, footer_template = opts.pdf_header_template, opts.pdf_footer_template + if not footer_template and opts.pdf_page_numbers: + footer_template = PAGE_NUMBER_TEMPLATE + if not header_template and not footer_template: + return + report_progress(0.8, _('Adding headers and footers')) + name = create_skeleton(container) + root = container.parsed(name) + body = root[-1] + body.set('style', 'margin: 0; padding: 0; border-width: 0') + skeleton = xml2text(root, method='html') + job = job_for_name(container, name, Margins(0, 0, 0, 0), page_layout) + + def m(tag_name, text=None, **attrs): + ans = root.makeelement(XHTML(tag_name), **attrs) + if 
text is not None: + ans.text = text + return ans + + justify = 'flex-end' + if header_template: + justify = 'space-between' if footer_template else 'flex-start' + del root[0][:] + root[0].append(m('style', ''' + * {{ margin: 0; padding: 0; border-width: 0; box-sizing: border-box; }} + div {{ + page-break-inside: avoid; + page-break-after:always; + display: flex; + flex-direction: column; + height: 100%; + margin-bottom: 0pt; + justify-content: {justify} + }} + '''.format(justify=justify))) + + def create_iframe(margins, f, is_footer=False): + style = { + 'margin-left': '{}pt'.format(margins.left), + 'margin-right': '{}pt'.format(margins.right), + 'height': '{}pt'.format(margins.bottom if is_footer else margins.top)} + style = '; '.join('{}: {}'.format(k, v) for k, v in iteritems(style)) + return m( + 'iframe', seamless='seamless', style=style, + srcdoc=f + ) + + def format_template(template, page_num): + # TODO: _SECTION_ and _TOP_LEVEL_SECTION_ + template = template.replace('_PAGENUM_', unicode_type(page_number_display_map[page_num])) + extra_style = 'header, footer { margin: 0; padding: 0; border-width: 0; height: 100vh; display: flex; align-items: center }' + if page_num % 2: + extra_style += '.even_page { display: none }' + else: + extra_style += '.odd_page { display: none }' + template = template.replace('_TITLE_', prepare_string_for_xml(pdf_metadata.title, True)) + template = template.replace('_AUTHOR_', prepare_string_for_xml(pdf_metadata.author, True)) + template += ''.format(extra_style) + repl = skeleton.replace('', template + '', 1) + if repl == skeleton: + raise ValueError('Failed to insert template into skeleton: ' + skeleton) + return repl + + for page_num in range(1, pdf_doc.page_count() + 1): + div = m('div') + body.append(div) + margins = page_margins_map[page_num - 1] + if header_template: + f = format_template(header_template, page_num) + div.append(create_iframe(margins, f)) + if footer_template: + f = format_template(footer_template, page_num) 
+ div.append(create_iframe(margins, f, True)) + + container.commit() + results = manager.convert_html_files([job], settle_time=2) + data = results[name] + if not isinstance(data, bytes): + raise SystemExit(data) + doc = data_as_pdf_doc(data) + pdf_doc.append(doc) + report_progress(0.9, _('Headers and footers added')) + +# }}} + + def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, cover_data=None, report_progress=lambda x, y: None): container = Container(opf_path, log) report_progress(0.05, _('Parsed all content for markup transformation')) @@ -802,6 +898,7 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co jobs.append(job_for_name(container, margin_file.name, margin_file.margins, page_layout)) results = manager.convert_html_files(jobs, settle_time=1) num_pages = 0 + page_margins_map = [] for margin_file in margin_files: name = margin_file.name data = results[name] @@ -809,7 +906,9 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co raise SystemExit(data) doc = data_as_pdf_doc(data) anchor_locations.update(get_anchor_locations(doc, num_pages + 1, links_page_uuid)) - num_pages += doc.page_count() + doc_pages = doc.page_count() + page_margins_map.extend(repeat(resolve_margins(margin_file.margins, page_layout), doc_pages)) + num_pages += doc_pages if pdf_doc is None: pdf_doc = doc @@ -837,11 +936,15 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co add_toc(PDFOutlineRoot(pdf_doc), toc) report_progress(0.75, _('Added links to PDF content')) + pdf_metadata = PDFMetadata(metadata) + add_header_footer(manager, opts, pdf_doc, container, page_number_display_map, page_layout, page_margins_map, pdf_metadata, report_progress) + merge_fonts(pdf_doc) num_removed = dedup_type3_fonts(pdf_doc) if num_removed: log('Removed', num_removed, 'duplicated Type3 glyphs') + # TODO: dedup images # TODO: Support for mathematics num_removed = remove_unused_fonts(pdf_doc) 
@@ -852,7 +955,7 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co add_cover(pdf_doc, cover_data, page_layout, opts) if metadata is not None: - update_metadata(pdf_doc, PDFMetadata(metadata)) + update_metadata(pdf_doc, pdf_metadata) report_progress(1, _('Updated metadata in PDF')) if opts.uncompressed_pdf: