Work on adding headers/footers

This commit is contained in:
Kovid Goyal 2019-07-27 15:02:53 +05:30
parent 316d0c35a1
commit 4e3e0f8461
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 143 additions and 36 deletions

View File

@ -834,44 +834,50 @@ code that get rendered in the header and footer locations. For example, to
display page numbers centered at the bottom of every page, in green, use the following display page numbers centered at the bottom of every page, in green, use the following
footer template:: footer template::
<p style="text-align:center; color:green">Page _PAGENUM_</p> <footer><div style="margin: auto; color: green">_PAGENUM_</div></footer>
calibre will automatically replace _PAGENUM_ with the current page number. You calibre will automatically replace :code:`_PAGENUM_` with the current page number. You
can even put different content on even and odd pages, for example the following can even put different content on even and odd pages, for example the following
header template will show the title on odd pages and the author on even pages:: header template will show the title on odd pages and the author on even pages::
<p style="text-align:right"><span class="even_page">_AUTHOR_</span><span class="odd_page"><i>_TITLE_</i></span></p> <header style="justify-content: flex-end">
<div class="even_page">_AUTHOR_</div>
<div class="odd_page"><i>_TITLE_</i></div>
</header>
calibre will automatically replace _TITLE_ and _AUTHOR_ with the title and author calibre will automatically replace :code:`_TITLE_` and :code:`_AUTHOR_` with
of the document being converted. You can also display text at the left and the title and author of the document being converted. You can also display
right edges and change the font size, as demonstrated with this header text at the left and right edges and change the font size, as demonstrated with
template:: this header template::
<div style="font-size:x-small"><p style="float:left">_TITLE_</p><p style="float:right;"><i>_AUTHOR_</i></p></div> <header style="justify-content: space-between; font-size: smaller">
<div>_TITLE_</div>
<div>_AUTHOR_</div>
</header>
This will display the title at the left and the author at the right, in a font This will display the title at the left and the author at the right, in a font
size smaller than the main text. size smaller than the main text.
You can also use the current section in templates, as shown below:: You can also use the current section in templates, as shown below::
<p style="text-align:right">_SECTION_</p> <header><div>_SECTION_</div></header>
_SECTION_ is replaced by whatever the name of the current section is. These :code:`_SECTION_` is replaced by whatever the name of the current section is. These
names are taken from the metadata Table of Contents in the document (the PDF names are taken from the metadata Table of Contents in the document (the PDF
Outline). If the document has no table of contents then it will be replaced by Outline). If the document has no table of contents then it will be replaced by
empty text. If a single PDF page has multiple sections, the first section on empty text. If a single PDF page has multiple sections, the first section on
the page will be used. Similarly, there is a variable named _TOP_LEVEL_SECTION_ the page will be used. Similarly, there is a variable named :code:`_TOP_LEVEL_SECTION_`
that can be used to get the name of the current top-level section. that can be used to get the name of the current top-level section.
You can even use javascript inside the header and footer templates, for You can even use JavaScript inside the header and footer templates, for
example, the following template will cause page numbers to start at 4 instead example, the following template will cause page numbers to start at 4 instead
of 1:: of 1::
<p id="pagenum" style="text-align:center;"></p><script>document.getElementById("pagenum").innerHTML = "" + (_PAGENUM_ + 3)</script> <p id="pagenum" style="text-align:center;"></p><script>document.getElementById("pagenum").innerHTML = "" + (_PAGENUM_ + 3)</script>
.. note:: When adding headers and footers make sure you set the page top and .. note:: When adding headers and footers make sure you set the page top and
bottom margins to large enough values, under the Page setup section of the bottom margins to large enough values, under the :guilabel:`PDF Output`
conversion dialog. section of the conversion dialog.
Printable Table of Contents Printable Table of Contents
^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -104,7 +104,7 @@ _archive_re = re.compile(r'[^ ]+')
self_closing_bad_tags = {'a', 'abbr', 'address', 'article', 'aside', 'audio', 'b', self_closing_bad_tags = {'a', 'abbr', 'address', 'article', 'aside', 'audio', 'b',
'bdo', 'blockquote', 'body', 'button', 'cite', 'code', 'dd', 'del', 'details', 'bdo', 'blockquote', 'body', 'button', 'cite', 'code', 'dd', 'del', 'details',
'dfn', 'div', 'dl', 'dt', 'em', 'fieldset', 'figcaption', 'figure', 'footer', 'dfn', 'div', 'dl', 'dt', 'em', 'fieldset', 'figcaption', 'figure', 'footer',
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'i', 'ins', 'kbd', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'i', 'iframe', 'ins', 'kbd',
'label', 'legend', 'li', 'map', 'mark', 'meter', 'nav', 'ol', 'output', 'p', 'label', 'legend', 'li', 'map', 'mark', 'meter', 'nav', 'ol', 'output', 'p',
'pre', 'progress', 'q', 'rp', 'rt', 'samp', 'section', 'select', 'small', 'pre', 'progress', 'q', 'rp', 'rt', 'samp', 'section', 'select', 'small',
'span', 'strong', 'sub', 'summary', 'sup', 'textarea', 'time', 'ul', 'var', 'span', 'strong', 'sub', 'summary', 'sup', 'textarea', 'time', 'ul', 'var',
@ -400,8 +400,8 @@ def xml2str(root, pretty_print=False, strip_comments=False, with_tail=True):
return ans return ans
def xml2text(elem, pretty_print=False): def xml2text(elem, pretty_print=False, method='text'):
return etree.tostring(elem, method='text', encoding='unicode', with_tail=False, pretty_print=pretty_print) return etree.tostring(elem, method=method, encoding='unicode', with_tail=False, pretty_print=pretty_print)
def escape_cdata(root): def escape_cdata(root):

View File

@ -18,7 +18,7 @@ OUTPUT = '/t/dev.pdf'
class Renderer(QWebEnginePage): class Renderer(QWebEnginePage):
def do_print(self, ok): def do_print(self, ok):
p = QPageLayout(QPageSize(QPageSize(QPageSize.A6)), QPageLayout.Portrait, QMarginsF(10, 10, 10, 10)) p = QPageLayout(QPageSize(QPageSize(QPageSize.A4)), QPageLayout.Portrait, QMarginsF(72, 0, 72, 0))
self.printToPdf(self.print_finished, p) self.printToPdf(self.print_finished, p)
def print_finished(self, pdf_data): def print_finished(self, pdf_data):
@ -28,8 +28,6 @@ class Renderer(QWebEnginePage):
podofo = get_podofo() podofo = get_podofo()
doc = podofo.PDFDoc() doc = podofo.PDFDoc()
doc.load(pdf_data) doc.load(pdf_data)
from pprint import pprint
pprint(doc.extract_anchors())
def main(): def main():

View File

@ -13,6 +13,7 @@ import signal
import sys import sys
from collections import namedtuple from collections import namedtuple
from io import BytesIO from io import BytesIO
from itertools import repeat
from operator import attrgetter, itemgetter from operator import attrgetter, itemgetter
from PyQt5.Qt import ( from PyQt5.Qt import (
@ -20,10 +21,10 @@ from PyQt5.Qt import (
) )
from PyQt5.QtWebEngineWidgets import QWebEnginePage from PyQt5.QtWebEngineWidgets import QWebEnginePage
from calibre import detect_ncpus from calibre import detect_ncpus, prepare_string_for_xml
from calibre.constants import iswindows from calibre.constants import iswindows
from calibre.ebooks.metadata.xmp import metadata_to_xmp_packet from calibre.ebooks.metadata.xmp import metadata_to_xmp_packet
from calibre.ebooks.oeb.base import XHTML from calibre.ebooks.oeb.base import XHTML, xml2text
from calibre.ebooks.oeb.polish.container import Container as ContainerBase from calibre.ebooks.oeb.polish.container import Container as ContainerBase
from calibre.ebooks.oeb.polish.toc import get_toc from calibre.ebooks.oeb.polish.toc import get_toc
from calibre.ebooks.pdf.image_writer import ( from calibre.ebooks.pdf.image_writer import (
@ -55,7 +56,7 @@ def data_as_pdf_doc(data):
def create_skeleton(container): def create_skeleton(container):
spine_name = next(container.spine_names)[0] spine_name = tuple(container.spine_names)[-1][0]
root = container.parsed(spine_name) root = container.parsed(spine_name)
root = copy.deepcopy(root) root = copy.deepcopy(root)
body = root[-1] body = root[-1]
@ -213,20 +214,23 @@ class RenderManager(QObject):
QApplication.instance().exit(OK) QApplication.instance().exit(OK)
def resolve_margins(margins, page_layout):
    """Build a fully specified ``Margins`` from a possibly partial one.

    For each of ``left``, ``top``, ``right`` and ``bottom`` the value is read
    from the same-named attribute of ``margins``. When that attribute is
    missing or is ``None``, the value comes from calling the same-named method
    on ``page_layout.marginsPoints()`` instead.

    :param margins: object that may carry ``left``/``top``/``right``/``bottom``
        attributes; any of them may be absent or ``None``.
    :param page_layout: object providing ``marginsPoints()``.
    :return: ``Margins(left, top, right, bottom)``.
    """
    fallback = page_layout.marginsPoints()
    resolved = []
    for side in ('left', 'top', 'right', 'bottom'):
        value = getattr(margins, side, None)
        if value is None:
            # Not overridden for this side, use the page layout's own margin
            value = getattr(fallback, side)()
        resolved.append(value)
    return Margins(*resolved)
def job_for_name(container, name, margins, page_layout): def job_for_name(container, name, margins, page_layout):
index_file = container.name_to_abspath(name) index_file = container.name_to_abspath(name)
if margins: if margins:
def m(which):
ans = getattr(margins, which)
if ans is None:
ans = getattr(old_margins, which)()
return ans
page_layout = QPageLayout(page_layout) page_layout = QPageLayout(page_layout)
page_layout.setUnits(QPageLayout.Point) page_layout.setUnits(QPageLayout.Point)
old_margins = page_layout.marginsPoints() new_margins = QMarginsF(*resolve_margins(margins, page_layout))
new_margins = QMarginsF(*map(m, 'left top right bottom'.split()))
page_layout.setMargins(new_margins) page_layout.setMargins(new_margins)
return index_file, page_layout, name return index_file, page_layout, name
# }}} # }}}
@ -324,14 +328,14 @@ def make_anchors_unique(container):
else: else:
name = container.href_to_name(href, base) name = container.href_to_name(href, base)
if not name: if not name:
return url return url.rstrip('#')
if not frag and name in spine_names: if not frag and name in spine_names:
replacer.replaced = True replacer.replaced = True
return 'https://calibre-pdf-anchor.n#' + name return 'https://calibre-pdf-anchor.n#' + name
key = name, frag key = name, frag
new_frag = mapping.get(key) new_frag = mapping.get(key)
if new_frag is None: if new_frag is None:
return url return url.rstrip('#')
replacer.replaced = True replacer.replaced = True
return 'https://calibre-pdf-anchor.a#' + new_frag return 'https://calibre-pdf-anchor.a#' + new_frag
if url.startswith('#'): if url.startswith('#'):
@ -782,6 +786,98 @@ def test_merge_fonts():
# }}} # }}}
# Header/footer {{{
PAGE_NUMBER_TEMPLATE = '<footer><div style="margin: auto">_PAGENUM_</div></footer>'


def add_header_footer(manager, opts, pdf_doc, container, page_number_display_map, page_layout, page_margins_map, pdf_metadata, report_progress):
    """Render the header/footer templates for every page and append the result to ``pdf_doc``.

    One ``<div>`` is generated per page of ``pdf_doc``. Each div holds an
    ``<iframe>`` for the header and/or footer whose ``srcdoc`` is the skeleton
    document with the expanded template inserted before ``</body>``. The
    resulting file is converted through ``manager`` and the converted document
    is appended to ``pdf_doc``.

    :param manager: provides ``convert_html_files(jobs, settle_time=...)``.
    :param opts: read for ``pdf_header_template``, ``pdf_footer_template`` and
        ``pdf_page_numbers``. When no footer template is set but
        ``pdf_page_numbers`` is true, ``PAGE_NUMBER_TEMPLATE`` is used.
    :param pdf_doc: document providing ``page_count()`` and ``append()``.
    :param container: the book container; the skeleton file is created in it
        and committed.
    :param page_number_display_map: indexed by the 1-based page number to get
        the value substituted for ``_PAGENUM_``.
    :param page_layout: page layout passed through to ``job_for_name``.
    :param page_margins_map: sequence indexed by the 0-based page number giving
        an object with ``left``, ``right``, ``top`` and ``bottom`` values.
    :param pdf_metadata: read for ``title`` and ``author``.
    :param report_progress: callable taking ``(fraction, message)``.
    :raises ValueError: if the template could not be inserted into the skeleton.
    :raises SystemExit: if the conversion result is not ``bytes``.
    """
    # Imported locally so this function does not depend on a module level import
    import re

    header_template, footer_template = opts.pdf_header_template, opts.pdf_footer_template
    if not footer_template and opts.pdf_page_numbers:
        footer_template = PAGE_NUMBER_TEMPLATE
    if not header_template and not footer_template:
        return
    report_progress(0.8, _('Adding headers and footers'))
    name = create_skeleton(container)
    root = container.parsed(name)
    body = root[-1]
    body.set('style', 'margin: 0; padding: 0; border-width: 0')
    # Serialize before the tree is modified below: this snapshot is the
    # document that every header/footer iframe is based on.
    skeleton = xml2text(root, method='html')
    job = job_for_name(container, name, Margins(0, 0, 0, 0), page_layout)

    def m(tag_name, text=None, **attrs):
        ans = root.makeelement(XHTML(tag_name), **attrs)
        if text is not None:
            ans.text = text
        return ans

    # Footer only: push to the bottom, header only: keep at the top,
    # both: header at the top and footer at the bottom.
    justify = 'flex-end'
    if header_template:
        justify = 'space-between' if footer_template else 'flex-start'
    # Replace all children of the first element of root with a single
    # stylesheet that lays out one div per page.
    del root[0][:]
    root[0].append(m('style', '''
* {{ margin: 0; padding: 0; border-width: 0; box-sizing: border-box; }}
div {{
page-break-inside: avoid;
page-break-after:always;
display: flex;
flex-direction: column;
height: 100%;
margin-bottom: 0pt;
justify-content: {justify}
}}
'''.format(justify=justify)))

    def create_iframe(margins, f, is_footer=False):
        style = {
            'margin-left': '{}pt'.format(margins.left),
            'margin-right': '{}pt'.format(margins.right),
            'height': '{}pt'.format(margins.bottom if is_footer else margins.top)}
        style = '; '.join('{}: {}'.format(k, v) for k, v in iteritems(style))
        return m(
            'iframe', seamless='seamless', style=style,
            srcdoc=f
        )

    # Values that are the same for every page are computed once, here.
    base_style = 'header, footer { margin: 0; padding: 0; border-width: 0; height: 100vh; display: flex; align-items: center }'
    placeholder_pat = re.compile(r'_(?:PAGENUM|TITLE|AUTHOR)_')
    static_values = {
        '_TITLE_': prepare_string_for_xml(pdf_metadata.title, True),
        '_AUTHOR_': prepare_string_for_xml(pdf_metadata.author, True),
    }

    def format_template(template, page_num):
        # TODO: _SECTION_ and _TOP_LEVEL_SECTION_
        values = dict(static_values, _PAGENUM_=unicode_type(page_number_display_map[page_num]))
        # Expand all placeholders in a single pass so that substituted text is
        # never rescanned: a title containing the literal text _AUTHOR_ must
        # not have the author injected into it.
        template = placeholder_pat.sub(lambda mo: values[mo.group()], template)
        # Odd numbered pages hide the even_page content and vice versa
        hidden = '.even_page' if page_num % 2 else '.odd_page'
        template += '<style>{}</style>'.format(base_style + hidden + ' { display: none }')
        repl = skeleton.replace('</body>', template + '</body>', 1)
        if repl == skeleton:
            raise ValueError('Failed to insert template into skeleton: ' + skeleton)
        return repl

    for page_num in range(1, pdf_doc.page_count() + 1):
        div = m('div')
        body.append(div)
        # page_margins_map is 0-based while page numbers are 1-based
        margins = page_margins_map[page_num - 1]
        if header_template:
            f = format_template(header_template, page_num)
            div.append(create_iframe(margins, f))
        if footer_template:
            f = format_template(footer_template, page_num)
            div.append(create_iframe(margins, f, True))

    container.commit()
    results = manager.convert_html_files([job], settle_time=2)
    data = results[name]
    if not isinstance(data, bytes):
        # A non-bytes result is used as the exit status/message
        raise SystemExit(data)
    doc = data_as_pdf_doc(data)
    pdf_doc.append(doc)
    report_progress(0.9, _('Headers and footers added'))
# }}}
def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, cover_data=None, report_progress=lambda x, y: None): def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, cover_data=None, report_progress=lambda x, y: None):
container = Container(opf_path, log) container = Container(opf_path, log)
report_progress(0.05, _('Parsed all content for markup transformation')) report_progress(0.05, _('Parsed all content for markup transformation'))
@ -802,6 +898,7 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
jobs.append(job_for_name(container, margin_file.name, margin_file.margins, page_layout)) jobs.append(job_for_name(container, margin_file.name, margin_file.margins, page_layout))
results = manager.convert_html_files(jobs, settle_time=1) results = manager.convert_html_files(jobs, settle_time=1)
num_pages = 0 num_pages = 0
page_margins_map = []
for margin_file in margin_files: for margin_file in margin_files:
name = margin_file.name name = margin_file.name
data = results[name] data = results[name]
@ -809,7 +906,9 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
raise SystemExit(data) raise SystemExit(data)
doc = data_as_pdf_doc(data) doc = data_as_pdf_doc(data)
anchor_locations.update(get_anchor_locations(doc, num_pages + 1, links_page_uuid)) anchor_locations.update(get_anchor_locations(doc, num_pages + 1, links_page_uuid))
num_pages += doc.page_count() doc_pages = doc.page_count()
page_margins_map.extend(repeat(resolve_margins(margin_file.margins, page_layout), doc_pages))
num_pages += doc_pages
if pdf_doc is None: if pdf_doc is None:
pdf_doc = doc pdf_doc = doc
@ -837,11 +936,15 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
add_toc(PDFOutlineRoot(pdf_doc), toc) add_toc(PDFOutlineRoot(pdf_doc), toc)
report_progress(0.75, _('Added links to PDF content')) report_progress(0.75, _('Added links to PDF content'))
pdf_metadata = PDFMetadata(metadata)
add_header_footer(manager, opts, pdf_doc, container, page_number_display_map, page_layout, page_margins_map, pdf_metadata, report_progress)
merge_fonts(pdf_doc) merge_fonts(pdf_doc)
num_removed = dedup_type3_fonts(pdf_doc) num_removed = dedup_type3_fonts(pdf_doc)
if num_removed: if num_removed:
log('Removed', num_removed, 'duplicated Type3 glyphs') log('Removed', num_removed, 'duplicated Type3 glyphs')
# TODO: dedup images
# TODO: Support for mathematics # TODO: Support for mathematics
num_removed = remove_unused_fonts(pdf_doc) num_removed = remove_unused_fonts(pdf_doc)
@ -852,7 +955,7 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
add_cover(pdf_doc, cover_data, page_layout, opts) add_cover(pdf_doc, cover_data, page_layout, opts)
if metadata is not None: if metadata is not None:
update_metadata(pdf_doc, PDFMetadata(metadata)) update_metadata(pdf_doc, pdf_metadata)
report_progress(1, _('Updated metadata in PDF')) report_progress(1, _('Updated metadata in PDF'))
if opts.uncompressed_pdf: if opts.uncompressed_pdf: