Add support for page number mapping

This commit is contained in:
Kovid Goyal 2019-07-14 09:55:51 +05:30
parent 43d7e2119a
commit e575be49e3
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 45 additions and 90 deletions

View File

@ -33,7 +33,7 @@ from calibre.gui2.webengine import secure_webengine
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
from calibre.utils.podofo import get_podofo, set_metadata_implementation from calibre.utils.podofo import get_podofo, set_metadata_implementation
from calibre.utils.short_uuid import uuid4 from calibre.utils.short_uuid import uuid4
from polyglot.builtins import iteritems, range from polyglot.builtins import iteritems, range, unicode_type
from polyglot.urllib import urlparse from polyglot.urllib import urlparse
OK, KILL_SIGNAL = range(0, 2) OK, KILL_SIGNAL = range(0, 2)
@ -175,6 +175,19 @@ class RenderManager(QObject):
raise SystemExit('Unknown error occurred') raise SystemExit('Unknown error occurred')
return self.results return self.results
# Run the JavaScript source ``js`` on the first worker and return the value
# that evaljs_callback() stored in self.evaljs_result (None if nothing was
# stored).
def evaljs(self, js):
# Make sure there is a worker to run the script on; create_worker() is
# presumably what populates self.workers -- confirm against its definition.
if not self.workers:
self.create_worker()
w = self.workers[0]
# Clear any value left over from a previous call before starting a new one.
self.evaljs_result = None
w.runJavaScript(js, self.evaljs_callback)
# Run the application event loop here; evaljs_callback() exits it once it has
# received the script's result, which makes this call return.
QApplication.exec_()
return self.evaljs_result
# Completion callback that evaljs() passes to runJavaScript(): stores the value
# produced by the script and exits the event loop evaljs() is waiting in.
def evaljs_callback(self, result):
self.evaljs_result = result
# Exiting the application event loop lets the QApplication.exec_() call in
# evaljs() return.
QApplication.instance().exit(0)
def assign_work(self): def assign_work(self):
free_workers = [w for w in self.workers if not w.working] free_workers = [w for w in self.workers if not w.working]
while free_workers and self.pending: while free_workers and self.pending:
@ -423,7 +436,30 @@ def add_toc(pdf_parent, toc_parent):
add_toc(pdf_child, child) add_toc(pdf_child, child)
def get_page_number_display_map(render_manager, opts, num_pages, log):
    """Return a dict mapping page numbers to the numbers that should be displayed.

    When ``opts.pdf_page_number_map`` is empty the result is the identity
    mapping.  Otherwise the option is treated as a JavaScript expression in the
    variable ``n`` and is evaluated, via ``render_manager.evaljs()``, once for
    every page number.  If the evaluation does not yield a JSON object whose
    keys and values can all be converted to ``int``, a warning is logged and
    the identity mapping is returned instead.

    :param render_manager: object providing ``evaljs(js)``, which returns the
        value of the last statement of the script
    :param opts: conversion options; only ``pdf_page_number_map`` is read
    :param num_pages: page count the mapping is based on
    :param log: logger providing ``warn(msg)``
    :return: ``{page_number: displayed_page_number}`` with ``int`` keys/values
    """
    # NOTE(review): the map covers twice the supplied page count -- presumably
    # headroom for pages added after this point; confirm with the caller.
    num_pages *= 2
    default_map = {n: n for n in range(1, num_pages + 1)}
    if not opts.pdf_page_number_map:
        return default_map

    # NOTE: the user supplied expression is passed to eval() inside the
    # worker's JavaScript context; it is only JSON-quoted, not sandboxed.
    #
    # NUM_PAGES is substituted *before* MAP_EXPRESSION.  In the other order a
    # user expression that happens to contain the text "NUM_PAGES" would
    # swallow the single replacement and leave the loop bound unfilled.
    js = '''
function map_num(n) { return eval(MAP_EXPRESSION); }
var ans = {};
for (var i=1; i <= NUM_PAGES; i++) ans[i] = map_num(i);
JSON.stringify(ans);
'''.replace('NUM_PAGES', '{}'.format(num_pages), 1).replace(
        'MAP_EXPRESSION', json.dumps(opts.pdf_page_number_map), 1)

    result = render_manager.evaljs(js)
    try:
        result = json.loads(result)
        if not isinstance(result, dict):
            raise ValueError('Not a dict')
        # Converted inside the try block: values such as null (JSON encoding of
        # NaN/Infinity) or strings must trigger the fallback, not a crash.
        return {int(k): int(v) for k, v in result.items()}
    except Exception:
        log.warn('Could not do page number mapping, got unexpected result: {}'.format(repr(result)))
    return default_map
def add_pagenum_toc(root, toc, opts, page_number_display_map):
body = root[-1] body = root[-1]
indents = [] indents = []
for i in range(1, 7): for i in range(1, 7):
@ -464,7 +500,9 @@ def add_pagenum_toc(root, toc, opts):
for level, node in toc.iterdescendants(level=0): for level, node in toc.iterdescendants(level=0):
tr = E('tr', cls='level-%d' % level, parent=table) tr = E('tr', cls='level-%d' % level, parent=table)
E('td', text=node.title or _('Unknown'), parent=tr) E('td', text=node.title or _('Unknown'), parent=tr)
E('td', text='{}'.format(node.pdf_loc.pagenum), parent=tr) num = node.pdf_loc.pagenum
num = page_number_display_map.get(num, num)
E('td', text='{}'.format(num), parent=tr)
# }}} # }}}
@ -504,12 +542,14 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
else: else:
pdf_doc.append(doc) pdf_doc.append(doc)
page_number_display_map = get_page_number_display_map(manager, opts, num_pages, log)
if has_toc: if has_toc:
annotate_toc(toc, anchor_locations, name_anchor_map, log) annotate_toc(toc, anchor_locations, name_anchor_map, log)
if opts.pdf_add_toc: if opts.pdf_add_toc:
tocname = create_skeleton(container) tocname = create_skeleton(container)
root = container.parsed(tocname) root = container.parsed(tocname)
add_pagenum_toc(root, toc, opts) add_pagenum_toc(root, toc, opts, page_number_display_map)
container.commit() container.commit()
jobs = [job_for_name(container, tocname, None, page_layout)] jobs = [job_for_name(container, tocname, None, page_layout)]
results = manager.convert_html_files(jobs, settle_time=1) results = manager.convert_html_files(jobs, settle_time=1)
@ -526,6 +566,7 @@ def convert(opf_path, opts, metadata=None, output_path=None, log=default_log, co
# TODO: Remove unused fonts # TODO: Remove unused fonts
# TODO: Remove duplicate fonts # TODO: Remove duplicate fonts
# TODO: Subset and embed fonts before rendering PDF # TODO: Subset and embed fonts before rendering PDF
# TODO: Support for mathematics
if cover_data: if cover_data:
add_cover(pdf_doc, cover_data, page_layout, opts) add_cover(pdf_doc, cover_data, page_layout, opts)

View File

@ -1,86 +0,0 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import os
from lxml.html import tostring
from lxml.html.builder import (HTML, HEAD, BODY, TABLE, TR, TD, H2, STYLE)
from polyglot.builtins import range, unicode_type
def calculate_page_number(num, map_expression, evaljs):
    """Return the page number to display for page *num*.

    With an empty *map_expression* the number is returned unchanged.
    Otherwise *map_expression* is wrapped in a JavaScript function in which
    ``n`` is bound to *num*, the script is handed to the *evaljs* callable and
    its result is converted to ``int``.
    """
    if not map_expression:
        return num
    script = '(function(){{var n={}; return {};}})()'.format(num, map_expression)
    return int(evaljs(script))
def convert_node(toc, table, level, pdf, pdf_page_number_map, evaljs):
    """Append a ``<tr>`` for the ToC entry *toc* to *table*.

    The row has two cells: the entry's text and the (possibly mapped) number
    of the page the entry points to.  Nothing is appended when the entry has
    no file path or when that path is not present in ``pdf.links.anchors``.

    :param toc: ToC node; ``text``, ``abspath`` and ``fragment`` are read
    :param table: lxml ``<table>`` element that receives the row
    :param level: nesting depth, used for the row's ``level-N`` CSS class
    :param pdf: object exposing ``links.anchors`` and ``page_tree.obj.get_num``
    :param pdf_page_number_map: JavaScript mapping expression (may be empty)
    :param evaljs: callable that evaluates a JavaScript string
    :return: always ``None``
    """
    # Resolve the destination first so no row is built for entries that end
    # up being skipped.
    path = toc.abspath or None
    if path is None:
        return None
    path = os.path.normcase(os.path.abspath(path))
    anchors = pdf.links.anchors
    if path not in anchors:
        return None
    file_anchors = anchors[path]
    frag = toc.fragment or None
    # Fall back to the None entry only when the fragment is unknown.  The
    # previous ``a.get(frag, a[None])`` evaluated ``a[None]`` unconditionally
    # and therefore raised KeyError even for a fragment that was present.
    dest = file_anchors[frag] if frag in file_anchors else file_anchors[None]
    num = calculate_page_number(
        pdf.page_tree.obj.get_num(dest[0]), pdf_page_number_map, evaljs)

    tr = TR(
        TD(toc.text or _('Unknown')),
        TD(unicode_type(num)),
    )
    tr.set('class', 'level-%d' % level)
    table.append(tr)
    return None
def process_children(toc, table, level, pdf, pdf_page_number_map, evaljs):
    """Add rows for every descendant of *toc* to *table*, depth first.

    Each child is passed to convert_node() with its nesting depth (direct
    children get *level*, their children ``level + 1`` and so on) and all of a
    child's descendants are handled before its next sibling.
    """
    # Stack of (iterator over siblings, depth of those siblings).
    pending = [(iter(toc), level)]
    while pending:
        siblings, depth = pending[-1]
        for node in siblings:
            convert_node(node, table, depth, pdf, pdf_page_number_map, evaljs)
            # Descend into this node before continuing with its siblings.
            pending.append((iter(node), depth + 1))
            break
        else:
            # No siblings left at this depth.
            pending.pop()
def toc_as_html(toc, pdf, opts, evaljs):
    """Serialize *toc* as a standalone HTML page with titles and page numbers.

    The page consists of a heading (``opts.toc_title`` or the translated
    default), a table with one row per ToC entry filled in by
    process_children(), and a stylesheet made of the built-in rules followed
    by ``opts.extra_css``.

    :return: the document as UTF-8 encoded bytes
    """
    engine_pdf = pdf.engine.pdf

    # Arguments for the six ``.level-%d`` rules below: level number followed
    # by its left padding in em, for levels 1 through 6.
    indent_args = tuple(v for lvl in range(1, 7) for v in (lvl, 1.4*lvl))
    css_template = '''
.calibre-pdf-toc table { width: 100%% }
.calibre-pdf-toc table tr td:last-of-type { text-align: right }
.calibre-pdf-toc .level-0 {
font-size: larger;
}
.calibre-pdf-toc .level-%d td:first-of-type { padding-left: %.1gem }
.calibre-pdf-toc .level-%d td:first-of-type { padding-left: %.1gem }
.calibre-pdf-toc .level-%d td:first-of-type { padding-left: %.1gem }
.calibre-pdf-toc .level-%d td:first-of-type { padding-left: %.1gem }
.calibre-pdf-toc .level-%d td:first-of-type { padding-left: %.1gem }
.calibre-pdf-toc .level-%d td:first-of-type { padding-left: %.1gem }
'''
    stylesheet = css_template % indent_args + (opts.extra_css or '')
    head = HEAD(STYLE(stylesheet))

    heading = H2(opts.toc_title or _('Table of Contents'))
    table = TABLE()
    body = BODY(heading, table)
    html = HTML(head, body)
    body.set('class', 'calibre-pdf-toc')

    process_children(toc, table, 0, engine_pdf, opts.pdf_page_number_map, evaljs)
    return tostring(html, pretty_print=True, include_meta_content_type=True, encoding='utf-8')