PDF Output: Generate a PDF Outline based on the Table of Contents of the input document

This commit is contained in:
Kovid Goyal 2012-08-29 13:39:44 +05:30
parent 8c04dc14bd
commit 1dea118004
4 changed files with 138 additions and 16 deletions

View File

@ -107,7 +107,7 @@ class PDFOutput(OutputFormatPlugin):
def convert_images(self, images):
from calibre.ebooks.pdf.writer import ImagePDFWriter
self.write(ImagePDFWriter, images)
self.write(ImagePDFWriter, images, None)
def get_cover_data(self):
oeb = self.oeb
@ -132,11 +132,13 @@ class PDFOutput(OutputFormatPlugin):
opfpath = glob.glob(os.path.join(oeb_dir, '*.opf'))[0]
opf = OPF(opfpath, os.path.dirname(opfpath))
self.write(PDFWriter, [s.path for s in opf.spine])
self.write(PDFWriter, [s.path for s in opf.spine], getattr(opf,
'toc', None))
def write(self, Writer, items):
def write(self, Writer, items, toc):
from calibre.ebooks.pdf.writer import PDFMetadata
writer = Writer(self.opts, self.log, cover_data=self.cover_data)
writer = Writer(self.opts, self.log, cover_data=self.cover_data,
toc=toc)
close = False
if not hasattr(self.output_path, 'write'):

View File

@ -0,0 +1,66 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from collections import defaultdict
class Outline(object):
    """Build a PDF outline from a table of contents.

    While the input files are rendered, the page position of every TOC
    target is recorded with :meth:`set_pos`. Calling the instance with a
    PDF document object afterwards creates one outline entry per TOC node.
    """

    def __init__(self, toc, items):
        """Map every TOC entry to the rendered item it points into.

        :param toc: Root TOC node, or ``None`` if there is no table of
            contents. Nodes must be iterable over their children, provide
            ``flat()`` and carry ``abspath``, ``fragment`` and ``text``.
        :param items: Paths of the files that will be rendered. They are
            used as the keys of ``anchor_map`` and ``pos_map``.
        """
        self.toc = toc
        self.items = items
        # item -> set of fragment names the TOC references inside that item
        self.anchor_map = {}
        # item -> {anchor or None: (pagenum, ypos)}
        self.pos_map = defaultdict(dict)
        self.toc_map = {}

        # Flatten the TOC once instead of once per item
        entries = [] if toc is None else list(toc.flat())

        for item in items:
            self.anchor_map[item] = anchors = set()
            item_path = os.path.abspath(item).replace('/', os.sep)
            for entry in entries:
                if entry.abspath != item_path:
                    continue
                # Remember which rendered item this TOC entry belongs to
                entry.outline_item_ = item
                if entry.fragment:
                    anchors.add(entry.fragment)

    def set_pos(self, item, anchor, pagenum, ypos):
        """Record where ``anchor`` of ``item`` ended up in the output.

        An ``anchor`` of ``None`` stands for the start of the item itself.
        """
        self.pos_map[item][anchor] = (pagenum, ypos)

    def get_pos(self, toc):
        """Return ``(pagenum, ypos)`` for the TOC node ``toc``.

        Nodes that were not matched to any item yield ``(0, 0)``. If the
        node has a fragment whose position was never recorded, the
        position of the start of its item is used, so that the outline
        entry at least points at the right file rather than at page 0.
        """
        item = getattr(toc, 'outline_item_', None)
        if item is None:
            return 0, 0
        # .get() so that a lookup never inserts into the defaultdict
        positions = self.pos_map.get(item, {})
        page, ypos = positions.get(None, (0, 0))
        if toc.fragment:
            page, ypos = positions.get(toc.fragment, (page, ypos))
        return page, ypos

    def add_children(self, toc, parent):
        """Recursively create outline entries for the children of ``toc``
        below the outline item ``parent``."""
        for child in toc:
            page, ypos = self.get_pos(child)
            text = child.text or _('Page %d')%page
            cn = parent.create(text, page, True)
            self.add_children(child, cn)

    def __call__(self, doc):
        """Create the outline in ``doc`` from the recorded positions.

        Does nothing when there is no table of contents.
        """
        # Freeze the map: from here on missing items must not be created
        self.pos_map = dict(self.pos_map)
        if self.toc is None:
            return
        first = None
        for child in self.toc:
            page, ypos = self.get_pos(child)
            text = child.text or _('Page %d')%page
            if first is None:
                first = node = doc.create_outline(text, page)
            else:
                node = first.create(text, page, False)
            self.add_children(child, node)

View File

@ -8,16 +8,16 @@ __docformat__ = 'restructuredtext en'
Write content to PDF.
'''
import os
import shutil
import os, shutil, json
from future_builtins import map
from PyQt4.Qt import (QEventLoop, QObject, QPrinter, QSizeF, Qt, QPainter,
QPixmap, QTimer)
from PyQt4.QtWebKit import QWebView
QPixmap, QTimer, pyqtProperty, QString)
from PyQt4.QtWebKit import QWebView, QWebPage
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.ebooks.pdf.pageoptions import (unit, paper_size, orientation)
from calibre.ebooks.pdf.outline_writer import Outline
from calibre.ebooks.metadata import authors_to_string
from calibre.ptempfile import PersistentTemporaryFile
from calibre import __appname__, __version__, fit_image, isosx, force_unicode
@ -36,7 +36,7 @@ def get_custom_size(opts):
custom_size = None
return custom_size
def get_pdf_printer(opts, for_comic=False, output_file_name=None):
def get_pdf_printer(opts, for_comic=False, output_file_name=None): # {{{
from calibre.gui2 import is_ok_to_use_qt
if not is_ok_to_use_qt():
raise Exception('Not OK to use Qt')
@ -82,6 +82,7 @@ def get_pdf_printer(opts, for_comic=False, output_file_name=None):
printer.setOutputFormat(QPrinter.NativeFormat)
return printer
# }}}
def draw_image_page(printer, painter, p, preserve_aspect_ratio=True):
page_rect = printer.pageRect()
@ -102,7 +103,7 @@ def draw_image_page(printer, painter, p, preserve_aspect_ratio=True):
painter.drawPixmap(page_rect, p, p.rect())
class PDFMetadata(object):
class PDFMetadata(object): # {{{
def __init__(self, oeb_metadata=None):
self.title = _(u'Unknown')
self.author = _(u'Unknown')
@ -118,10 +119,24 @@ class PDFMetadata(object):
self.title = force_unicode(self.title)
self.author = force_unicode(self.author)
# }}}
class Page(QWebPage):
    """QWebPage that routes JavaScript console output and alerts to a log."""

    def __init__(self, log):
        # log must be callable and provide a debug() method
        self.log = log
        QWebPage.__init__(self)

    def javaScriptConsoleMessage(self, msg, lineno, msgid):
        # Console messages are only of interest when debugging
        text = unicode(msg)
        self.log.debug(u'JS:', text)

    def javaScriptAlert(self, frame, msg):
        # Alerts go to the log instead of a dialog
        text = unicode(msg)
        self.log(text)
class PDFWriter(QObject): # {{{
def __init__(self, opts, log, cover_data=None):
def __init__(self, opts, log, cover_data=None, toc=None):
from calibre.gui2 import is_ok_to_use_qt
from calibre.utils.podofo import get_podofo
if not is_ok_to_use_qt():
@ -134,6 +149,8 @@ class PDFWriter(QObject): # {{{
self.loop = QEventLoop()
self.view = QWebView()
self.page = Page(self.log)
self.view.setPage(self.page)
self.view.setRenderHints(QPainter.Antialiasing|QPainter.TextAntialiasing|QPainter.SmoothPixmapTransform)
self.view.loadFinished.connect(self._render_html,
type=Qt.QueuedConnection)
@ -147,10 +164,12 @@ class PDFWriter(QObject): # {{{
self.opts = opts
self.cover_data = cover_data
self.paged_js = None
self.toc = toc
def dump(self, items, out_stream, pdf_metadata):
self.metadata = pdf_metadata
self._delete_tmpdir()
self.outline = Outline(self.toc, items)
self.render_queue = items
self.combine_queue = []
@ -178,6 +197,7 @@ class PDFWriter(QObject): # {{{
self.combine_queue.append(os.path.join(self.tmp_path, '%i.pdf' % (len(self.combine_queue) + 1)))
self.logger.debug('Processing %s...' % item)
self.current_item = item
load_html(item, self.view)
def _render_html(self, ok):
@ -192,11 +212,22 @@ class PDFWriter(QObject): # {{{
return
self._render_book()
def _pass_json_value_getter(self):
    # Hand bridge_value to JavaScript as a JSON encoded string
    return QString(json.dumps(self.bridge_value))

def _pass_json_value_setter(self, value):
    # JavaScript assigns a JSON encoded string; decode it back into
    # Python objects and store the result in bridge_value
    decoded = json.loads(unicode(value))
    self.bridge_value = decoded

# Qt property through which JSON strings are exchanged with JavaScript
_pass_json_value = pyqtProperty(
    QString,
    fset=_pass_json_value_setter,
    fget=_pass_json_value_getter)
def do_paged_render(self, outpath):
from PyQt4.Qt import QSize, QPainter
if self.paged_js is None:
from calibre.utils.resources import compiled_coffeescript
self.paged_js = compiled_coffeescript('ebooks.oeb.display.utils')
self.paged_js += compiled_coffeescript('ebooks.oeb.display.indexing')
self.paged_js += compiled_coffeescript('ebooks.oeb.display.paged')
printer = get_pdf_printer(self.opts, output_file_name=outpath)
painter = QPainter(printer)
@ -204,12 +235,20 @@ class PDFWriter(QObject): # {{{
zoomy = printer.logicalDpiY()/self.view.logicalDpiY()
painter.scale(zoomx, zoomy)
self.view.page().mainFrame().addToJavaScriptWindowObject("py_bridge", self)
pr = printer.pageRect()
evaljs = self.view.page().mainFrame().evaluateJavaScript
evaljs(self.paged_js)
self.view.page().setViewportSize(QSize(pr.width()/zoomx,
pr.height()/zoomy))
evaljs('''
py_bridge.__defineGetter__('value', function() {
return JSON.parse(this._pass_json_value);
});
py_bridge.__defineSetter__('value', function(val) {
this._pass_json_value = JSON.stringify(val);
});
document.body.style.backgroundColor = "white";
paged_display.set_geometry(1, 0, 0, 0);
paged_display.layout();
@ -223,6 +262,17 @@ class PDFWriter(QObject): # {{{
evaljs('window.scrollTo(%d, 0)'%nsl[0])
printer.newPage()
self.bridge_value = tuple(self.outline.anchor_map[self.current_item])
evaljs('py_bridge.value = book_indexing.anchor_positions(py_bridge.value)')
amap = self.bridge_value
if not isinstance(amap, dict):
amap = {} # Some javascript error occurred
pages = self.doc.page_count()
self.outline.set_pos(self.current_item, None, pages, 0)
for anchor, x in amap.iteritems():
pagenum, ypos = x
self.outline.set_pos(self.current_item, anchor, pages + pagenum, ypos)
painter.end()
printer.abort()
self.append_doc(outpath)
@ -266,6 +316,7 @@ class PDFWriter(QObject): # {{{
self.doc.author = self.metadata.author
if self.metadata.tags:
self.doc.keywords = self.metadata.tags
self.outline(self.doc)
raw = self.doc.write()
self.out_stream.write(raw)
self.render_succeeded = True
@ -275,9 +326,9 @@ class PDFWriter(QObject): # {{{
# }}}
class ImagePDFWriter(object):
class ImagePDFWriter(object): # {{{
def __init__(self, opts, log, cover_data=None):
def __init__(self, opts, log, cover_data=None, toc=None):
self.opts = opts
self.log = log
@ -326,6 +377,6 @@ class ImagePDFWriter(object):
self.log.warn('Failed to load image', i)
painter.end()
# }}}

View File

@ -200,8 +200,9 @@ PDFDoc_create_outline(PDFDoc *self, PyObject *args) {
PyObject *p;
PDFOutlineItem *ans;
PdfString *title;
int pagenum;
if (!PyArg_ParseTuple(args, "U", &p)) return NULL;
if (!PyArg_ParseTuple(args, "Ui", &p, &pagenum)) return NULL;
title = podofo_convert_pystring(p);
if (title == NULL) return NULL;
@ -214,6 +215,8 @@ PDFDoc_create_outline(PDFDoc *self, PyObject *args) {
ans->item = outlines->CreateRoot(*title);
if (ans->item == NULL) {PyErr_NoMemory(); goto error;}
ans->doc = self->doc;
PdfDestination dest(self->doc->GetPage(pagenum));
ans->item->SetDestination(dest);
} catch(const PdfError & err) {
podofo_set_exception(err); goto error;
} catch (...) {
@ -466,7 +469,7 @@ static PyMethodDef PDFDoc_methods[] = {
"set_box(page_num, box, left, bottom, width, height) -> Set the PDF bounding box for the page numbered nu, box must be one of: MediaBox, CropBox, TrimBox, BleedBox, ArtBox. The numbers are interpreted as pts."
},
{"create_outline", (PyCFunction)PDFDoc_create_outline, METH_VARARGS,
"create_outline(title) -> Create an outline, return the root outline item."
"create_outline(title, pagenum) -> Create an outline, return the first outline item."
},
{NULL} /* Sentinel */