Add support for outlines and links to new pdf engine and make it the default engine

This commit is contained in:
Kovid Goyal 2012-12-29 00:29:21 +05:30
parent e1cce49ca9
commit e27704e4c4
6 changed files with 163 additions and 33 deletions

View File

@ -91,12 +91,14 @@ class PDFOutput(OutputFormatPlugin):
OptionRecommendation(name='pdf_mono_font_size',
recommended_value=16, help=_(
'The default font size for monospaced text')),
# OptionRecommendation(name='old_pdf_engine', recommended_value=False,
# help=_('Use the old, less capable engine to generate the PDF')),
# OptionRecommendation(name='uncompressed_pdf',
# recommended_value=False, help=_(
# 'Generate an uncompressed PDF, useful for debugging, '
# 'only works with the new PDF engine.')),
OptionRecommendation(name='pdf_mark_links', recommended_value=False,
help=_('Surround all links with a red box, useful for debugging.')),
OptionRecommendation(name='old_pdf_engine', recommended_value=False,
help=_('Use the old, less capable engine to generate the PDF')),
OptionRecommendation(name='uncompressed_pdf',
recommended_value=False, help=_(
'Generate an uncompressed PDF, useful for debugging, '
'only works with the new PDF engine.')),
])
def convert(self, oeb_book, output_path, input_plugin, opts, log):
@ -190,13 +192,12 @@ class PDFOutput(OutputFormatPlugin):
val[i].value = family_map[k]
def convert_text(self, oeb_book):
from calibre.utils.config import tweaks
if tweaks.get('new_pdf_engine', False):
from calibre.ebooks.pdf.render.from_html import PDFWriter
from calibre.ebooks.metadata.opf2 import OPF
if self.opts.old_pdf_engine:
from calibre.ebooks.pdf.writer import PDFWriter
PDFWriter
else:
from calibre.ebooks.pdf.writer import PDFWriter
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.pdf.render.from_html import PDFWriter
self.log.debug('Serializing oeb input to disk for processing...')
self.get_cover_data()

View File

@ -97,7 +97,8 @@ class Dictionary(dict):
def pdf_serialize(self, stream):
stream.write(b'<<' + EOL)
sorted_keys = sorted(self.iterkeys(),
key=lambda x:((' ' if x == 'Type' else '')+x))
key=lambda x:({'Type':'1', 'Subtype':'2'}.get(
x, x)+x))
for k in sorted_keys:
serialize(Name(k), stream)
stream.write(b' ')
@ -169,5 +170,11 @@ class Reference(object):
def pdf_serialize(self, stream):
    '''
    Write this reference to `stream` as ASCII bytes in the form
    "<num> 0 R", where <num> is self.num.
    '''
    stream.write(('%d 0 R' % self.num).encode('ascii'))
def __repr__(self):
    ''' Textual form of the reference: "<num> 0 R". '''
    text = '%d 0 R' % (self.num,)
    return text
def __str__(self):
    ''' Same text as repr(self). '''
    text = repr(self)
    return text
# }}}

View File

@ -188,10 +188,11 @@ class PdfEngine(QPaintEngine):
def __init__(self, file_object, page_width, page_height, left_margin,
top_margin, right_margin, bottom_margin, width, height,
errors=print, debug=print, compress=True):
errors=print, debug=print, compress=True,
mark_links=False):
QPaintEngine.__init__(self, self.features)
self.file_object = file_object
self.compress = compress
self.compress, self.mark_links = compress, mark_links
self.page_height, self.page_width = page_height, page_width
self.left_margin, self.top_margin = left_margin, top_margin
self.right_margin, self.bottom_margin = right_margin, bottom_margin
@ -249,10 +250,10 @@ class PdfEngine(QPaintEngine):
if not hasattr(self, 'pdf'):
try:
self.pdf = PDFStream(self.file_object, (self.page_width,
self.page_height),
compress=self.compress)
self.page_height), compress=self.compress,
mark_links=self.mark_links)
except:
self.errors.append(traceback.format_exc())
self.errors(traceback.format_exc())
return False
return True
@ -268,7 +269,7 @@ class PdfEngine(QPaintEngine):
self.end_page()
self.pdf.end()
except:
self.errors.append(traceback.format_exc())
self.errors(traceback.format_exc())
return False
finally:
self.pdf = self.file_object = None
@ -484,6 +485,24 @@ class PdfEngine(QPaintEngine):
def set_metadata(self, *args, **kwargs):
    ''' Pass every argument through, unchanged, to self.pdf.set_metadata. '''
    pdf = self.pdf
    pdf.set_metadata(*args, **kwargs)
def add_outline(self, toc):
    ''' Hand the table of contents `toc` to self.pdf.links.add_outline. '''
    link_manager = self.pdf.links
    link_manager.add_outline(toc)
def add_links(self, current_item, start_page, links, anchors):
    '''
    Convert anchor and link positions with self.pdf_system.map and pass
    the result on to self.pdf.links.add.

    Both `anchors` and `links` are modified in place:

      * every anchor position gets its 'left' and 'top' replaced by the
        mapped values
      * every link (a list whose second element is a position dict with
        left/top/width/height/column keys) gets that element replaced by
        the page number (column + start_page) and the mapped rectangle
        (llx, lly, urx, ury) appended as a tuple
    '''
    for anchor_pos in anchors.itervalues():
        mapped_left, mapped_top = self.pdf_system.map(
            anchor_pos['left'], anchor_pos['top'])
        anchor_pos['left'], anchor_pos['top'] = mapped_left, mapped_top

    for link in links:
        geom = link[1]
        left, top = geom['left'], geom['top']
        # Lower left corner first, then upper right corner
        llx, lly = self.pdf_system.map(left, top + geom['height'])
        urx, ury = self.pdf_system.map(left + geom['width'], top)
        link[1] = geom['column'] + start_page
        link.append((llx, lly, urx, ury))

    self.pdf.links.add(current_item, start_page, links, anchors)
def __enter__(self):
self.pdf.save_stack()
self.saved_ps = (self.do_stroke, self.do_fill)
@ -497,7 +516,8 @@ class PdfDevice(QPaintDevice): # {{{
def __init__(self, file_object, page_size=A4, left_margin=inch,
top_margin=inch, right_margin=inch, bottom_margin=inch,
xdpi=1200, ydpi=1200, errors=print, debug=print, compress=True):
xdpi=1200, ydpi=1200, errors=print, debug=print,
compress=True, mark_links=False):
QPaintDevice.__init__(self)
self.xdpi, self.ydpi = xdpi, ydpi
self.page_width, self.page_height = page_size
@ -506,7 +526,10 @@ class PdfDevice(QPaintDevice): # {{{
self.engine = PdfEngine(file_object, self.page_width, self.page_height,
left_margin, top_margin, right_margin,
bottom_margin, self.width(), self.height(),
errors=errors, debug=debug, compress=compress)
errors=errors, debug=debug, compress=compress,
mark_links=mark_links)
self.add_outline = self.engine.add_outline
self.add_links = self.engine.add_links
def paintEngine(self):
    ''' Return the engine object stored on this device as self.engine. '''
    engine = self.engine
    return engine

View File

@ -20,7 +20,6 @@ from calibre.ebooks.oeb.display.webview import load_html
from calibre.ebooks.pdf.render.common import (inch, cm, mm, pica, cicero,
didot, PAPER_SIZES)
from calibre.ebooks.pdf.render.engine import PdfDevice
from calibre.ebooks.pdf.render.links import Links
def get_page_size(opts, for_comic=False): # {{{
use_profile = not (opts.override_profile_size or
@ -143,7 +142,6 @@ class PDFWriter(QObject):
self.view.page().mainFrame().setScrollBarPolicy(x,
Qt.ScrollBarAlwaysOff)
self.report_progress = lambda x, y: x
self.links = Links()
def dump(self, items, out_stream, pdf_metadata):
opts = self.opts
@ -156,7 +154,8 @@ class PDFWriter(QObject):
top_margin=0, right_margin=mr, bottom_margin=0,
xdpi=xdpi, ydpi=ydpi, errors=self.log.error,
debug=self.log.debug, compress=not
opts.uncompressed_pdf)
opts.uncompressed_pdf,
mark_links=opts.pdf_mark_links)
self.page.setViewportSize(QSize(self.doc.width(), self.doc.height()))
self.render_queue = items
@ -187,7 +186,9 @@ class PDFWriter(QObject):
QTimer.singleShot(0, self.render_book)
self.loop.exec_()
# TODO: Outline and links
if self.toc is not None and len(self.toc) > 0:
self.doc.add_outline(self.toc)
self.painter.end()
if self.doc.errors_occurred:
@ -261,8 +262,7 @@ class PDFWriter(QObject):
amap = self.bridge_value
if not isinstance(amap, dict):
amap = {'links':[], 'anchors':{}} # Some javascript error occurred
self.links.add(self.current_item, self.current_page_num, amap['links'],
amap['anchors'])
start_page = self.current_page_num
mf = self.view.page().mainFrame()
while True:
@ -278,3 +278,6 @@ class PDFWriter(QObject):
if self.doc.errors_occurred:
break
self.doc.add_links(self.current_item, start_page, amap['links'],
amap['anchors'])

View File

@ -8,25 +8,112 @@ __copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from future_builtins import map
from urlparse import urlparse, urlunparse
from urllib2 import quote, unquote
from calibre.ebooks.pdf.render.common import Array, Name
from calibre.ebooks.pdf.render.common import Array, Name, Dictionary, String
class Destination(Array):
def __init__(self, start_page, pos):
def __init__(self, start_page, pos, get_pageref):
super(Destination, self).__init__(
[start_page + pos['column'], Name('FitH'), pos['y']])
[get_pageref(start_page + pos['column']), Name('XYZ'), pos['left'],
pos['top'], None]
)
class Links(object):
def __init__(self):
def __init__(self, pdf, mark_links):
self.anchors = {}
self.links = []
self.start = {'top':0, 'column':0, 'left':0}
self.pdf = pdf
self.mark_links = mark_links
def add(self, base_path, start_page, links, anchors):
path = os.path.normcase(os.path.abspath(base_path))
self.anchors[path] = a = {}
a[None] = Destination(start_page, {'y':0, 'column':0})
a[None] = Destination(start_page, self.start, self.pdf.get_pageref)
for anchor, pos in anchors.iteritems():
a[anchor] = Destination(start_page, pos)
a[anchor] = Destination(start_page, pos, self.pdf.get_pageref)
for link in links:
href, page, rect = link
p, frag = href.partition('#')[0::2]
link = ((path, p, frag or None), self.pdf.get_pageref(page).obj, Array(rect))
self.links.append(link)
def add_links(self):
    '''
    Create a PDF link annotation for every entry in self.links and attach
    it to the 'Annots' array of the page the link is on.

    Each entry is a tuple ``((path, href, frag), page, rect)``: `path` is
    the file that contains the link, `href` the link target without its
    fragment, `frag` the fragment (or None), `page` the page dictionary and
    `rect` the link rectangle.

    A link is local when it has no href (it points into its own file) or
    when the href resolves to a file present in self.anchors. Local links
    get a 'Dest' entry. All other links get a URI action, provided they
    have a scheme other than file. Links that end up with neither are
    dropped.
    '''
    for link in self.links:
        path, href, frag = link[0]
        page, rect = link[1:]
        # A relative href is relative to the directory of the file that
        # contains the link, not to the file itself. The keys of
        # self.anchors are normcase'd absolute paths, so the lookup key
        # must be normalized the same way.
        combined_path = os.path.normcase(os.path.abspath(os.path.join(
            os.path.dirname(path), *href.split('/'))))
        is_local = not href or combined_path in self.anchors
        annot = Dictionary({
            'Type':Name('Annot'), 'Subtype':Name('Link'),
            # Without an explicit Border, the PDF default is a visible
            # one unit wide border around every link
            'Rect':rect, 'Border':Array([0, 0, 0]),
        })
        if self.mark_links:
            annot.update({'Border':Array([16, 16, 1]), 'C':Array([1.0, 0,
                0])})
        if is_local:
            targets = self.anchors.get(combined_path if href else path, {})
            # A link to a fragment that does not exist in the target file
            # falls back to the start of that file instead of raising
            # KeyError and aborting the whole conversion
            dest = targets.get(frag, targets.get(None))
            if dest is not None:
                annot['Dest'] = dest
        else:
            url = href + (('#'+frag) if frag else '')
            purl = urlparse(url)
            if purl.scheme and purl.scheme != 'file':
                action = Dictionary({
                    'Type':Name('Action'), 'S':Name('URI'),
                })
                # Normalize the percent-encoding of every URL component so
                # that the final URI is pure ASCII
                parts = (x.encode('utf-8') if isinstance(x, type(u'')) else
                        x for x in purl)
                url = urlunparse(map(quote, map(unquote,
                    parts))).decode('ascii')
                action['URI'] = String(url)
                annot['A'] = action
        if 'A' in annot or 'Dest' in annot:
            if 'Annots' not in page:
                page['Annots'] = Array()
            page['Annots'].append(self.pdf.objects.add(annot))
def add_outline(self, toc):
    '''
    Build the document outline from `toc`: register an 'Outlines'
    dictionary with self.pdf.objects, populate it via process_children()
    and store its reference under 'Outlines' in the catalog.
    '''
    root = Dictionary({'Type':Name('Outlines')})
    rootref = self.pdf.objects.add(root)
    self.process_children(toc, rootref, parent_is_root=True)
    self.pdf.catalog.obj['Outlines'] = rootref
def process_children(self, toc, parentref, parent_is_root=False):
    '''
    Create outline items for the children of `toc`, recursively.

    Children for which process_toc_item() returns None are skipped,
    together with their descendants. The remaining items are chained
    through 'Next'/'Prev', and the parent gets 'First' and 'Last'. Unless
    the parent is the outline root, it also gets a negative 'Count' whose
    magnitude is the number of direct children created.
    '''
    siblings = []
    for entry in toc:
        ref = self.process_toc_item(entry, parentref)
        if ref is not None:
            if siblings:
                previous = siblings[-1]
                previous.obj['Next'] = ref
                ref.obj['Prev'] = previous
            siblings.append(ref)

            if len(entry) > 0:
                self.process_children(entry, ref)

    if not siblings:
        return
    parentref.obj['First'] = siblings[0]
    parentref.obj['Last'] = siblings[-1]
    if not parent_is_root:
        parentref.obj['Count'] = -len(siblings)
def process_toc_item(self, toc, parentref):
    '''
    Create the outline item for a single table of contents entry.

    Returns the result of registering the item with self.pdf.objects, or
    None when the entry has no path or its path is not a key of
    self.anchors. An unknown fragment falls back to the destination stored
    under None for that file.
    '''
    target = toc.abspath or None
    anchor = toc.fragment or None
    if target is None:
        return None
    target = os.path.normcase(os.path.abspath(target))
    try:
        destinations = self.anchors[target]
    except KeyError:
        return None
    fallback = destinations[None]
    dest = destinations.get(anchor, fallback)
    title = String(toc.text or _('Unknown'))
    entry = Dictionary({'Parent':parentref, 'Dest':dest, 'Title':title})
    return self.pdf.objects.add(entry)

View File

@ -17,6 +17,7 @@ from calibre.ebooks.pdf.render.common import (
Reference, EOL, serialize, Stream, Dictionary, String, Name, Array,
GlyphIndex)
from calibre.ebooks.pdf.render.fonts import FontManager
from calibre.ebooks.pdf.render.links import Links
PDFVER = b'%PDF-1.3'
@ -219,6 +220,9 @@ class PageTree(Dictionary):
self['Kids'].append(pageref)
self['Count'] += 1
def get_ref(self, num):
    ''' Return the entry of 'Kids' for page number `num`, counting from 1. '''
    kids = self['Kids']
    return kids[num - 1]
class HashingStream(object):
def __init__(self, f):
@ -277,7 +281,7 @@ class PDFStream(object):
( True, True, 'evenodd') : 'B*',
}
def __init__(self, stream, page_size, compress=False):
def __init__(self, stream, page_size, compress=False, mark_links=False):
self.stream = HashingStream(stream)
self.compress = compress
self.write_line(PDFVER)
@ -294,6 +298,7 @@ class PDFStream(object):
self.stroke_opacities, self.fill_opacities = {}, {}
self.font_manager = FontManager(self.objects, self.compress)
self.image_cache = {}
self.links = Links(self, mark_links)
@property
def page_tree(self):
@ -303,6 +308,9 @@ class PDFStream(object):
def catalog(self):
return self.objects[1]
def get_pageref(self, pagenum):
    ''' Look up page `pagenum` via get_ref() of the page tree object. '''
    tree = self.page_tree.obj
    return tree.get_ref(pagenum)
def set_metadata(self, title=None, author=None, tags=None):
if title:
self.info['Title'] = String(title)
@ -442,6 +450,7 @@ class PDFStream(object):
self.end_page()
self.font_manager.embed_fonts()
inforef = self.objects.add(self.info)
self.links.add_links()
self.objects.pdf_serialize(self.stream)
self.write_line()
startxref = self.objects.write_xref(self.stream)