mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
EPUB Output: Add support for Adobe page maps.
This commit is contained in:
commit
3437b583eb
@ -153,6 +153,14 @@ help on using this feature.
|
||||
'slow and if your source file contains a very large '
|
||||
'number of page breaks, you should turn off splitting '
|
||||
'on page breaks.'))
|
||||
structure('page', ['--page'], default=None,
|
||||
help=_('XPath expression to detect page boundaries for building '
|
||||
'a custom pagination map, as used by AdobeDE. Default is '
|
||||
'not to build an explicit pagination map.'))
|
||||
structure('page_names', ['--page-names'], default=None,
|
||||
help=_('XPath expression to find the name of each page in the '
|
||||
'pagination map relative to its boundary element. '
|
||||
'Default is to number all pages staring with 1.'))
|
||||
toc = c.add_group('toc',
|
||||
_('''\
|
||||
Control the automatic generation of a Table of Contents. If an OPF file is detected
|
||||
@ -230,4 +238,4 @@ to auto-generate a Table of Contents.
|
||||
c.add_opt('extract_to', ['--extract-to'], group='debug', default=None,
|
||||
help=_('Extract the contents of the produced EPUB file to the '
|
||||
'specified directory.'))
|
||||
return c
|
||||
return c
|
||||
|
@ -46,6 +46,7 @@ from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.metadata.opf2 import OPF
|
||||
from calibre.ebooks.epub import initialize_container, PROFILES
|
||||
from calibre.ebooks.epub.split import split
|
||||
from calibre.ebooks.epub.pages import add_page_map
|
||||
from calibre.ebooks.epub.fonts import Rationalizer
|
||||
from calibre.constants import preferred_encoding
|
||||
from calibre.customize.ui import run_plugins_on_postprocess
|
||||
@ -438,6 +439,9 @@ def convert(htmlfile, opts, notification=None, create_epub=True,
|
||||
if opts.show_ncx:
|
||||
print toc
|
||||
split(opf_path, opts, stylesheet_map)
|
||||
if opts.page:
|
||||
logger.info('\tBuilding page map...')
|
||||
add_page_map(opf_path, opts)
|
||||
check_links(opf_path, opts.pretty_print)
|
||||
|
||||
opf = OPF(opf_path, tdir)
|
||||
|
59
src/calibre/ebooks/epub/pages.py
Normal file
59
src/calibre/ebooks/epub/pages.py
Normal file
@ -0,0 +1,59 @@
|
||||
'''
|
||||
Add page mapping information to an EPUB book.
|
||||
'''
|
||||
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, re
|
||||
from itertools import count, chain
|
||||
from calibre.ebooks.oeb.base import XHTML, XHTML_NS
|
||||
from calibre.ebooks.oeb.base import OEBBook, DirWriter
|
||||
from lxml import etree, html
|
||||
from lxml.etree import XPath
|
||||
|
||||
# Prefix-to-namespace map handed to every XPath compiled in this module: the
# prefixes 'h', 'html' and 'xhtml' all resolve to the XHTML namespace.
NSMAP = {'h': XHTML_NS, 'html': XHTML_NS, 'xhtml': XHTML_NS}
|
||||
# Matches the text "page" in any case, wherever it occurs in a label.
PAGE_RE = re.compile(r'page', re.IGNORECASE)
# Matches a token made up solely of roman-numeral letters (any case).
ROMAN_RE = re.compile(r'^[ivxlcdm]+$', re.IGNORECASE)


def filter_name(name):
    '''
    Reduce a raw page label to a bare page name.

    Every occurrence of "page" (case-insensitive) is removed.  If one of the
    remaining whitespace-separated words is an arabic number or consists only
    of roman-numeral letters, the first such word is returned.  Otherwise the
    remaining text is returned with surrounding whitespace trimmed.

    :param name: Label text extracted for a page boundary.
    :return: The page number/numeral if one is found, else the cleaned label.
    '''
    # Strip *after* removing "page": deleting a leading or trailing "Page"
    # would otherwise leave stray whitespace in the fallback result
    # (e.g. "Page 12a" -> " 12a").
    name = PAGE_RE.sub('', name).strip()
    for word in name.split():
        if word.isdigit() or ROMAN_RE.match(word):
            return word
    return name
|
||||
|
||||
def build_name_for(expr):
    '''
    Build the function that produces the name of each page.

    :param expr: XPath expression evaluated relative to a page boundary
        element to find that page's name, or a false value to number the
        pages sequentially starting at 1.
    :return: A callable taking a boundary element and returning its page
        name as a string ('' when the expression finds nothing).
    '''
    if not expr:
        # No naming expression: hand out '1', '2', '3', ... in call order.
        # A one-item list holds the running total so the closure can update
        # it without relying on the iterator ``.next()`` method.
        last = [0]

        def numbered(elem):
            last[0] += 1
            return str(last[0])
        return numbered

    selector = XPath(expr, namespaces=NSMAP)

    def name_for(elem):
        results = selector(elem)
        if not results:
            return ''
        if isinstance(results, list):
            # Node-set result: join the selected text/attribute values.
            name = ' '.join(results)
        else:
            # Scalar result (e.g. from string(...) or concat(...)): use it
            # whole.  Joining a single string would insert a space between
            # every character of the page name.
            name = '%s' % (results,)
        return filter_name(name)
    return name_for
|
||||
|
||||
def add_page_map(opfpath, opts):
    '''
    Record a page entry for every page boundary found in the book.

    Each spine item is searched with the ``opts.page`` XPath expression.
    Every match is named according to ``opts.page_names``, given an ``id``
    attribute if it lacks one, and added to the book's pages with the target
    ``<item href>#<id>``.  The book is then dumped back to ``opfpath`` by a
    ``DirWriter`` created with ``page_map=True``.

    :param opfpath: Path handed to ``OEBBook`` to load the book and to
        ``DirWriter.dump`` to write it back.
    :param opts: Options object providing ``page`` and ``page_names``.
    '''
    book = OEBBook(opfpath)
    find_boundaries = XPath(opts.page, namespaces=NSMAP)
    name_for = build_name_for(opts.page_names)
    # One id sequence for the whole book; it is not restarted per spine item.
    fresh_ids = ("calibre-page-%d" % n for n in count(1))
    for item in book.spine:
        for boundary in find_boundaries(item.data):
            # Name the page before possibly adding an id to the element.
            page_name = name_for(boundary)
            anchor = boundary.get('id')
            if anchor is None:
                anchor = fresh_ids.next()
                boundary.attrib['id'] = anchor
            book.pages.add(page_name, '#'.join([item.href, anchor]))
    DirWriter(version='2.0', page_map=True).dump(book, opfpath)
|
@ -246,6 +246,10 @@ class DirWriter(object):
|
||||
|
||||
def dump(self, oeb, path):
|
||||
version = int(self.version[0])
|
||||
opfname = None
|
||||
if os.path.splitext(path)[1].lower() == '.opf':
|
||||
opfname = os.path.basename(path)
|
||||
path = os.path.dirname(path)
|
||||
if not os.path.isdir(path):
|
||||
os.mkdir(path)
|
||||
output = DirContainer(path)
|
||||
@ -257,7 +261,9 @@ class DirWriter(object):
|
||||
metadata = oeb.to_opf2(page_map=self.page_map)
|
||||
else:
|
||||
raise OEBError("Unrecognized OPF version %r" % self.version)
|
||||
for href, data in metadata.values():
|
||||
for mime, (href, data) in metadata.items():
|
||||
if opfname and mime == OPF_MIME:
|
||||
href = opfname
|
||||
output.write(href, xml2str(data))
|
||||
return
|
||||
|
||||
@ -551,9 +557,6 @@ class Manifest(object):
|
||||
for elem in data:
|
||||
nroot.append(elem)
|
||||
data = nroot
|
||||
# Remove any encoding-specifying <meta/> elements
|
||||
for meta in self.META_XP(data):
|
||||
meta.getparent().remove(meta)
|
||||
# Ensure has a <head/>
|
||||
head = xpath(data, '/h:html/h:head')
|
||||
head = head[0] if head else None
|
||||
@ -569,6 +572,12 @@ class Manifest(object):
|
||||
'File %r missing <title/> element' % self.href)
|
||||
title = etree.SubElement(head, XHTML('title'))
|
||||
title.text = self.oeb.translate(__('Unknown'))
|
||||
# Remove any encoding-specifying <meta/> elements
|
||||
for meta in self.META_XP(data):
|
||||
meta.getparent().remove(meta)
|
||||
etree.SubElement(head, XHTML('meta'),
|
||||
attrib={'http-equiv': 'Content-Type',
|
||||
'content': '%s; charset=utf-8' % XHTML_NS})
|
||||
# Ensure has a <body/>
|
||||
if not xpath(data, '/h:html/h:body'):
|
||||
self.oeb.logger.warn(
|
||||
|
@ -224,6 +224,7 @@ class Config(ResizableDialog, Ui_Dialog):
|
||||
g.setValue(val)
|
||||
elif isinstance(g, (QLineEdit, QTextEdit)):
|
||||
getattr(g, 'setPlainText', g.setText)(val)
|
||||
getattr(g, 'setCursorPosition', lambda x: x)(0)
|
||||
elif isinstance(g, QComboBox):
|
||||
for value in pref.choices:
|
||||
g.addItem(value)
|
||||
@ -253,7 +254,8 @@ class Config(ResizableDialog, Ui_Dialog):
|
||||
self.source_format = d.format()
|
||||
|
||||
def accept(self):
|
||||
for opt in ('chapter', 'level1_toc', 'level2_toc', 'level3_toc'):
|
||||
for opt in ('chapter', 'level1_toc', 'level2_toc', 'level3_toc', 'page',
|
||||
'page_names'):
|
||||
text = unicode(getattr(self, 'opt_'+opt).text())
|
||||
if text:
|
||||
try:
|
||||
|
@ -524,7 +524,7 @@
|
||||
</layout>
|
||||
</widget>
|
||||
<widget class="QWidget" name="pagesetup_page" >
|
||||
<layout class="QGridLayout" name="_13" >
|
||||
<layout class="QGridLayout" name="gridLayout_7" >
|
||||
<item row="0" column="0" >
|
||||
<widget class="QLabel" name="profile_label" >
|
||||
<property name="text" >
|
||||
@ -545,6 +545,32 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="0" >
|
||||
<widget class="QLabel" name="source_profile_label" >
|
||||
<property name="text" >
|
||||
<string>&Source profile:</string>
|
||||
</property>
|
||||
<property name="buddy" >
|
||||
<cstring>opt_source_profile</cstring>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="1" >
|
||||
<widget class="QComboBox" name="opt_source_profile" />
|
||||
</item>
|
||||
<item row="2" column="0" >
|
||||
<widget class="QLabel" name="dest_profile_label" >
|
||||
<property name="text" >
|
||||
<string>&Destination profile:</string>
|
||||
</property>
|
||||
<property name="buddy" >
|
||||
<cstring>opt_dest_profile</cstring>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="1" >
|
||||
<widget class="QComboBox" name="opt_dest_profile" />
|
||||
</item>
|
||||
<item row="3" column="0" >
|
||||
<widget class="QLabel" name="label_12" >
|
||||
<property name="text" >
|
||||
@ -644,31 +670,72 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="0" >
|
||||
<widget class="QLabel" name="source_profile_label" >
|
||||
<property name="text" >
|
||||
<string>&Source profile:</string>
|
||||
</property>
|
||||
<property name="buddy" >
|
||||
<cstring>opt_source_profile</cstring>
|
||||
<item row="8" column="0" colspan="2" >
|
||||
<widget class="QGroupBox" name="page_map_box" >
|
||||
<property name="title" >
|
||||
<string>&Page map</string>
|
||||
</property>
|
||||
<layout class="QGridLayout" name="gridLayout" >
|
||||
<item rowspan="2" row="0" column="0" colspan="4" >
|
||||
<widget class="QLabel" name="label_23" >
|
||||
<property name="sizePolicy" >
|
||||
<sizepolicy vsizetype="Minimum" hsizetype="Preferred" >
|
||||
<horstretch>0</horstretch>
|
||||
<verstretch>0</verstretch>
|
||||
</sizepolicy>
|
||||
</property>
|
||||
<property name="text" >
|
||||
<string><p>You can control how calibre detects page boundaries using a XPath expression. To learn how to use XPath expressions see the <a href="http://calibre.kovidgoyal.net/user_manual/xpath.html">XPath tutorial</a>. The page boundaries are useful only if you want a mapping from pages in a paper book, to locations in the e-book. This controls where Adobe Digital Editions displays the page numbers in the right margin.</p></string>
|
||||
</property>
|
||||
<property name="wordWrap" >
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
<property name="openExternalLinks" >
|
||||
<bool>true</bool>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="0" >
|
||||
<widget class="QLabel" name="label_21" >
|
||||
<property name="text" >
|
||||
<string>&Boundary XPath:</string>
|
||||
</property>
|
||||
<property name="buddy" >
|
||||
<cstring>opt_page</cstring>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="1" >
|
||||
<widget class="QLineEdit" name="opt_page" />
|
||||
</item>
|
||||
<item row="1" column="2" >
|
||||
<widget class="QLabel" name="label_22" >
|
||||
<property name="text" >
|
||||
<string>&Name XPath:</string>
|
||||
</property>
|
||||
<property name="buddy" >
|
||||
<cstring>opt_page_names</cstring>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="3" >
|
||||
<widget class="QLineEdit" name="opt_page_names" />
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="1" >
|
||||
<widget class="QComboBox" name="opt_source_profile" />
|
||||
</item>
|
||||
<item row="2" column="0" >
|
||||
<widget class="QLabel" name="dest_profile_label" >
|
||||
<property name="text" >
|
||||
<string>&Destination profile:</string>
|
||||
<item row="9" column="0" >
|
||||
<spacer name="verticalSpacer" >
|
||||
<property name="orientation" >
|
||||
<enum>Qt::Vertical</enum>
|
||||
</property>
|
||||
<property name="buddy" >
|
||||
<cstring>opt_dest_profile</cstring>
|
||||
<property name="sizeHint" stdset="0" >
|
||||
<size>
|
||||
<width>20</width>
|
||||
<height>40</height>
|
||||
</size>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="1" >
|
||||
<widget class="QComboBox" name="opt_dest_profile" />
|
||||
</spacer>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
@ -679,21 +746,8 @@
|
||||
<property name="title" >
|
||||
<string>Automatic &chapter detection</string>
|
||||
</property>
|
||||
<layout class="QGridLayout" name="gridLayout" >
|
||||
<item row="1" column="0" >
|
||||
<widget class="QLabel" name="label_17" >
|
||||
<property name="text" >
|
||||
<string>&XPath:</string>
|
||||
</property>
|
||||
<property name="buddy" >
|
||||
<cstring>opt_chapter</cstring>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="1" >
|
||||
<widget class="QLineEdit" name="opt_chapter" />
|
||||
</item>
|
||||
<item row="0" column="0" colspan="2" >
|
||||
<layout class="QVBoxLayout" name="verticalLayout_4" >
|
||||
<item>
|
||||
<widget class="QLabel" name="label_8" >
|
||||
<property name="text" >
|
||||
<string><p>You can control how calibre detects chapters using a XPath expression. To learn how to use XPath expressions see the <a href="http://calibre.kovidgoyal.net/user_manual/xpath.html">XPath tutorial</a></p></string>
|
||||
@ -709,18 +763,35 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="1" >
|
||||
<widget class="QComboBox" name="opt_chapter_mark" />
|
||||
</item>
|
||||
<item row="2" column="0" >
|
||||
<widget class="QLabel" name="label_9" >
|
||||
<property name="text" >
|
||||
<string>Chapter &mark:</string>
|
||||
</property>
|
||||
<property name="buddy" >
|
||||
<cstring>opt_chapter_mark</cstring>
|
||||
</property>
|
||||
</widget>
|
||||
<item>
|
||||
<layout class="QHBoxLayout" name="horizontalLayout_3" >
|
||||
<item>
|
||||
<widget class="QLabel" name="label_17" >
|
||||
<property name="text" >
|
||||
<string>&XPath:</string>
|
||||
</property>
|
||||
<property name="buddy" >
|
||||
<cstring>opt_chapter</cstring>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QLineEdit" name="opt_chapter" />
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QLabel" name="label_9" >
|
||||
<property name="text" >
|
||||
<string>Chapter &mark:</string>
|
||||
</property>
|
||||
<property name="buddy" >
|
||||
<cstring>opt_chapter_mark</cstring>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QComboBox" name="opt_chapter_mark" />
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
@ -847,7 +918,7 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="0" colspan="2" >
|
||||
<item row="3" column="0" colspan="2" >
|
||||
<widget class="QDialogButtonBox" name="buttonBox" >
|
||||
<property name="orientation" >
|
||||
<enum>Qt::Horizontal</enum>
|
||||
|
@ -19,4 +19,5 @@ class Config(_Config):
|
||||
self.opt_dont_split_on_page_breaks.setVisible(False)
|
||||
self.opt_preserve_tag_structure.setVisible(False)
|
||||
self.opt_linearize_tables.setVisible(False)
|
||||
self.opt_no_justification.setVisible(False)
|
||||
self.opt_no_justification.setVisible(False)
|
||||
self.page_map_box.setVisible(False)
|
Loading…
x
Reference in New Issue
Block a user