diff --git a/src/calibre/ebooks/epub/__init__.py b/src/calibre/ebooks/epub/__init__.py index 863f2f8db0..aa17024d50 100644 --- a/src/calibre/ebooks/epub/__init__.py +++ b/src/calibre/ebooks/epub/__init__.py @@ -153,6 +153,14 @@ help on using this feature. 'slow and if your source file contains a very large ' 'number of page breaks, you should turn off splitting ' 'on page breaks.')) + structure('page', ['--page'], default=None, + help=_('XPath expression to detect page boundaries for building ' + 'a custom pagination map, as used by AdobeDE. Default is ' + 'not to build an explicit pagination map.')) + structure('page_names', ['--page-names'], default=None, + help=_('XPath expression to find the name of each page in the ' + 'pagination map relative to its boundary element. ' + 'Default is to number all pages starting with 1.')) toc = c.add_group('toc', _('''\ Control the automatic generation of a Table of Contents. If an OPF file is detected @@ -230,4 +238,4 @@ to auto-generate a Table of Contents. 
c.add_opt('extract_to', ['--extract-to'], group='debug', default=None, help=_('Extract the contents of the produced EPUB file to the ' 'specified directory.')) - return c \ No newline at end of file + return c diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py index ca50fe7a5d..bd9b59cfbd 100644 --- a/src/calibre/ebooks/epub/from_html.py +++ b/src/calibre/ebooks/epub/from_html.py @@ -46,6 +46,7 @@ from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.epub import initialize_container, PROFILES from calibre.ebooks.epub.split import split +from calibre.ebooks.epub.pages import add_page_map from calibre.ebooks.epub.fonts import Rationalizer from calibre.constants import preferred_encoding from calibre.customize.ui import run_plugins_on_postprocess @@ -438,6 +439,9 @@ def convert(htmlfile, opts, notification=None, create_epub=True, if opts.show_ncx: print toc split(opf_path, opts, stylesheet_map) + if opts.page: + logger.info('\tBuilding page map...') + add_page_map(opf_path, opts) check_links(opf_path, opts.pretty_print) opf = OPF(opf_path, tdir) diff --git a/src/calibre/ebooks/epub/pages.py b/src/calibre/ebooks/epub/pages.py new file mode 100644 index 0000000000..1ab5edde86 --- /dev/null +++ b/src/calibre/ebooks/epub/pages.py @@ -0,0 +1,59 @@ +''' +Add page mapping information to an EPUB book. +''' + +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2008, Marshall T. 
Vandegrift ' +__docformat__ = 'restructuredtext en' + +import os, re +from itertools import count, chain +from calibre.ebooks.oeb.base import XHTML, XHTML_NS +from calibre.ebooks.oeb.base import OEBBook, DirWriter +from lxml import etree, html +from lxml.etree import XPath + +NSMAP = {'h': XHTML_NS, 'html': XHTML_NS, 'xhtml': XHTML_NS} +PAGE_RE = re.compile(r'page', re.IGNORECASE) +ROMAN_RE = re.compile(r'^[ivxlcdm]+$', re.IGNORECASE) + +def filter_name(name): + name = name.strip() + name = PAGE_RE.sub('', name) + for word in name.split(): + if word.isdigit() or ROMAN_RE.match(word): + name = word + break + return name + +def build_name_for(expr): + if not expr: + counter = count(1) + return lambda elem: str(counter.next()) + selector = XPath(expr, namespaces=NSMAP) + def name_for(elem): + results = selector(elem) + if not results: + return '' + name = ' '.join(results) + return filter_name(name) + return name_for + +def add_page_map(opfpath, opts): + oeb = OEBBook(opfpath) + selector = XPath(opts.page, namespaces=NSMAP) + name_for = build_name_for(opts.page_names) + idgen = ("calibre-page-%d" % n for n in count(1)) + for item in oeb.spine: + data = item.data + for elem in selector(data): + name = name_for(elem) + id = elem.get('id', None) + if id is None: + id = elem.attrib['id'] = idgen.next() + href = '#'.join((item.href, id)) + oeb.pages.add(name, href) + writer = DirWriter(version='2.0', page_map=True) + writer.dump(oeb, opfpath) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 778cec54cf..80d4797905 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -246,6 +246,10 @@ class DirWriter(object): def dump(self, oeb, path): version = int(self.version[0]) + opfname = None + if os.path.splitext(path)[1].lower() == '.opf': + opfname = os.path.basename(path) + path = os.path.dirname(path) if not os.path.isdir(path): os.mkdir(path) output = DirContainer(path) @@ -257,7 +261,9 @@ class DirWriter(object): 
metadata = oeb.to_opf2(page_map=self.page_map) else: raise OEBError("Unrecognized OPF version %r" % self.version) - for href, data in metadata.values(): + for mime, (href, data) in metadata.items(): + if opfname and mime == OPF_MIME: + href = opfname output.write(href, xml2str(data)) return @@ -551,9 +557,6 @@ class Manifest(object): for elem in data: nroot.append(elem) data = nroot - # Remove any encoding-specifying elements - for meta in self.META_XP(data): - meta.getparent().remove(meta) # Ensure has a head = xpath(data, '/h:html/h:head') head = head[0] if head else None @@ -569,6 +572,12 @@ class Manifest(object): 'File %r missing element' % self.href) title = etree.SubElement(head, XHTML('title')) title.text = self.oeb.translate(__('Unknown')) + # Remove any encoding-specifying <meta/> elements + for meta in self.META_XP(data): + meta.getparent().remove(meta) + etree.SubElement(head, XHTML('meta'), + attrib={'http-equiv': 'Content-Type', + 'content': '%s; charset=utf-8' % XHTML_NS}) # Ensure has a <body/> if not xpath(data, '/h:html/h:body'): self.oeb.logger.warn( diff --git a/src/calibre/gui2/dialogs/epub.py b/src/calibre/gui2/dialogs/epub.py index 387a495f87..fb8e6bf71e 100644 --- a/src/calibre/gui2/dialogs/epub.py +++ b/src/calibre/gui2/dialogs/epub.py @@ -224,6 +224,7 @@ class Config(ResizableDialog, Ui_Dialog): g.setValue(val) elif isinstance(g, (QLineEdit, QTextEdit)): getattr(g, 'setPlainText', g.setText)(val) + getattr(g, 'setCursorPosition', lambda x: x)(0) elif isinstance(g, QComboBox): for value in pref.choices: g.addItem(value) @@ -253,7 +254,8 @@ class Config(ResizableDialog, Ui_Dialog): self.source_format = d.format() def accept(self): - for opt in ('chapter', 'level1_toc', 'level2_toc', 'level3_toc'): + for opt in ('chapter', 'level1_toc', 'level2_toc', 'level3_toc', 'page', + 'page_names'): text = unicode(getattr(self, 'opt_'+opt).text()) if text: try: diff --git a/src/calibre/gui2/dialogs/epub.ui b/src/calibre/gui2/dialogs/epub.ui index 
3cf19a1f91..2d5c0fa153 100644 --- a/src/calibre/gui2/dialogs/epub.ui +++ b/src/calibre/gui2/dialogs/epub.ui @@ -524,7 +524,7 @@ </layout> </widget> <widget class="QWidget" name="pagesetup_page" > - <layout class="QGridLayout" name="_13" > + <layout class="QGridLayout" name="gridLayout_7" > <item row="0" column="0" > <widget class="QLabel" name="profile_label" > <property name="text" > @@ -545,6 +545,32 @@ </property> </widget> </item> + <item row="1" column="0" > + <widget class="QLabel" name="source_profile_label" > + <property name="text" > + <string>&Source profile:</string> + </property> + <property name="buddy" > + <cstring>opt_source_profile</cstring> + </property> + </widget> + </item> + <item row="1" column="1" > + <widget class="QComboBox" name="opt_source_profile" /> + </item> + <item row="2" column="0" > + <widget class="QLabel" name="dest_profile_label" > + <property name="text" > + <string>&Destination profile:</string> + </property> + <property name="buddy" > + <cstring>opt_dest_profile</cstring> + </property> + </widget> + </item> + <item row="2" column="1" > + <widget class="QComboBox" name="opt_dest_profile" /> + </item> <item row="3" column="0" > <widget class="QLabel" name="label_12" > <property name="text" > @@ -644,31 +670,72 @@ </property> </widget> </item> - <item row="1" column="0" > - <widget class="QLabel" name="source_profile_label" > - <property name="text" > - <string>&Source profile:</string> - </property> - <property name="buddy" > - <cstring>opt_source_profile</cstring> + <item row="8" column="0" colspan="2" > + <widget class="QGroupBox" name="page_map_box" > + <property name="title" > + <string>&Page map</string> </property> + <layout class="QGridLayout" name="gridLayout" > + <item rowspan="2" row="0" column="0" colspan="4" > + <widget class="QLabel" name="label_23" > + <property name="sizePolicy" > + <sizepolicy vsizetype="Minimum" hsizetype="Preferred" > + <horstretch>0</horstretch> + <verstretch>0</verstretch> + </sizepolicy> + 
</property> + <property name="text" > + <string><p>You can control how calibre detects page boundaries using an XPath expression. To learn how to use XPath expressions see the <a href="http://calibre.kovidgoyal.net/user_manual/xpath.html">XPath tutorial</a>. The page boundaries are useful only if you want a mapping from pages in a paper book to locations in the e-book. This controls where Adobe Digital Editions displays the page numbers in the right margin.</p></string> + </property> + <property name="wordWrap" > + <bool>true</bool> + </property> + <property name="openExternalLinks" > + <bool>true</bool> + </property> + </widget> + </item> + <item row="1" column="0" > + <widget class="QLabel" name="label_21" > + <property name="text" > + <string>&Boundary XPath:</string> + </property> + <property name="buddy" > + <cstring>opt_page</cstring> + </property> + </widget> + </item> + <item row="1" column="1" > + <widget class="QLineEdit" name="opt_page" /> + </item> + <item row="1" column="2" > + <widget class="QLabel" name="label_22" > + <property name="text" > + <string>&Name XPath:</string> + </property> + <property name="buddy" > + <cstring>opt_page_names</cstring> + </property> + </widget> + </item> + <item row="1" column="3" > + <widget class="QLineEdit" name="opt_page_names" /> + </item> + </layout> </widget> </item> - <item row="1" column="1" > - <widget class="QComboBox" name="opt_source_profile" /> - </item> - <item row="2" column="0" > - <widget class="QLabel" name="dest_profile_label" > - <property name="text" > - <string>&Destination profile:</string> + <item row="9" column="0" > + <spacer name="verticalSpacer" > + <property name="orientation" > + <enum>Qt::Vertical</enum> </property> - <property name="buddy" > - <cstring>opt_dest_profile</cstring> + <property name="sizeHint" stdset="0" > + <size> + <width>20</width> + <height>40</height> + </size> </property> - </widget> - </item> - <item row="2" column="1" > - <widget class="QComboBox" 
name="opt_dest_profile" /> + </spacer> </item> </layout> </widget> @@ -679,21 +746,8 @@ <property name="title" > <string>Automatic &chapter detection</string> </property> - <layout class="QGridLayout" name="gridLayout" > - <item row="1" column="0" > - <widget class="QLabel" name="label_17" > - <property name="text" > - <string>&XPath:</string> - </property> - <property name="buddy" > - <cstring>opt_chapter</cstring> - </property> - </widget> - </item> - <item row="1" column="1" > - <widget class="QLineEdit" name="opt_chapter" /> - </item> - <item row="0" column="0" colspan="2" > + <layout class="QVBoxLayout" name="verticalLayout_4" > + <item> <widget class="QLabel" name="label_8" > <property name="text" > <string><p>You can control how calibre detects chapters using a XPath expression. To learn how to use XPath expressions see the <a href="http://calibre.kovidgoyal.net/user_manual/xpath.html">XPath tutorial</a></p></string> @@ -709,18 +763,35 @@ </property> </widget> </item> - <item row="2" column="1" > - <widget class="QComboBox" name="opt_chapter_mark" /> - </item> - <item row="2" column="0" > - <widget class="QLabel" name="label_9" > - <property name="text" > - <string>Chapter &mark:</string> - </property> - <property name="buddy" > - <cstring>opt_chapter_mark</cstring> - </property> - </widget> + <item> + <layout class="QHBoxLayout" name="horizontalLayout_3" > + <item> + <widget class="QLabel" name="label_17" > + <property name="text" > + <string>&XPath:</string> + </property> + <property name="buddy" > + <cstring>opt_chapter</cstring> + </property> + </widget> + </item> + <item> + <widget class="QLineEdit" name="opt_chapter" /> + </item> + <item> + <widget class="QLabel" name="label_9" > + <property name="text" > + <string>Chapter &mark:</string> + </property> + <property name="buddy" > + <cstring>opt_chapter_mark</cstring> + </property> + </widget> + </item> + <item> + <widget class="QComboBox" name="opt_chapter_mark" /> + </item> + </layout> </item> 
</layout> </widget> @@ -847,7 +918,7 @@ </property> </widget> </item> - <item row="2" column="0" colspan="2" > + <item row="3" column="0" colspan="2" > <widget class="QDialogButtonBox" name="buttonBox" > <property name="orientation" > <enum>Qt::Horizontal</enum> diff --git a/src/calibre/gui2/dialogs/mobi.py b/src/calibre/gui2/dialogs/mobi.py index 0cf4d78b8c..7d0324e0f4 100644 --- a/src/calibre/gui2/dialogs/mobi.py +++ b/src/calibre/gui2/dialogs/mobi.py @@ -19,4 +19,5 @@ class Config(_Config): self.opt_dont_split_on_page_breaks.setVisible(False) self.opt_preserve_tag_structure.setVisible(False) self.opt_linearize_tables.setVisible(False) - self.opt_no_justification.setVisible(False) \ No newline at end of file + self.opt_no_justification.setVisible(False) + self.page_map_box.setVisible(False) \ No newline at end of file