mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Implement adding page-map information to EPUB output.
This commit is contained in:
parent
9929ba8eeb
commit
28a6f11a94
@ -153,6 +153,14 @@ help on using this feature.
|
|||||||
'slow and if your source file contains a very large '
|
'slow and if your source file contains a very large '
|
||||||
'number of page breaks, you should turn off splitting '
|
'number of page breaks, you should turn off splitting '
|
||||||
'on page breaks.'))
|
'on page breaks.'))
|
||||||
|
structure('page', ['--page'], default=None,
|
||||||
|
help=_('XPath expression to detect page boundaries for building '
|
||||||
|
'a custom pagination map, as used by AdobeDE. Default is '
|
||||||
|
'not to build an explicit pagination map.'))
|
||||||
|
structure('page_names', ['--page-names'], default=None,
|
||||||
|
help=_('XPath expression to find the name of each page in the '
|
||||||
|
'pagination map relative to its boundary element. '
|
||||||
|
'Default is to number all pages staring with 1.'))
|
||||||
toc = c.add_group('toc',
|
toc = c.add_group('toc',
|
||||||
_('''\
|
_('''\
|
||||||
Control the automatic generation of a Table of Contents. If an OPF file is detected
|
Control the automatic generation of a Table of Contents. If an OPF file is detected
|
||||||
|
@ -46,6 +46,7 @@ from calibre.ebooks.metadata.toc import TOC
|
|||||||
from calibre.ebooks.metadata.opf2 import OPF
|
from calibre.ebooks.metadata.opf2 import OPF
|
||||||
from calibre.ebooks.epub import initialize_container, PROFILES
|
from calibre.ebooks.epub import initialize_container, PROFILES
|
||||||
from calibre.ebooks.epub.split import split
|
from calibre.ebooks.epub.split import split
|
||||||
|
from calibre.ebooks.epub.pages import add_page_map
|
||||||
from calibre.ebooks.epub.fonts import Rationalizer
|
from calibre.ebooks.epub.fonts import Rationalizer
|
||||||
from calibre.constants import preferred_encoding
|
from calibre.constants import preferred_encoding
|
||||||
from calibre.customize.ui import run_plugins_on_postprocess
|
from calibre.customize.ui import run_plugins_on_postprocess
|
||||||
@ -438,6 +439,8 @@ def convert(htmlfile, opts, notification=None, create_epub=True,
|
|||||||
if opts.show_ncx:
|
if opts.show_ncx:
|
||||||
print toc
|
print toc
|
||||||
split(opf_path, opts, stylesheet_map)
|
split(opf_path, opts, stylesheet_map)
|
||||||
|
if opts.page:
|
||||||
|
add_page_map(opf_path, opts)
|
||||||
check_links(opf_path, opts.pretty_print)
|
check_links(opf_path, opts.pretty_print)
|
||||||
|
|
||||||
opf = OPF(opf_path, tdir)
|
opf = OPF(opf_path, tdir)
|
||||||
|
59
src/calibre/ebooks/epub/pages.py
Normal file
59
src/calibre/ebooks/epub/pages.py
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
'''
|
||||||
|
Add page mapping information to an EPUB book.
|
||||||
|
'''
|
||||||
|
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os, re
|
||||||
|
from itertools import count, chain
|
||||||
|
from calibre.ebooks.oeb.base import XHTML, XHTML_NS
|
||||||
|
from calibre.ebooks.oeb.base import OEBBook, DirWriter
|
||||||
|
from lxml import etree, html
|
||||||
|
from lxml.etree import XPath
|
||||||
|
|
||||||
|
# Namespace prefixes made available to the XPath expressions compiled in this
# module; 'h', 'html' and 'xhtml' all resolve to the XHTML namespace.
NSMAP = {'h': XHTML_NS, 'html': XHTML_NS, 'xhtml': XHTML_NS}
|
||||||
|
# Matches the text "page" in any letter case so it can be dropped from a label.
PAGE_RE = re.compile(r'page', re.IGNORECASE)
# Matches a word consisting solely of Roman-numeral letters, in any case.
ROMAN_RE = re.compile(r'^[ivxlcdm]+$', re.IGNORECASE)


def filter_name(name):
    '''
    Reduce a raw page label to a bare page name.

    Every occurrence of "page" (case-insensitive) is removed.  If any
    whitespace-separated word of what remains is a decimal number or is
    built only from Roman-numeral letters, the first such word is returned.
    Otherwise the remaining text is returned with surrounding whitespace
    removed.

    :param name: Raw label text.
    :return: The filtered page name.
    '''
    # Strip *after* removing "page": the removal can expose whitespace at
    # either end (e.g. "Page Intro" -> " Intro"), which must not leak into
    # the returned name.
    name = PAGE_RE.sub('', name).strip()
    for word in name.split():
        if word.isdigit() or ROMAN_RE.match(word):
            return word
    return name
|
||||||
|
|
||||||
|
def build_name_for(expr):
    '''
    Build the function used to name each page of the pagination map.

    :param expr: XPath expression evaluated relative to a page boundary
        element, or ``None`` to number the pages sequentially from 1.
    :return: A callable that takes a boundary element and returns its page
        name as a string.  With an expression, the name is ``''`` when the
        expression yields nothing; otherwise the results are joined with
        spaces and passed through ``filter_name``.
    '''
    if expr is None:
        # A one-element list lets the closure advance the running number
        # without relying on iterator methods whose spelling differs
        # between Python versions.
        last = [0]

        def numbered(elem):
            last[0] += 1
            return str(last[0])
        return numbered

    selector = XPath(expr, namespaces=NSMAP)

    def name_for(elem):
        results = selector(elem)
        if not results:
            return ''
        if not isinstance(results, list):
            # String-, number- and boolean-valued expressions yield a bare
            # scalar rather than a list; joining a string directly would
            # insert a space between each of its characters.
            results = [results]
        parts = []
        for result in results:
            if etree.iselement(result):
                # An element cannot be joined as text; use its string value.
                result = result.xpath('string()')
            parts.append('%s' % (result,))
        return filter_name(' '.join(parts))
    return name_for
|
||||||
|
|
||||||
|
def add_page_map(opfpath, opts):
    '''
    Record page boundaries for the book at `opfpath` and write it back out.

    Each spine item's data is searched with the XPath expression in
    `opts.page`.  Every match is named via `opts.page_names` (see
    `build_name_for`), given a generated ``calibre-page-N`` id when it has
    no ``id`` attribute, and added to the book's pages as
    ``<item href>#<id>``.  The book is then dumped to `opfpath` with a
    version '2.0' DirWriter that has page-map output enabled.

    :param opfpath: Path of the OPF file to load and to write back to.
    :param opts: Options object providing `page` and `page_names`.
    '''
    book = OEBBook(opfpath)
    find_boundaries = XPath(opts.page, namespaces=NSMAP)
    label_for = build_name_for(opts.page_names)
    # Shared across all spine items so generated ids never repeat.
    serial = count(1)

    def anchor_of(boundary):
        # Reuse an existing id; otherwise mint one and attach it.
        anchor = boundary.get('id', None)
        if anchor is None:
            anchor = "calibre-page-%d" % serial.next()
            boundary.attrib['id'] = anchor
        return anchor

    for spine_item in book.spine:
        tree = spine_item.data
        for boundary in find_boundaries(tree):
            label = label_for(boundary)
            anchor = anchor_of(boundary)
            target = '#'.join((spine_item.href, anchor))
            book.pages.add(label, target)

    DirWriter(version='2.0', page_map=True).dump(book, opfpath)
|
@ -246,6 +246,10 @@ class DirWriter(object):
|
|||||||
|
|
||||||
def dump(self, oeb, path):
|
def dump(self, oeb, path):
|
||||||
version = int(self.version[0])
|
version = int(self.version[0])
|
||||||
|
opfname = None
|
||||||
|
if os.path.splitext(path)[1].lower() == '.opf':
|
||||||
|
opfname = os.path.basename(path)
|
||||||
|
path = os.path.dirname(path)
|
||||||
if not os.path.isdir(path):
|
if not os.path.isdir(path):
|
||||||
os.mkdir(path)
|
os.mkdir(path)
|
||||||
output = DirContainer(path)
|
output = DirContainer(path)
|
||||||
@ -257,7 +261,9 @@ class DirWriter(object):
|
|||||||
metadata = oeb.to_opf2(page_map=self.page_map)
|
metadata = oeb.to_opf2(page_map=self.page_map)
|
||||||
else:
|
else:
|
||||||
raise OEBError("Unrecognized OPF version %r" % self.version)
|
raise OEBError("Unrecognized OPF version %r" % self.version)
|
||||||
for href, data in metadata.values():
|
for mime, (href, data) in metadata.items():
|
||||||
|
if opfname and mime == OPF_MIME:
|
||||||
|
href = opfname
|
||||||
output.write(href, xml2str(data))
|
output.write(href, xml2str(data))
|
||||||
return
|
return
|
||||||
|
|
||||||
@ -551,9 +557,6 @@ class Manifest(object):
|
|||||||
for elem in data:
|
for elem in data:
|
||||||
nroot.append(elem)
|
nroot.append(elem)
|
||||||
data = nroot
|
data = nroot
|
||||||
# Remove any encoding-specifying <meta/> elements
|
|
||||||
for meta in self.META_XP(data):
|
|
||||||
meta.getparent().remove(meta)
|
|
||||||
# Ensure has a <head/>
|
# Ensure has a <head/>
|
||||||
head = xpath(data, '/h:html/h:head')
|
head = xpath(data, '/h:html/h:head')
|
||||||
head = head[0] if head else None
|
head = head[0] if head else None
|
||||||
@ -569,6 +572,12 @@ class Manifest(object):
|
|||||||
'File %r missing <title/> element' % self.href)
|
'File %r missing <title/> element' % self.href)
|
||||||
title = etree.SubElement(head, XHTML('title'))
|
title = etree.SubElement(head, XHTML('title'))
|
||||||
title.text = self.oeb.translate(__('Unknown'))
|
title.text = self.oeb.translate(__('Unknown'))
|
||||||
|
# Remove any encoding-specifying <meta/> elements
|
||||||
|
for meta in self.META_XP(data):
|
||||||
|
meta.getparent().remove(meta)
|
||||||
|
etree.SubElement(head, XHTML('meta'),
|
||||||
|
attrib={'http-equiv': 'Content-Type',
|
||||||
|
'content': '%s; charset=utf-8' % XHTML_NS})
|
||||||
# Ensure has a <body/>
|
# Ensure has a <body/>
|
||||||
if not xpath(data, '/h:html/h:body'):
|
if not xpath(data, '/h:html/h:body'):
|
||||||
self.oeb.logger.warn(
|
self.oeb.logger.warn(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user