diff --git a/src/calibre/customize/conversion.py b/src/calibre/customize/conversion.py
index 6530e5f16c..c531a15e34 100644
--- a/src/calibre/customize/conversion.py
+++ b/src/calibre/customize/conversion.py
@@ -122,8 +122,9 @@ class InputFormatPlugin(Plugin):
def convert(self, stream, options, file_ext, log, accelerators):
'''
This method must be implemented in sub-classes. It must return
- the path to the created OPF file. All output should be contained in
- the current directory. If this plugin creates files outside the current
+ the path to the created OPF file or an :class:`OEBBook` instance.
+ All output should be contained in the current directory.
+ If this plugin creates files outside the current
directory they must be deleted/marked for deletion before this method
returns.
diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index 6142cb555a..41d5f0abd9 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -299,21 +299,15 @@ OptionRecommendation(name='language',
# Create an OEBBook from the input file. The input plugin does all the
# heavy lifting.
- from calibre.ebooks.oeb.reader import OEBReader
- from calibre.ebooks.oeb.base import OEBBook
accelerators = {}
tdir = PersistentTemporaryDirectory('_plumber')
- opfpath = self.input_plugin(open(self.input, 'rb'), self.opts,
+ self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts,
self.input_fmt, self.log,
accelerators, tdir)
- html_preprocessor = HTMLPreProcessor()
- self.reader = OEBReader()
- self.oeb = OEBBook(self.log, html_preprocessor=html_preprocessor)
- # Read OEB Book into OEBBook
- self.log.info('Parsing all content...')
- self.reader(self.oeb, opfpath)
+ if not hasattr(self.oeb, 'manifest'):
+ self.oeb = create_oebbook(self.log, self.oeb)
self.opts.source = self.opts.input_profile
self.opts.dest = self.opts.output_profile
@@ -340,7 +334,20 @@ OptionRecommendation(name='language',
trimmer(self.oeb, self.opts)
self.log.info('Creating %s...'%self.output_plugin.name)
- self.output_plugin.convert(self.oeb, self.output, self.input_plugin, self.opts,
- self.log)
+ self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
+ self.opts, self.log)
+def create_oebbook(log, opfpath):
+'''
+Create an OEBBook from an OPF file.
+
+:param log: Log object handed to the new :class:`OEBBook`; it is also used
+here to report that parsing has started.
+:param opfpath: Path to the OPF file; passed to the reader together with
+the new book.
+:return: The :class:`OEBBook` instance after the reader has been run on it.
+'''
+# Imported inside the function rather than at module level.
+from calibre.ebooks.oeb.reader import OEBReader
+from calibre.ebooks.oeb.base import OEBBook
+html_preprocessor = HTMLPreProcessor()
+reader = OEBReader()
+oeb = OEBBook(log, html_preprocessor=html_preprocessor)
+# Read OEB Book into OEBBook
+log.info('Parsing all content...')
+# The reader is invoked as a callable with the book and the OPF path.
+reader(oeb, opfpath)
+return oeb
diff --git a/src/calibre/ebooks/epub/__init__.py b/src/calibre/ebooks/epub/__init__.py
index 0be88da070..2bc076a8ad 100644
--- a/src/calibre/ebooks/epub/__init__.py
+++ b/src/calibre/ebooks/epub/__init__.py
@@ -10,23 +10,23 @@ import sys, textwrap, re, os, uuid
from itertools import cycle
from calibre.utils.config import Config, StringConfig
from calibre.utils.zipfile import ZipFile, ZIP_STORED
-from calibre.ebooks.html import config as common_config, tostring
+from calibre.ebooks.html import tostring
from lxml import etree
class DefaultProfile(object):
-
+
flow_size = sys.maxint
screen_size = None
remove_special_chars = False
remove_object_tags = False
-
+
class PRS505(DefaultProfile):
-
+
flow_size = 270000
screen_size = (590, 765)
remove_special_chars = re.compile(u'[\u200b\u00ad]')
remove_object_tags = True
-
+
PROFILES = {
'PRS505' : PRS505,
@@ -64,11 +64,11 @@ def config(defaults=None, name='epub'):
c = Config(name, desc)
else:
c = StringConfig(defaults, desc)
-
+
c.update(common_config())
c.remove_opt('output')
c.remove_opt('zip')
-
+
c.add_opt('output', ['-o', '--output'], default=None,
help=_('The output EPUB file. If not specified, it is '
'derived from the input file name.'))
@@ -81,22 +81,22 @@ def config(defaults=None, name='epub'):
help=_('Either the path to a CSS stylesheet or raw CSS. '
'This CSS will override any existing CSS '
'declarations in the source files.'))
- structure = c.add_group('structure detection',
+ structure = c.add_group('structure detection',
_('Control auto-detection of document structure.'))
- structure('chapter', ['--chapter'],
+ structure('chapter', ['--chapter'],
default="//*[re:match(name(), 'h[1-2]') and "
"re:test(., 'chapter|book|section|part', 'i')] | "
"//*[@class = 'chapter']",
help=_('''\
An XPath expression to detect chapter titles. The default is to consider <h1> or <h2>
- tags that contain the words "chapter","book","section" or "part" as chapter titles as
-well as any tags that have class="chapter".
+ tags that contain the words "chapter","book","section" or "part" as chapter titles as
+well as any tags that have class="chapter".
The expression used must evaluate to a list of elements. To disable chapter detection,
use the expression "/". See the XPath Tutorial in the calibre User Manual for further
help on using this feature.
''').replace('\n', ' '))
structure('chapter_mark', ['--chapter-mark'], choices=['pagebreak', 'rule', 'both', 'none'],
- default='pagebreak',
+ default='pagebreak',
help=_('Specify how to mark detected chapters. A value of '
'"pagebreak" will insert page breaks before chapters. '
'A value of "rule" will insert a line before chapters. '
@@ -129,13 +129,13 @@ help on using this feature.
help=_('XPath expression to find the name of each page in the '
'pagination map relative to its boundary element. '
'Default is to number all pages staring with 1.'))
- toc = c.add_group('toc',
+ toc = c.add_group('toc',
_('''\
Control the automatic generation of a Table of Contents. If an OPF file is detected
and it specifies a Table of Contents, then that will be used rather than trying
to auto-generate a Table of Contents.
''').replace('\n', ' '))
- toc('max_toc_links', ['--max-toc-links'], default=50,
+ toc('max_toc_links', ['--max-toc-links'], default=50,
help=_('Maximum number of links to insert into the TOC. Set to 0 '
'to disable. Default is: %default. Links are only added to the '
'TOC if less than the --toc-threshold number of chapters were detected.'))
@@ -166,15 +166,15 @@ to auto-generate a Table of Contents.
help=_('Normally, if the source file already has a Table of Contents, '
'it is used in preference to the auto-generated one. '
'With this option, the auto-generated one is always used.'))
-
+
layout = c.add_group('page layout', _('Control page layout'))
- layout('margin_top', ['--margin-top'], default=5.0,
+ layout('margin_top', ['--margin-top'], default=5.0,
help=_('Set the top margin in pts. Default is %default'))
- layout('margin_bottom', ['--margin-bottom'], default=5.0,
+ layout('margin_bottom', ['--margin-bottom'], default=5.0,
help=_('Set the bottom margin in pts. Default is %default'))
- layout('margin_left', ['--margin-left'], default=5.0,
+ layout('margin_left', ['--margin-left'], default=5.0,
help=_('Set the left margin in pts. Default is %default'))
- layout('margin_right', ['--margin-right'], default=5.0,
+ layout('margin_right', ['--margin-right'], default=5.0,
help=_('Set the right margin in pts. Default is %default'))
layout('base_font_size2', ['--base-font-size'], default=12.0,
help=_('The base font size in pts. Default is %defaultpt. '
@@ -195,12 +195,12 @@ to auto-generate a Table of Contents.
'This is only neccessary if the HTML files contain CSS that '
'uses sibling selectors. Enabling this greatly slows down '
'processing of large HTML files.'))
-
+
c.add_opt('show_opf', ['--show-opf'], default=False, group='debug',
help=_('Print generated OPF file to stdout'))
c.add_opt('show_ncx', ['--show-ncx'], default=False, group='debug',
help=_('Print generated NCX file to stdout'))
- c.add_opt('keep_intermediate', ['--keep-intermediate-files'], group='debug',
+ c.add_opt('keep_intermediate', ['--keep-intermediate-files'], group='debug',
default=False,
help=_('Keep intermediate files during processing by html2epub'))
c.add_opt('extract_to', ['--extract-to'], group='debug', default=None,
diff --git a/src/calibre/ebooks/epub/fonts.py b/src/calibre/ebooks/epub/fonts.py
index 5d0887f2d0..67e6066ed1 100644
--- a/src/calibre/ebooks/epub/fonts.py
+++ b/src/calibre/ebooks/epub/fonts.py
@@ -14,7 +14,7 @@ from lxml.cssselect import CSSSelector
from lxml import etree
from lxml.html import HtmlElement
-from calibre.ebooks.html import fromstring
+from calibre.ebooks.html_old import fromstring
from calibre.ebooks.epub import rules
from cssutils import CSSParser
@@ -24,7 +24,7 @@ absolute_size = r'(?P(x?x-)?(small|large)|medium)'
relative_size = r'(?Psmaller|larger)'
font_size_pat = re.compile('|'.join((relative_size, absolute_size, length)), re.I)
-line_height_pat = re.compile(r'({num})(px|in|cm|mm|pt|pc)'.replace('{num}', num))
+line_height_pat = re.compile(r'({num})(px|in|cm|mm|pt|pc)'.replace('{num}', num))
PTU = {
'in' : 72.,
@@ -37,12 +37,12 @@ PTU = {
DEFAULT_FONT_SIZE = 12
class Rationalizer(object):
-
+
@classmethod
def specificity(cls, s):
'''Map CSS specificity tuple to a single integer'''
- return sum([10**(4-i) + x for i,x in enumerate(s)])
-
+ return sum([10**(4-i) + x for i,x in enumerate(s)])
+
@classmethod
def compute_font_size(cls, elem):
'''
@@ -59,7 +59,7 @@ class Rationalizer(object):
elem.computed_font_size = sfs(parent.computed_font_size)
else:
elem.computed_font_size = sfs
-
+
@classmethod
def calculate_font_size(cls, style):
'Return font size in pts from style object. For relative units returns a callable'
@@ -69,7 +69,7 @@ class Rationalizer(object):
fs = match.group()
if style.fontSize:
fs = style.fontSize
-
+
match = font_size_pat.search(fs)
if match is None:
return None
@@ -89,8 +89,8 @@ class Rationalizer(object):
return 12 * x
if match.get('zero', False):
return 0.
- return functools.partial(operator.mul, 1.2) if 'larger' in fs.lower() else functools.partial(operator.mul, 0.8)
-
+ return functools.partial(operator.mul, 1.2) if 'larger' in fs.lower() else functools.partial(operator.mul, 0.8)
+
@classmethod
def resolve_rules(cls, stylesheets):
for sheet in stylesheets:
@@ -104,12 +104,12 @@ class Rationalizer(object):
if font_size is not None:
for s in r.selectorList:
sheet.fs_rules.append([CSSSelector(s.selectorText), font_size])
- orig = line_height_pat.search(r.style.lineHeight)
+ orig = line_height_pat.search(r.style.lineHeight)
if orig is not None:
for s in r.selectorList:
sheet.lh_rules.append([CSSSelector(s.selectorText), float(orig.group(1)) * PTU[orig.group(2).lower()]])
-
-
+
+
@classmethod
def apply_font_size_rules(cls, stylesheets, root):
'Add a ``specified_font_size`` attribute to every element that has a specified font size'
@@ -119,7 +119,7 @@ class Rationalizer(object):
elems = selector(root)
for elem in elems:
elem.specified_font_size = font_size
-
+
@classmethod
def remove_font_size_information(cls, stylesheets):
for r in rules(stylesheets):
@@ -134,17 +134,17 @@ class Rationalizer(object):
r.style.removeProperty('font')
if line_height_pat.search(r.style.lineHeight) is not None:
r.style.removeProperty('line-height')
-
+
@classmethod
def compute_font_sizes(cls, root, stylesheets, base=12):
stylesheets = [s for s in stylesheets if hasattr(s, 'cssText')]
cls.apply_font_size_rules(stylesheets, root)
-
+
# Compute the effective font size of all tags
root.computed_font_size = DEFAULT_FONT_SIZE
for elem in root.iter(etree.Element):
cls.compute_font_size(elem)
-
+
extra_css = {}
if base > 0:
# Calculate the "base" (i.e. most common) font size
@@ -157,20 +157,20 @@ class Rationalizer(object):
if t: t = t.strip()
if t:
font_sizes[elem.computed_font_size] += len(t)
-
+
t = getattr(elem, 'tail', '')
if t: t = t.strip()
if t:
parent = elem.getparent()
if parent.tag not in IGNORE:
font_sizes[parent.computed_font_size] += len(t)
-
+
try:
most_common = max(font_sizes.items(), key=operator.itemgetter(1))[0]
scale = base/most_common if most_common > 0 else 1.
except ValueError:
scale = 1.
-
+
# rescale absolute line-heights
counter = 0
for sheet in stylesheets:
@@ -181,17 +181,17 @@ class Rationalizer(object):
if not extra_css.has_key(elem.get('id')):
extra_css[elem.get('id')] = []
extra_css[elem.get('id')].append('line-height:%fpt'%(lh*scale))
-
-
-
+
+
+
# Rescale all computed font sizes
for elem in body.iter(etree.Element):
if isinstance(elem, HtmlElement):
elem.computed_font_size *= scale
-
- # Remove all font size specifications from the last stylesheet
+
+ # Remove all font size specifications from the last stylesheet
cls.remove_font_size_information(stylesheets[-1:])
-
+
# Create the CSS to implement the rescaled font sizes
for elem in body.iter(etree.Element):
cfs, pcfs = map(operator.attrgetter('computed_font_size'), (elem, elem.getparent()))
@@ -201,12 +201,12 @@ class Rationalizer(object):
if not extra_css.has_key(elem.get('id')):
extra_css[elem.get('id')] = []
extra_css[elem.get('id')].append('font-size: %f%%'%(100*(cfs/pcfs)))
-
+
css = CSSParser(loglevel=logging.ERROR).parseString('')
for id, r in extra_css.items():
css.add('#%s {%s}'%(id, ';'.join(r)))
return css
-
+
@classmethod
def rationalize(cls, stylesheets, root, opts):
logger = logging.getLogger('html2epub')
@@ -229,7 +229,7 @@ class Rationalizer(object):
################################################################################
class FontTest(unittest.TestCase):
-
+
def setUp(self):
from calibre.ebooks.epub import config
self.opts = config(defaults='').parse()
@@ -246,10 +246,10 @@ class FontTest(unittest.TestCase):
Some other text.
The longest piece of single font size text in this entire file. Used to test resizing.