mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Initial implementation of EPUB Output plugin
This commit is contained in:
parent
b93029a4fe
commit
4cd285859b
@ -287,13 +287,14 @@ from calibre.ebooks.odt.input import ODTInput
|
|||||||
from calibre.ebooks.rtf.input import RTFInput
|
from calibre.ebooks.rtf.input import RTFInput
|
||||||
from calibre.ebooks.html.input import HTMLInput
|
from calibre.ebooks.html.input import HTMLInput
|
||||||
from calibre.ebooks.oeb.output import OEBOutput
|
from calibre.ebooks.oeb.output import OEBOutput
|
||||||
|
from calibre.ebooks.epub.output import EPUBOutput
|
||||||
from calibre.ebooks.txt.output import TXTOutput
|
from calibre.ebooks.txt.output import TXTOutput
|
||||||
from calibre.ebooks.pdf.output import PDFOutput
|
from calibre.ebooks.pdf.output import PDFOutput
|
||||||
from calibre.customize.profiles import input_profiles, output_profiles
|
from calibre.customize.profiles import input_profiles, output_profiles
|
||||||
|
|
||||||
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
|
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
|
||||||
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
|
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
|
||||||
FB2Input, ODTInput, RTFInput]
|
FB2Input, ODTInput, RTFInput, EPUBOutput]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
x.__name__.endswith('MetadataReader')]
|
x.__name__.endswith('MetadataReader')]
|
||||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
|
||||||
|
@ -3,7 +3,7 @@ __license__ = 'GPL 3'
|
|||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import sys, re
|
import re
|
||||||
from itertools import izip
|
from itertools import izip
|
||||||
|
|
||||||
from calibre.customize import Plugin as _Plugin
|
from calibre.customize import Plugin as _Plugin
|
||||||
@ -22,7 +22,7 @@ class Plugin(_Plugin):
|
|||||||
|
|
||||||
fbase = 12
|
fbase = 12
|
||||||
fsizes = [5, 7, 9, 12, 13.5, 17, 20, 22, 24]
|
fsizes = [5, 7, 9, 12, 13.5, 17, 20, 22, 24]
|
||||||
screen_size = (800, 600)
|
screen_size = (1600, 1200)
|
||||||
dpi = 100
|
dpi = 100
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
|
@ -6,32 +6,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
'''
|
'''
|
||||||
Conversion to EPUB.
|
Conversion to EPUB.
|
||||||
'''
|
'''
|
||||||
import sys, textwrap, re, os, uuid
|
|
||||||
from itertools import cycle
|
|
||||||
from calibre.utils.config import Config, StringConfig
|
|
||||||
from calibre.utils.zipfile import ZipFile, ZIP_STORED
|
from calibre.utils.zipfile import ZipFile, ZIP_STORED
|
||||||
from calibre.ebooks.html import tostring
|
|
||||||
from lxml import etree
|
|
||||||
|
|
||||||
class DefaultProfile(object):
|
|
||||||
|
|
||||||
flow_size = sys.maxint
|
|
||||||
screen_size = None
|
|
||||||
remove_special_chars = False
|
|
||||||
remove_object_tags = False
|
|
||||||
|
|
||||||
class PRS505(DefaultProfile):
|
|
||||||
|
|
||||||
flow_size = 270000
|
|
||||||
screen_size = (590, 765)
|
|
||||||
remove_special_chars = re.compile(u'[\u200b\u00ad]')
|
|
||||||
remove_object_tags = True
|
|
||||||
|
|
||||||
|
|
||||||
PROFILES = {
|
|
||||||
'PRS505' : PRS505,
|
|
||||||
'None' : DefaultProfile,
|
|
||||||
}
|
|
||||||
|
|
||||||
def rules(stylesheets):
|
def rules(stylesheets):
|
||||||
for s in stylesheets:
|
for s in stylesheets:
|
||||||
@ -58,152 +33,4 @@ def initialize_container(path_to_container, opf_name='metadata.opf'):
|
|||||||
zf.writestr('META-INF/container.xml', CONTAINER)
|
zf.writestr('META-INF/container.xml', CONTAINER)
|
||||||
return zf
|
return zf
|
||||||
|
|
||||||
def config(defaults=None, name='epub'):
|
|
||||||
desc = _('Options to control the conversion to EPUB')
|
|
||||||
if defaults is None:
|
|
||||||
c = Config(name, desc)
|
|
||||||
else:
|
|
||||||
c = StringConfig(defaults, desc)
|
|
||||||
|
|
||||||
c.update(common_config())
|
|
||||||
c.remove_opt('output')
|
|
||||||
c.remove_opt('zip')
|
|
||||||
|
|
||||||
c.add_opt('output', ['-o', '--output'], default=None,
|
|
||||||
help=_('The output EPUB file. If not specified, it is '
|
|
||||||
'derived from the input file name.'))
|
|
||||||
c.add_opt('profile', ['--profile'], default='PRS505', choices=list(PROFILES.keys()),
|
|
||||||
help=_('Profile of the target device this EPUB is meant for. '
|
|
||||||
'Set to None to create a device independent EPUB. '
|
|
||||||
'The profile is used for device specific restrictions '
|
|
||||||
'on the EPUB. Choices are: ')+str(list(PROFILES.keys())))
|
|
||||||
c.add_opt('override_css', ['--override-css'], default=None,
|
|
||||||
help=_('Either the path to a CSS stylesheet or raw CSS. '
|
|
||||||
'This CSS will override any existing CSS '
|
|
||||||
'declarations in the source files.'))
|
|
||||||
structure = c.add_group('structure detection',
|
|
||||||
_('Control auto-detection of document structure.'))
|
|
||||||
structure('chapter', ['--chapter'],
|
|
||||||
default="//*[re:match(name(), 'h[1-2]') and "
|
|
||||||
"re:test(., 'chapter|book|section|part', 'i')] | "
|
|
||||||
"//*[@class = 'chapter']",
|
|
||||||
help=_('''\
|
|
||||||
An XPath expression to detect chapter titles. The default is to consider <h1> or
|
|
||||||
<h2> tags that contain the words "chapter","book","section" or "part" as chapter titles as
|
|
||||||
well as any tags that have class="chapter".
|
|
||||||
The expression used must evaluate to a list of elements. To disable chapter detection,
|
|
||||||
use the expression "/". See the XPath Tutorial in the calibre User Manual for further
|
|
||||||
help on using this feature.
|
|
||||||
''').replace('\n', ' '))
|
|
||||||
structure('chapter_mark', ['--chapter-mark'], choices=['pagebreak', 'rule', 'both', 'none'],
|
|
||||||
default='pagebreak',
|
|
||||||
help=_('Specify how to mark detected chapters. A value of '
|
|
||||||
'"pagebreak" will insert page breaks before chapters. '
|
|
||||||
'A value of "rule" will insert a line before chapters. '
|
|
||||||
'A value of "none" will disable chapter marking and a '
|
|
||||||
'value of "both" will use both page breaks and lines '
|
|
||||||
'to mark chapters.'))
|
|
||||||
structure('cover', ['--cover'], default=None,
|
|
||||||
help=_('Path to the cover to be used for this book'))
|
|
||||||
structure('prefer_metadata_cover', ['--prefer-metadata-cover'], default=False,
|
|
||||||
action='store_true',
|
|
||||||
help=_('Use the cover detected from the source file in preference '
|
|
||||||
'to the specified cover.'))
|
|
||||||
structure('remove_first_image', ['--remove-first-image'], default=False,
|
|
||||||
help=_('Remove the first image from the input ebook. Useful if '
|
|
||||||
'the first image in the source file is a cover and you '
|
|
||||||
'are specifying an external cover.'))
|
|
||||||
structure('dont_split_on_page_breaks', ['--dont-split-on-page-breaks'], default=False,
|
|
||||||
help=_('Turn off splitting at page breaks. Normally, input files '
|
|
||||||
'are automatically split at every page break into '
|
|
||||||
'two files. This gives an output ebook that can be parsed '
|
|
||||||
'faster and with less resources. However, splitting is '
|
|
||||||
'slow and if your source file contains a very large '
|
|
||||||
'number of page breaks, you should turn off splitting '
|
|
||||||
'on page breaks.'))
|
|
||||||
structure('page', ['--page'], default=None,
|
|
||||||
help=_('XPath expression to detect page boundaries for building '
|
|
||||||
'a custom pagination map, as used by AdobeDE. Default is '
|
|
||||||
'not to build an explicit pagination map.'))
|
|
||||||
structure('page_names', ['--page-names'], default=None,
|
|
||||||
help=_('XPath expression to find the name of each page in the '
|
|
||||||
'pagination map relative to its boundary element. '
|
|
||||||
'Default is to number all pages staring with 1.'))
|
|
||||||
toc = c.add_group('toc',
|
|
||||||
_('''\
|
|
||||||
Control the automatic generation of a Table of Contents. If an OPF file is detected
|
|
||||||
and it specifies a Table of Contents, then that will be used rather than trying
|
|
||||||
to auto-generate a Table of Contents.
|
|
||||||
''').replace('\n', ' '))
|
|
||||||
toc('max_toc_links', ['--max-toc-links'], default=50,
|
|
||||||
help=_('Maximum number of links to insert into the TOC. Set to 0 '
|
|
||||||
'to disable. Default is: %default. Links are only added to the '
|
|
||||||
'TOC if less than the --toc-threshold number of chapters were detected.'))
|
|
||||||
toc('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
|
|
||||||
help=_("Don't add auto-detected chapters to the Table of Contents."))
|
|
||||||
toc('toc_threshold', ['--toc-threshold'], default=6,
|
|
||||||
help=_('If fewer than this number of chapters is detected, then links '
|
|
||||||
'are added to the Table of Contents. Default: %default'))
|
|
||||||
toc('level1_toc', ['--level1-toc'], default=None,
|
|
||||||
help=_('XPath expression that specifies all tags that should be added '
|
|
||||||
'to the Table of Contents at level one. If this is specified, '
|
|
||||||
'it takes precedence over other forms of auto-detection.'))
|
|
||||||
toc('level2_toc', ['--level2-toc'], default=None,
|
|
||||||
help=_('XPath expression that specifies all tags that should be added '
|
|
||||||
'to the Table of Contents at level two. Each entry is added '
|
|
||||||
'under the previous level one entry.'))
|
|
||||||
toc('level3_toc', ['--level3-toc'], default=None,
|
|
||||||
help=_('XPath expression that specifies all tags that should be added '
|
|
||||||
'to the Table of Contents at level three. Each entry is added '
|
|
||||||
'under the previous level two entry.'))
|
|
||||||
toc('from_ncx', ['--from-ncx'], default=None,
|
|
||||||
help=_('Path to a .ncx file that contains the table of contents to use '
|
|
||||||
'for this ebook. The NCX file should contain links relative to '
|
|
||||||
'the directory it is placed in. See '
|
|
||||||
'http://www.niso.org/workrooms/daisy/Z39-86-2005.html#NCX for '
|
|
||||||
'an overview of the NCX format.'))
|
|
||||||
toc('use_auto_toc', ['--use-auto-toc'], default=False,
|
|
||||||
help=_('Normally, if the source file already has a Table of Contents, '
|
|
||||||
'it is used in preference to the auto-generated one. '
|
|
||||||
'With this option, the auto-generated one is always used.'))
|
|
||||||
|
|
||||||
layout = c.add_group('page layout', _('Control page layout'))
|
|
||||||
layout('margin_top', ['--margin-top'], default=5.0,
|
|
||||||
help=_('Set the top margin in pts. Default is %default'))
|
|
||||||
layout('margin_bottom', ['--margin-bottom'], default=5.0,
|
|
||||||
help=_('Set the bottom margin in pts. Default is %default'))
|
|
||||||
layout('margin_left', ['--margin-left'], default=5.0,
|
|
||||||
help=_('Set the left margin in pts. Default is %default'))
|
|
||||||
layout('margin_right', ['--margin-right'], default=5.0,
|
|
||||||
help=_('Set the right margin in pts. Default is %default'))
|
|
||||||
layout('base_font_size2', ['--base-font-size'], default=12.0,
|
|
||||||
help=_('The base font size in pts. Default is %defaultpt. '
|
|
||||||
'Set to 0 to disable rescaling of fonts.'))
|
|
||||||
layout('remove_paragraph_spacing', ['--remove-paragraph-spacing'], default=False,
|
|
||||||
help=_('Remove spacing between paragraphs. '
|
|
||||||
'Also sets a indent on paragraphs of 1.5em. '
|
|
||||||
'You can override this by adding p {text-indent: 0cm} to '
|
|
||||||
'--override-css. Spacing removal will not work if the source '
|
|
||||||
'file forces inter-paragraph spacing.'))
|
|
||||||
layout('no_justification', ['--no-justification'], default=False,
|
|
||||||
help=_('Do not force text to be justified in output.'))
|
|
||||||
layout('linearize_tables', ['--linearize-tables'], default=False,
|
|
||||||
help=_('Remove table markup, converting it into paragraphs. '
|
|
||||||
'This is useful if your source file uses a table to manage layout.'))
|
|
||||||
layout('preserve_tag_structure', ['--preserve-tag-structure'], default=False,
|
|
||||||
help=_('Preserve the HTML tag structure while splitting large HTML files. '
|
|
||||||
'This is only neccessary if the HTML files contain CSS that '
|
|
||||||
'uses sibling selectors. Enabling this greatly slows down '
|
|
||||||
'processing of large HTML files.'))
|
|
||||||
|
|
||||||
c.add_opt('show_opf', ['--show-opf'], default=False, group='debug',
|
|
||||||
help=_('Print generated OPF file to stdout'))
|
|
||||||
c.add_opt('show_ncx', ['--show-ncx'], default=False, group='debug',
|
|
||||||
help=_('Print generated NCX file to stdout'))
|
|
||||||
c.add_opt('keep_intermediate', ['--keep-intermediate-files'], group='debug',
|
|
||||||
default=False,
|
|
||||||
help=_('Keep intermediate files during processing by html2epub'))
|
|
||||||
c.add_opt('extract_to', ['--extract-to'], group='debug', default=None,
|
|
||||||
help=_('Extract the contents of the produced EPUB file to the '
|
|
||||||
'specified directory.'))
|
|
||||||
return c
|
|
||||||
|
@ -1,300 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
'''
|
|
||||||
Font size rationalization. See :function:`relativize`.
|
|
||||||
'''
|
|
||||||
|
|
||||||
import logging, re, operator, functools, collections, unittest, copy, sys
|
|
||||||
from xml.dom import SyntaxErr
|
|
||||||
|
|
||||||
from lxml.cssselect import CSSSelector
|
|
||||||
from lxml import etree
|
|
||||||
from lxml.html import HtmlElement
|
|
||||||
|
|
||||||
from calibre.ebooks.html_old import fromstring
|
|
||||||
from calibre.ebooks.epub import rules
|
|
||||||
from cssutils import CSSParser
|
|
||||||
|
|
||||||
num = r'[-]?\d+|[-]?\d*\.\d+'
|
|
||||||
length = r'(?P<zero>0)|(?P<num>{num})(?P<unit>%|em|ex|px|in|cm|mm|pt|pc)'.replace('{num}', num)
|
|
||||||
absolute_size = r'(?P<abs>(x?x-)?(small|large)|medium)'
|
|
||||||
relative_size = r'(?P<rel>smaller|larger)'
|
|
||||||
|
|
||||||
font_size_pat = re.compile('|'.join((relative_size, absolute_size, length)), re.I)
|
|
||||||
line_height_pat = re.compile(r'({num})(px|in|cm|mm|pt|pc)'.replace('{num}', num))
|
|
||||||
|
|
||||||
PTU = {
|
|
||||||
'in' : 72.,
|
|
||||||
'cm' : 72/2.54,
|
|
||||||
'mm' : 72/25.4,
|
|
||||||
'pt' : 1.0,
|
|
||||||
'pc' : 1/12.,
|
|
||||||
}
|
|
||||||
|
|
||||||
DEFAULT_FONT_SIZE = 12
|
|
||||||
|
|
||||||
class Rationalizer(object):
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def specificity(cls, s):
|
|
||||||
'''Map CSS specificity tuple to a single integer'''
|
|
||||||
return sum([10**(4-i) + x for i,x in enumerate(s)])
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def compute_font_size(cls, elem):
|
|
||||||
'''
|
|
||||||
Calculate the effective font size of an element traversing its ancestors as far as
|
|
||||||
neccessary.
|
|
||||||
'''
|
|
||||||
cfs = elem.computed_font_size
|
|
||||||
if cfs is not None:
|
|
||||||
return
|
|
||||||
sfs = elem.specified_font_size
|
|
||||||
if callable(sfs):
|
|
||||||
parent = elem.getparent()
|
|
||||||
cls.compute_font_size(parent)
|
|
||||||
elem.computed_font_size = sfs(parent.computed_font_size)
|
|
||||||
else:
|
|
||||||
elem.computed_font_size = sfs
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def calculate_font_size(cls, style):
|
|
||||||
'Return font size in pts from style object. For relative units returns a callable'
|
|
||||||
match = font_size_pat.search(style.font)
|
|
||||||
fs = ''
|
|
||||||
if match:
|
|
||||||
fs = match.group()
|
|
||||||
if style.fontSize:
|
|
||||||
fs = style.fontSize
|
|
||||||
|
|
||||||
match = font_size_pat.search(fs)
|
|
||||||
if match is None:
|
|
||||||
return None
|
|
||||||
match = match.groupdict()
|
|
||||||
unit = match.get('unit', '')
|
|
||||||
if unit: unit = unit.lower()
|
|
||||||
if unit in PTU.keys():
|
|
||||||
return PTU[unit] * float(match['num'])
|
|
||||||
if unit in ('em', 'ex'):
|
|
||||||
return functools.partial(operator.mul, float(match['num']))
|
|
||||||
if unit == '%':
|
|
||||||
return functools.partial(operator.mul, float(match['num'])/100.)
|
|
||||||
abs = match.get('abs', '')
|
|
||||||
if abs: abs = abs.lower()
|
|
||||||
if abs:
|
|
||||||
x = (1.2)**(abs.count('x') * (-1 if 'small' in abs else 1))
|
|
||||||
return 12 * x
|
|
||||||
if match.get('zero', False):
|
|
||||||
return 0.
|
|
||||||
return functools.partial(operator.mul, 1.2) if 'larger' in fs.lower() else functools.partial(operator.mul, 0.8)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def resolve_rules(cls, stylesheets):
|
|
||||||
for sheet in stylesheets:
|
|
||||||
if hasattr(sheet, 'fs_rules'):
|
|
||||||
continue
|
|
||||||
sheet.fs_rules = []
|
|
||||||
sheet.lh_rules = []
|
|
||||||
for r in sheet:
|
|
||||||
if r.type == r.STYLE_RULE:
|
|
||||||
font_size = cls.calculate_font_size(r.style)
|
|
||||||
if font_size is not None:
|
|
||||||
for s in r.selectorList:
|
|
||||||
sheet.fs_rules.append([CSSSelector(s.selectorText), font_size])
|
|
||||||
orig = line_height_pat.search(r.style.lineHeight)
|
|
||||||
if orig is not None:
|
|
||||||
for s in r.selectorList:
|
|
||||||
sheet.lh_rules.append([CSSSelector(s.selectorText), float(orig.group(1)) * PTU[orig.group(2).lower()]])
|
|
||||||
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def apply_font_size_rules(cls, stylesheets, root):
|
|
||||||
'Add a ``specified_font_size`` attribute to every element that has a specified font size'
|
|
||||||
cls.resolve_rules(stylesheets)
|
|
||||||
for sheet in stylesheets:
|
|
||||||
for selector, font_size in sheet.fs_rules:
|
|
||||||
elems = selector(root)
|
|
||||||
for elem in elems:
|
|
||||||
elem.specified_font_size = font_size
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def remove_font_size_information(cls, stylesheets):
|
|
||||||
for r in rules(stylesheets):
|
|
||||||
r.style.removeProperty('font-size')
|
|
||||||
try:
|
|
||||||
new = font_size_pat.sub('', r.style.font).strip()
|
|
||||||
if new:
|
|
||||||
r.style.font = new
|
|
||||||
else:
|
|
||||||
r.style.removeProperty('font')
|
|
||||||
except SyntaxErr:
|
|
||||||
r.style.removeProperty('font')
|
|
||||||
if line_height_pat.search(r.style.lineHeight) is not None:
|
|
||||||
r.style.removeProperty('line-height')
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def compute_font_sizes(cls, root, stylesheets, base=12):
|
|
||||||
stylesheets = [s for s in stylesheets if hasattr(s, 'cssText')]
|
|
||||||
cls.apply_font_size_rules(stylesheets, root)
|
|
||||||
|
|
||||||
# Compute the effective font size of all tags
|
|
||||||
root.computed_font_size = DEFAULT_FONT_SIZE
|
|
||||||
for elem in root.iter(etree.Element):
|
|
||||||
cls.compute_font_size(elem)
|
|
||||||
|
|
||||||
extra_css = {}
|
|
||||||
if base > 0:
|
|
||||||
# Calculate the "base" (i.e. most common) font size
|
|
||||||
font_sizes = collections.defaultdict(lambda : 0)
|
|
||||||
body = root.xpath('//body')[0]
|
|
||||||
IGNORE = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
|
|
||||||
for elem in body.iter(etree.Element):
|
|
||||||
if elem.tag not in IGNORE:
|
|
||||||
t = getattr(elem, 'text', '')
|
|
||||||
if t: t = t.strip()
|
|
||||||
if t:
|
|
||||||
font_sizes[elem.computed_font_size] += len(t)
|
|
||||||
|
|
||||||
t = getattr(elem, 'tail', '')
|
|
||||||
if t: t = t.strip()
|
|
||||||
if t:
|
|
||||||
parent = elem.getparent()
|
|
||||||
if parent.tag not in IGNORE:
|
|
||||||
font_sizes[parent.computed_font_size] += len(t)
|
|
||||||
|
|
||||||
try:
|
|
||||||
most_common = max(font_sizes.items(), key=operator.itemgetter(1))[0]
|
|
||||||
scale = base/most_common if most_common > 0 else 1.
|
|
||||||
except ValueError:
|
|
||||||
scale = 1.
|
|
||||||
|
|
||||||
# rescale absolute line-heights
|
|
||||||
counter = 0
|
|
||||||
for sheet in stylesheets:
|
|
||||||
for selector, lh in sheet.lh_rules:
|
|
||||||
for elem in selector(root):
|
|
||||||
elem.set('id', elem.get('id', 'cfs_%d'%counter))
|
|
||||||
counter += 1
|
|
||||||
if not extra_css.has_key(elem.get('id')):
|
|
||||||
extra_css[elem.get('id')] = []
|
|
||||||
extra_css[elem.get('id')].append('line-height:%fpt'%(lh*scale))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Rescale all computed font sizes
|
|
||||||
for elem in body.iter(etree.Element):
|
|
||||||
if isinstance(elem, HtmlElement):
|
|
||||||
elem.computed_font_size *= scale
|
|
||||||
|
|
||||||
# Remove all font size specifications from the last stylesheet
|
|
||||||
cls.remove_font_size_information(stylesheets[-1:])
|
|
||||||
|
|
||||||
# Create the CSS to implement the rescaled font sizes
|
|
||||||
for elem in body.iter(etree.Element):
|
|
||||||
cfs, pcfs = map(operator.attrgetter('computed_font_size'), (elem, elem.getparent()))
|
|
||||||
if abs(cfs-pcfs) > 1/12. and abs(pcfs) > 1/12.:
|
|
||||||
elem.set('id', elem.get('id', 'cfs_%d'%counter))
|
|
||||||
counter += 1
|
|
||||||
if not extra_css.has_key(elem.get('id')):
|
|
||||||
extra_css[elem.get('id')] = []
|
|
||||||
extra_css[elem.get('id')].append('font-size: %f%%'%(100*(cfs/pcfs)))
|
|
||||||
|
|
||||||
css = CSSParser(loglevel=logging.ERROR).parseString('')
|
|
||||||
for id, r in extra_css.items():
|
|
||||||
css.add('#%s {%s}'%(id, ';'.join(r)))
|
|
||||||
return css
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def rationalize(cls, stylesheets, root, opts):
|
|
||||||
logger = logging.getLogger('html2epub')
|
|
||||||
logger.info('\t\tRationalizing fonts...')
|
|
||||||
extra_css = None
|
|
||||||
if opts.base_font_size2 > 0:
|
|
||||||
try:
|
|
||||||
extra_css = cls.compute_font_sizes(root, stylesheets, base=opts.base_font_size2)
|
|
||||||
except:
|
|
||||||
logger.warning('Failed to rationalize font sizes.')
|
|
||||||
if opts.verbose > 1:
|
|
||||||
logger.exception('')
|
|
||||||
finally:
|
|
||||||
root.remove_font_size_information()
|
|
||||||
logger.debug('\t\tDone rationalizing')
|
|
||||||
return extra_css
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
############## Testing
|
|
||||||
################################################################################
|
|
||||||
|
|
||||||
class FontTest(unittest.TestCase):
|
|
||||||
|
|
||||||
def setUp(self):
|
|
||||||
from calibre.ebooks.epub import config
|
|
||||||
self.opts = config(defaults='').parse()
|
|
||||||
self.html = '''
|
|
||||||
<html>
|
|
||||||
<head>
|
|
||||||
<title>Test document</title>
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
<div id="div1">
|
|
||||||
<!-- A comment -->
|
|
||||||
<p id="p1">Some <b>text</b></p>
|
|
||||||
</div>
|
|
||||||
<p id="p2">Some other <span class="it">text</span>.</p>
|
|
||||||
<p id="longest">The longest piece of single font size text in this entire file. Used to test resizing.</p>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
'''
|
|
||||||
self.root = fromstring(self.html)
|
|
||||||
|
|
||||||
def do_test(self, css, base=DEFAULT_FONT_SIZE, scale=1):
|
|
||||||
root1 = copy.deepcopy(self.root)
|
|
||||||
root1.computed_font_size = DEFAULT_FONT_SIZE
|
|
||||||
stylesheet = CSSParser(loglevel=logging.ERROR).parseString(css)
|
|
||||||
stylesheet2 = Rationalizer.compute_font_sizes(root1, [stylesheet], base)
|
|
||||||
root2 = copy.deepcopy(root1)
|
|
||||||
root2.remove_font_size_information()
|
|
||||||
root2.computed_font_size = DEFAULT_FONT_SIZE
|
|
||||||
Rationalizer.apply_font_size_rules([stylesheet2], root2)
|
|
||||||
for elem in root2.iter(etree.Element):
|
|
||||||
Rationalizer.compute_font_size(elem)
|
|
||||||
for e1, e2 in zip(root1.xpath('//body')[0].iter(etree.Element), root2.xpath('//body')[0].iter(etree.Element)):
|
|
||||||
self.assertAlmostEqual(e1.computed_font_size, e2.computed_font_size,
|
|
||||||
msg='Computed font sizes for %s not equal. Original: %f Processed: %f'%\
|
|
||||||
(root1.getroottree().getpath(e1), e1.computed_font_size, e2.computed_font_size))
|
|
||||||
return stylesheet2.cssText
|
|
||||||
|
|
||||||
def testStripping(self):
|
|
||||||
'Test that any original entries are removed from the CSS'
|
|
||||||
css = 'p { font: bold 10px italic smaller; font-size: x-large} \na { font-size: 0 }'
|
|
||||||
css = CSSParser(loglevel=logging.ERROR).parseString(css)
|
|
||||||
Rationalizer.compute_font_sizes(copy.deepcopy(self.root), [css])
|
|
||||||
self.assertEqual(css.cssText.replace(' ', '').replace('\n', ''),
|
|
||||||
'p{font:bolditalic}')
|
|
||||||
|
|
||||||
def testIdentity(self):
|
|
||||||
'Test that no unnecessary font size changes are made'
|
|
||||||
extra_css = self.do_test('div {font-size:12pt} \nspan {font-size:100%}')
|
|
||||||
self.assertEqual(extra_css.strip(), '')
|
|
||||||
|
|
||||||
def testRelativization(self):
|
|
||||||
'Test conversion of absolute to relative sizes'
|
|
||||||
self.do_test('#p1 {font: 24pt} b {font: 12pt} .it {font: 48pt} #p2 {font: 100%}')
|
|
||||||
|
|
||||||
def testResizing(self):
|
|
||||||
'Test resizing of fonts'
|
|
||||||
self.do_test('#longest {font: 24pt} .it {font:20pt; line-height:22pt}')
|
|
||||||
|
|
||||||
|
|
||||||
def suite():
|
|
||||||
return unittest.TestLoader().loadTestsFromTestCase(FontTest)
|
|
||||||
|
|
||||||
def test():
|
|
||||||
unittest.TextTestRunner(verbosity=2).run(suite())
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
sys.exit(test())
|
|
||||||
|
|
@ -1,93 +0,0 @@
|
|||||||
from __future__ import with_statement
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
'''
|
|
||||||
Convert any ebook format to epub.
|
|
||||||
'''
|
|
||||||
|
|
||||||
import sys, os, re
|
|
||||||
from contextlib import nested
|
|
||||||
|
|
||||||
from calibre import extract, walk
|
|
||||||
from calibre.ebooks import DRMError
|
|
||||||
from calibre.ebooks.epub import config as common_config
|
|
||||||
from calibre.ebooks.epub.from_html import convert as html2epub, find_html_index
|
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
|
||||||
from calibre.utils.zipfile import ZipFile
|
|
||||||
from calibre.customize.ui import run_plugins_on_preprocess
|
|
||||||
|
|
||||||
|
|
||||||
SOURCE_FORMATS = ['lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf',
|
|
||||||
'txt', 'pdf', 'rar', 'zip', 'oebzip', 'htm', 'html', 'epub']
|
|
||||||
|
|
||||||
def unarchive(path, tdir):
|
|
||||||
extract(path, tdir)
|
|
||||||
files = list(walk(tdir))
|
|
||||||
|
|
||||||
for ext in ['opf'] + list(MAP.keys()):
|
|
||||||
for f in files:
|
|
||||||
if f.lower().endswith('.'+ext):
|
|
||||||
if ext in ['txt', 'rtf'] and os.stat(f).st_size < 2048:
|
|
||||||
continue
|
|
||||||
return f, ext
|
|
||||||
return find_html_index(files)
|
|
||||||
|
|
||||||
def any2epub(opts, path, notification=None, create_epub=True,
|
|
||||||
oeb_cover=False, extract_to=None):
|
|
||||||
path = run_plugins_on_preprocess(path)
|
|
||||||
ext = os.path.splitext(path)[1]
|
|
||||||
if not ext:
|
|
||||||
raise ValueError('Unknown file type: '+path)
|
|
||||||
ext = ext.lower()[1:]
|
|
||||||
|
|
||||||
if opts.output is None:
|
|
||||||
opts.output = os.path.splitext(os.path.basename(path))[0]+'.epub'
|
|
||||||
|
|
||||||
with nested(TemporaryDirectory('_any2epub1'), TemporaryDirectory('_any2epub2')) as (tdir1, tdir2):
|
|
||||||
if ext in ['rar', 'zip', 'oebzip']:
|
|
||||||
path, ext = unarchive(path, tdir1)
|
|
||||||
print 'Found %s file in archive'%(ext.upper())
|
|
||||||
|
|
||||||
if ext in MAP.keys():
|
|
||||||
path = MAP[ext](path, tdir2, opts)
|
|
||||||
ext = 'opf'
|
|
||||||
|
|
||||||
|
|
||||||
if re.match(r'((x){0,1}htm(l){0,1})|opf', ext) is None:
|
|
||||||
raise ValueError('Conversion from %s is not supported'%ext.upper())
|
|
||||||
|
|
||||||
print 'Creating EPUB file...'
|
|
||||||
html2epub(path, opts, notification=notification,
|
|
||||||
create_epub=create_epub, oeb_cover=oeb_cover,
|
|
||||||
extract_to=extract_to)
|
|
||||||
|
|
||||||
def config(defaults=None):
|
|
||||||
return common_config(defaults=defaults)
|
|
||||||
|
|
||||||
|
|
||||||
def formats():
|
|
||||||
return ['html', 'rar', 'zip', 'oebzip']+list(MAP.keys())
|
|
||||||
|
|
||||||
USAGE = _('''\
|
|
||||||
%%prog [options] filename
|
|
||||||
|
|
||||||
Convert any of a large number of ebook formats to a %s file. Supported formats are: %s
|
|
||||||
''')
|
|
||||||
|
|
||||||
def option_parser(usage=USAGE):
|
|
||||||
return config().option_parser(usage=usage%('EPUB', formats()))
|
|
||||||
|
|
||||||
def main(args=sys.argv):
|
|
||||||
parser = option_parser()
|
|
||||||
opts, args = parser.parse_args(args)
|
|
||||||
if len(args) < 2:
|
|
||||||
parser.print_help()
|
|
||||||
print 'No input file specified.'
|
|
||||||
return 1
|
|
||||||
any2epub(opts, args[1])
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
sys.exit(main())
|
|
@ -1,71 +0,0 @@
|
|||||||
from __future__ import with_statement
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
'''
|
|
||||||
Convert periodical content into EPUB ebooks.
|
|
||||||
'''
|
|
||||||
import sys, glob, os
|
|
||||||
from calibre.web.feeds.main import config as feeds2disk_config, USAGE, run_recipe
|
|
||||||
from calibre.ebooks.epub.from_html import config as html2epub_config
|
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
|
||||||
from calibre.ebooks.epub.from_html import convert as html2epub
|
|
||||||
from calibre import strftime, sanitize_file_name
|
|
||||||
|
|
||||||
def config(defaults=None):
|
|
||||||
c = feeds2disk_config(defaults=defaults)
|
|
||||||
c.remove('lrf')
|
|
||||||
c.remove('epub')
|
|
||||||
c.remove('output_dir')
|
|
||||||
c.update(html2epub_config(defaults=defaults))
|
|
||||||
c.remove('chapter_mark')
|
|
||||||
return c
|
|
||||||
|
|
||||||
def option_parser():
|
|
||||||
c = config()
|
|
||||||
return c.option_parser(usage=USAGE)
|
|
||||||
|
|
||||||
def convert(opts, recipe_arg, notification=None):
|
|
||||||
opts.lrf = False
|
|
||||||
opts.epub = True
|
|
||||||
if opts.debug:
|
|
||||||
opts.verbose = 2
|
|
||||||
parser = option_parser()
|
|
||||||
with TemporaryDirectory('_feeds2epub') as tdir:
|
|
||||||
opts.output_dir = tdir
|
|
||||||
recipe = run_recipe(opts, recipe_arg, parser, notification=notification)
|
|
||||||
c = config()
|
|
||||||
recipe_opts = c.parse_string(recipe.html2epub_options)
|
|
||||||
c.smart_update(recipe_opts, opts)
|
|
||||||
opts = recipe_opts
|
|
||||||
opts.chapter_mark = 'none'
|
|
||||||
opts.dont_split_on_page_breaks = True
|
|
||||||
opf = glob.glob(os.path.join(tdir, '*.opf'))
|
|
||||||
if not opf:
|
|
||||||
raise Exception('Downloading of recipe: %s failed'%recipe_arg)
|
|
||||||
opf = opf[0]
|
|
||||||
|
|
||||||
if opts.output is None:
|
|
||||||
fname = recipe.title + strftime(recipe.timefmt) + '.epub'
|
|
||||||
opts.output = os.path.join(os.getcwd(), sanitize_file_name(fname))
|
|
||||||
|
|
||||||
print 'Generating epub...'
|
|
||||||
opts.encoding = 'utf-8'
|
|
||||||
opts.remove_paragraph_spacing = True
|
|
||||||
html2epub(opf, opts, notification=notification)
|
|
||||||
|
|
||||||
|
|
||||||
def main(args=sys.argv, notification=None, handler=None):
    '''
    Command line entry point.

    Prints the help text and returns 1 unless exactly one recipe argument
    was given or the ``feeds`` option is set; otherwise runs
    :func:`convert` and returns 0.
    '''
    parser = option_parser()
    opts, positional = parser.parse_args(args)
    if len(positional) != 2 and opts.feeds is None:
        parser.print_help()
        return 1
    if len(positional) > 1:
        recipe_arg = positional[1]
    else:
        recipe_arg = None
    convert(opts, recipe_arg, notification=notification)

    return 0


if __name__ == '__main__':
    sys.exit(main())
|
|
@ -1,547 +0,0 @@
|
|||||||
from __future__ import with_statement
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
'''
|
|
||||||
Conversion of HTML/OPF files follows several stages:
|
|
||||||
|
|
||||||
* All links in the HTML files or in the OPF manifest are
|
|
||||||
followed to build up a list of HTML files to be converted.
|
|
||||||
This stage is implemented by
|
|
||||||
:function:`calibre.ebooks.html.traverse` and
|
|
||||||
:class:`calibre.ebooks.html.HTMLFile`.
|
|
||||||
|
|
||||||
* The HTML is pre-processed to make it more semantic.
|
|
||||||
All links in the HTML files to other resources like images,
|
|
||||||
stylesheets, etc. are relativized. The resources are copied
|
|
||||||
into the `resources` sub directory. This is accomplished by
|
|
||||||
:class:`calibre.ebooks.html.PreProcessor` and
|
|
||||||
:class:`calibre.ebooks.html.Parser`.
|
|
||||||
|
|
||||||
* The HTML is processed. Various operations are performed.
|
|
||||||
All style declarations are extracted and consolidated into
|
|
||||||
a single style sheet. Chapters are auto-detected and marked.
|
|
||||||
Various font related manipulations are performed. See
|
|
||||||
:class:`HTMLProcessor`.
|
|
||||||
|
|
||||||
* The processed HTML is saved and the
|
|
||||||
:module:`calibre.ebooks.epub.split` module is used to split up
|
|
||||||
large HTML files into smaller chunks.
|
|
||||||
|
|
||||||
* The EPUB container is created.
|
|
||||||
'''
|
|
||||||
|
|
||||||
import os, sys, cStringIO, logging, re, functools, shutil
|
|
||||||
|
|
||||||
from lxml.etree import XPath
|
|
||||||
from lxml import html, etree
|
|
||||||
from PyQt4.Qt import QApplication, QPixmap, Qt
|
|
||||||
|
|
||||||
from calibre.ebooks.html_old import Processor, merge_metadata, get_filelist,\
|
|
||||||
opf_traverse, create_metadata, rebase_toc, Link, parser
|
|
||||||
from calibre.ebooks.epub import config as common_config, tostring
|
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
|
||||||
from calibre.ebooks.metadata.toc import TOC
|
|
||||||
from calibre.ebooks.metadata.opf2 import OPF
|
|
||||||
from calibre.ebooks.epub import initialize_container, PROFILES
|
|
||||||
from calibre.ebooks.epub.split import split
|
|
||||||
from calibre.ebooks.epub.pages import add_page_map
|
|
||||||
from calibre.ebooks.epub.fonts import Rationalizer
|
|
||||||
from calibre.constants import preferred_encoding
|
|
||||||
from calibre.customize.ui import run_plugins_on_postprocess
|
|
||||||
from calibre import walk, CurrentDir, to_unicode, fit_image
|
|
||||||
|
|
||||||
# content(*parts) is os.path.join(u'content', *parts): it prefixes a path
# with the u'content' directory name.
content = functools.partial(os.path.join, u'content')
|
|
||||||
|
|
||||||
def remove_bad_link(element, attribute, link, pos):
    '''
    Neutralise a broken link reported by ``iterlinks()``.

    Nothing is done when `attribute` is None. A ``link`` element is removed
    from its parent; for any other element only the offending attribute is
    dropped. `link` and `pos` are accepted but not used.
    '''
    if attribute is None:
        return
    if element.tag in ['link']:
        element.getparent().remove(element)
        return
    element.set(attribute, '')
    del element.attrib[attribute]
|
|
||||||
|
|
||||||
def check_links(opf_path, pretty_print):
    '''
    Find and remove all invalid links in the HTML files

    Every manifest item whose media type contains ``html`` is looked up
    under the ``content`` directory next to the OPF. Links whose target
    path does not exist are removed with :func:`remove_bad_link` and the
    file is rewritten in place.

    :param opf_path: Path to the OPF file.
    :param pretty_print: Passed through to ``tostring`` when serialising.
    '''
    logger = logging.getLogger('html2epub')
    logger.info('\tChecking files for bad links...')
    pathtoopf = os.path.abspath(opf_path)
    with CurrentDir(os.path.dirname(pathtoopf)):
        html_files = []
        # Keep the OPF stream open for as long as the manifest is being
        # read, then close it deterministically.
        with open(pathtoopf, 'rb') as stream:
            opf = OPF(stream, os.path.dirname(pathtoopf))
            for item in opf.itermanifest():
                if 'html' in item.get('media-type', '').lower():
                    f = item.get('href').split('/')[-1]
                    if isinstance(f, str):
                        f = f.decode('utf-8')
                    html_files.append(os.path.abspath(content(f)))

        for path in html_files:
            if not os.access(path, os.R_OK):
                continue
            base = os.path.dirname(path)
            with open(content(path), 'rb') as stream:
                raw = stream.read()
            root = html.fromstring(raw, parser=parser)
            for element, attribute, link, pos in list(root.iterlinks()):
                link = to_unicode(link)
                plink = Link(link, base)
                if plink.path is not None and not os.path.exists(plink.path):
                    remove_bad_link(element, attribute, link, pos)
            # Serialise before opening for writing so that a failure here
            # cannot leave a truncated file behind.
            serialized = tostring(root, pretty_print)
            with open(content(path), 'wb') as stream:
                stream.write(serialized)
|
|
||||||
|
|
||||||
def find_html_index(files):
    '''
    Given a list of files, find the most likely root HTML file in the
    list.

    A file whose base name (without extension, case-insensitive) is ``toc``
    is preferred, then one named ``index``; among several candidates the
    smallest wins. Otherwise the largest HTML file is chosen.

    :param files: Iterable of file paths.
    :return: ``(path, extension)`` with the extension lower-cased and
             without the leading dot.
    :raises ValueError: If no ``.htm``/``.html``/``.xhtm``/``.xhtml`` file
                        is present.
    '''
    html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}$', re.IGNORECASE)
    html_files = [f for f in files if html_pat.search(f) is not None]
    if not html_files:
        raise ValueError(_('Could not find an ebook inside the archive'))
    # Stable sort by size, smallest first; key= replaces the old cmp= form.
    html_files = sorted(html_files, key=lambda f: os.stat(f).st_size)
    for q in ('toc', 'index'):
        for f in html_files:
            if os.path.splitext(os.path.basename(f))[0].lower() == q:
                return f, os.path.splitext(f)[1].lower()[1:]
    largest = html_files[-1]
    return largest, os.path.splitext(largest)[1].lower()[1:]
|
|
||||||
|
|
||||||
def rescale_images(imgdir, screen_size, log):
    '''
    Shrink every loadable image in `imgdir` that ``fit_image`` reports as
    needing scaling for `screen_size`, overwriting it in place as JPEG.

    Files ending in ``.css`` or ``.js`` and files Qt cannot load are
    skipped.
    '''
    max_width, max_height = screen_size
    if QApplication.instance() is None:
        QApplication([])
    for name in os.listdir(imgdir):
        if os.path.splitext(name)[1] in ('.css', '.js'):
            continue
        target = os.path.join(imgdir, name)

        pixmap = QPixmap()
        pixmap.load(target)
        if pixmap.isNull():
            continue
        needs_scaling, new_width, new_height = fit_image(
                pixmap.width(), pixmap.height(), max_width, max_height)
        if not needs_scaling:
            continue
        log.info('Rescaling image: '+name)
        resized = pixmap.scaled(new_width, new_height, Qt.IgnoreAspectRatio,
                Qt.SmoothTransformation)
        resized.save(target, 'JPEG')
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class HTMLProcessor(Processor, Rationalizer):
    '''
    Processes a single HTML file for html2epub: detects chapters, extracts
    CSS, optionally rationalizes fonts, strips scripts from the body and
    applies the markup fixes in :meth:`fix_markup`.
    '''

    def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, stylesheets):
        '''
        Run the whole processing pipeline on `htmlfile`.

        All arguments except `stylesheets` are forwarded to
        ``Processor.__init__``; `stylesheets` is handed to
        ``extract_css``.
        '''
        Processor.__init__(self, htmlfile, opts, tdir, resource_map, htmlfiles,
                           name='html2epub')
        if opts.verbose > 2:
            self.debug_tree('parsed')
        self.detect_chapters()

        self.extract_css(stylesheets)
        # Font rationalization only runs when a base font size was requested.
        if self.opts.base_font_size2 > 0:
            self.font_css = self.rationalize(self.external_stylesheets+[self.stylesheet],
                                             self.root, self.opts)
        if opts.verbose > 2:
            self.debug_tree('nocss')

        # Drop every <script> below <body>. The list() copy is needed
        # because the tree is modified while iterating.
        if hasattr(self.body, 'xpath'):
            for script in list(self.body.xpath('descendant::script')):
                script.getparent().remove(script)

        self.fix_markup()

    def convert_image(self, img):
        '''
        Re-save the image referenced by `img`'s ``src`` next to the original
        as ``<name>_calibre_converted.jpg``, delete the original, and update
        both ``self.resource_map`` and the ``src`` attribute.

        Nothing happens if the file does not exist or Qt cannot load it.
        '''
        rpath = img.get('src', '')
        path = os.path.join(os.path.dirname(self.save_path()), *rpath.split('/'))
        if os.path.exists(path) and os.path.isfile(path):
            if QApplication.instance() is None:
                app = QApplication([])
                # Bare reference with no runtime effect (presumably silences
                # an unused-variable warning).
                app
            p = QPixmap()
            p.load(path)
            if not p.isNull():
                p.save(path + '_calibre_converted.jpg')
                os.remove(path)
                for key, val in self.resource_map.items():
                    if val == rpath:
                        self.resource_map[key] = rpath+'_calibre_converted.jpg'
                img.set('src', rpath+'_calibre_converted.jpg')

    def fix_markup(self):
        '''
        Perform various markup transforms to get the output to render correctly
        in the quirky ADE.
        '''
        # Replace <br> that are children of <body> as ADE doesn't handle them
        if hasattr(self.body, 'xpath'):
            for br in self.body.xpath('./br'):
                # Already removed as the sibling of an earlier <br>.
                if br.getparent() is None:
                    continue
                try:
                    sibling = br.itersiblings().next()
                except:
                    sibling = None
                br.tag = 'p'
                br.text = u'\u00a0'
                if (br.tail and br.tail.strip()) or sibling is None or \
                    getattr(sibling, 'tag', '') != 'br':
                    # Lone <br>: turn it into a zero-height paragraph.
                    style = br.get('style', '').split(';')
                    style = filter(None, map(lambda x: x.strip(), style))
                    style.append('margin: 0pt; border:0pt; height:0pt')
                    br.set('style', '; '.join(style))
                else:
                    # Two consecutive <br>: merge them into one paragraph,
                    # keeping the tail text of the removed one.
                    sibling.getparent().remove(sibling)
                    if sibling.tail:
                        if not br.tail:
                            br.tail = ''
                        br.tail += sibling.tail


        if self.opts.profile.remove_object_tags:
            for tag in self.root.xpath('//embed'):
                tag.getparent().remove(tag)
            for tag in self.root.xpath('//object'):
                # SVG objects are kept.
                if tag.get('type', '').lower().strip() in ('image/svg+xml',):
                    continue
                tag.getparent().remove(tag)


        # Remove empty <title>/<style> and empty, src-less <script> elements.
        for tag in self.root.xpath('//title|//style'):
            if not tag.text:
                tag.getparent().remove(tag)
        for tag in self.root.xpath('//script'):
            if not tag.text and not tag.get('src', False):
                tag.getparent().remove(tag)

        for tag in self.root.xpath('//form'):
            tag.getparent().remove(tag)

        # <center> becomes a centered <div>.
        for tag in self.root.xpath('//center'):
            tag.tag = 'div'
            tag.set('style', 'text-align:center')

        if self.opts.linearize_tables:
            for tag in self.root.xpath('//table | //tr | //th | //td'):
                tag.tag = 'div'

        # ADE can't handle & in an img url
        for tag in self.root.xpath('//img[@src]'):
            tag.set('src', tag.get('src', '').replace('&', ''))


    def save(self):
        '''
        Remove all ``<meta>`` elements and save via ``Processor.save`` with
        comments stripped.
        '''
        for meta in list(self.root.xpath('//meta')):
            meta.getparent().remove(meta)
        # Strip all comments since Adobe DE is petrified of them
        Processor.save(self, strip_comments=True)

    def remove_first_image(self):
        '''
        Remove the first ``<img>`` in the document.

        :return: True if an image was removed, False if there was none.
        '''
        images = self.root.xpath('//img')
        if images:
            images[0].getparent().remove(images[0])
            return True
        return False
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def config(defaults=None):
    '''
    Return the html2epub configuration: the result of ``common_config``
    (imported from :mod:`calibre.ebooks.epub`) called with `defaults`.
    '''
    return common_config(defaults=defaults)
|
|
||||||
|
|
||||||
def option_parser():
    '''
    Return the command line option parser for html2epub, built from
    :func:`config` with a translated usage message.
    '''
    c = config()
    return c.option_parser(usage=_('''\
%prog [options] file.html|opf

Convert a HTML file to an EPUB ebook. Recursively follows links in the HTML file.
If you specify an OPF file instead of an HTML file, the list of links is takes from
the <spine> element of the OPF file.
'''))
|
|
||||||
|
|
||||||
def parse_content(filelist, opts, tdir):
    '''
    Process every HTML file in `filelist` into ``<tdir>/content`` and write
    out the collected stylesheets.

    :param filelist: HTML files to process, in order.
    :param opts: Conversion options.
    :param tdir: Working directory; ``content/resources`` is created in it.
    :return: ``(resource_map, htmlfile_map, toc, stylesheet_map)`` where
             ``htmlfile_map`` is taken from the last processed file and
             ``stylesheet_map`` maps each saved file name to its non-None
             stylesheets.
    '''
    os.makedirs(os.path.join(tdir, 'content', 'resources'))
    logger = logging.getLogger('html2epub')
    resource_map, stylesheets = {}, {}
    toc = TOC(base_path=tdir, type='root')
    stylesheet_map = {}
    first_image_removed = False
    for htmlfile in filelist:
        logger.debug('Processing %s...'%htmlfile)
        hp = HTMLProcessor(htmlfile, opts, os.path.join(tdir, 'content'),
                           resource_map, filelist, stylesheets)
        # Only the first image found across all files is removed.
        if not first_image_removed and opts.remove_first_image:
            first_image_removed = hp.remove_first_image()
        hp.populate_toc(toc)
        hp.save()
        sheets = hp.external_stylesheets + [hp.stylesheet, hp.font_css,
                                            hp.override_css]
        stylesheet_map[os.path.basename(hp.save_path())] = \
            [s for s in sheets if s is not None]

    logger.debug('Saving stylesheets...')
    if opts.base_font_size2 > 0:
        Rationalizer.remove_font_size_information(stylesheets.values())
    for path, css in stylesheets.items():
        raw = getattr(css, 'cssText', css)
        if isinstance(raw, unicode):
            raw = raw.encode('utf-8')
        # Close the handle deterministically instead of relying on GC.
        with open(path, 'wb') as stream:
            stream.write(raw)
    # Progressively drop less important TOC entries when there are too many.
    if toc.count('chapter') > opts.toc_threshold:
        toc.purge(['file', 'link', 'unknown'])
    if toc.count('chapter') + toc.count('file') > opts.toc_threshold:
        toc.purge(['link', 'unknown'])
    toc.purge(['link'], max=opts.max_toc_links)

    # NOTE(review): `hp` is unbound when `filelist` is empty, as in the
    # original; callers are expected to pass at least one file.
    return resource_map, hp.htmlfile_map, toc, stylesheet_map
|
|
||||||
|
|
||||||
TITLEPAGE = '''\
|
|
||||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
|
||||||
<head>
|
|
||||||
<title>Cover</title>
|
|
||||||
<style type="text/css" title="override_css">
|
|
||||||
@page {padding: 0pt; margin:0pt}
|
|
||||||
body { text-align: center; padding:0pt; margin: 0pt; }
|
|
||||||
div { margin: 0pt; padding: 0pt; }
|
|
||||||
</style>
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
<div>
|
|
||||||
<img src="%s" alt="cover" style="height: 100%%" />
|
|
||||||
</div>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
'''
|
|
||||||
|
|
||||||
def create_cover_image(src, dest, screen_size, rescale_cover=True):
    '''
    Load the image at `src` and save it to `dest`, enlarging it first if it
    is smaller than the screen in both dimensions.

    :param src: Path of the source image.
    :param dest: Path to write to; Qt picks the format from the extension.
    :param screen_size: ``(width, height)`` or None to disable rescaling.
    :param rescale_cover: If False the image is never rescaled.
    :return: True on success. On any error the traceback is printed and
             False is returned.
    '''
    try:
        from PyQt4.Qt import QImage, Qt
        if QApplication.instance() is None:
            QApplication([])
        im = QImage()
        im.load(src)
        if im.isNull():
            raise ValueError('Invalid cover image')
        if rescale_cover and screen_size is not None:
            width, height = im.width(), im.height()
            # Relative growth available in each dimension; the smaller one
            # limits the scale so the result still fits the screen.
            dw = (screen_size[0]-width)/float(width)
            dh = (screen_size[1]-height)/float(height)
            delta = min(dw, dh)
            if delta > 0:
                nwidth = int(width + delta*(width))
                nheight = int(height + delta*(height))
                im = im.scaled(nwidth, nheight, Qt.IgnoreAspectRatio,
                               Qt.SmoothTransformation)
        im.save(dest)
    except Exception:
        # Best effort: report and signal failure, but let KeyboardInterrupt
        # and SystemExit propagate.
        import traceback
        traceback.print_exc()
        return False
    return True
|
|
||||||
|
|
||||||
def process_title_page(mi, filelist, htmlfilemap, opts, tdir):
    '''
    Render the cover image to ``content/resources/_cover_.jpg`` and write a
    title page that displays it.

    Two candidate covers exist: ``mi.cover`` (from metadata) and
    ``opts.cover`` (user specified). Both are rendered to the same
    destination, so the preferred one is rendered last; that way the file on
    disk is the cover that was actually selected.

    :param mi: Metadata object; ``mi.cover`` is reset to None if it is not a
               string.
    :param filelist: Processed HTML files; only ``filelist[0].path`` is used.
    :param htmlfilemap: Maps source paths to output file names.
    :param opts: Options (``cover``, ``prefer_metadata_cover``,
                 ``profile.screen_size``).
    :param tdir: Working directory.
    :return: ``(title_page, has_title_page)``. ``title_page`` is the name of
             a newly created page, or None when no page was written or an
             existing first page was overwritten.
    '''
    norm = lambda x: os.path.normcase(os.path.normpath(x))
    old_title_page = None
    if not isinstance(mi.cover, basestring):
        mi.cover = None
    # If the metadata cover *is* the first HTML file, that file is the old
    # title page and gets replaced rather than preceded.
    if mi.cover and norm(filelist[0].path) == norm(mi.cover):
        old_title_page = htmlfilemap[filelist[0].path]

    cpath = '/'.join(('resources', '_cover_.jpg'))
    cover_dest = os.path.join(tdir, 'content', *cpath.split('/'))

    def render(candidate):
        # Returns the candidate if it was rendered to cover_dest, else None.
        if candidate and not os.path.exists(candidate):
            candidate = None
        if candidate is not None and not create_cover_image(
                candidate, cover_dest, opts.profile.screen_size):
            candidate = None
        return candidate

    if opts.prefer_metadata_cover:
        specified_cover = render(opts.cover)
        metadata_cover = render(mi.cover)
    else:
        metadata_cover = render(mi.cover)
        specified_cover = render(opts.cover)

    if specified_cover is None or \
            (opts.prefer_metadata_cover and metadata_cover is not None):
        cover = metadata_cover
    else:
        cover = specified_cover

    if cover is not None:
        titlepage = TITLEPAGE%cpath
        tp = 'calibre_title_page.html' if old_title_page is None else old_title_page
        tppath = os.path.join(tdir, 'content', tp)
        with open(tppath, 'wb') as stream:
            stream.write(titlepage)
        return tp if old_title_page is None else None, True
    elif os.path.exists(cover_dest):
        os.remove(cover_dest)
    return None, old_title_page is not None
|
|
||||||
|
|
||||||
def find_oeb_cover(htmlfile):
    '''
    Return the ``src`` of the first ``<img>`` tag in `htmlfile`.

    Files larger than 2048 bytes are not inspected.

    :return: The ``src`` value, or None if the file is too large or has no
             matching ``<img>`` tag.
    '''
    if os.stat(htmlfile).st_size > 2048:
        return None
    with open(htmlfile, 'rb') as stream:
        raw = stream.read()
    match = re.search(r'(?i)<img[^<>]+src\s*=\s*[\'"](.+?)[\'"]', raw)
    if match:
        return match.group(1)
    return None
|
|
||||||
|
|
||||||
def condense_ncx(ncx_path):
    '''
    Shrink the NCX file at `ncx_path` in place by stripping leading and
    trailing whitespace from the text and tail of every element, then
    re-serialising it as UTF-8.
    '''
    tree = etree.parse(ncx_path)
    for tag in tree.getroot().iter(tag=etree.Element):
        if tag.text:
            tag.text = tag.text.strip()
        if tag.tail:
            tag.tail = tag.tail.strip()
    compressed = etree.tostring(tree.getroot(), encoding='utf-8')
    # Close the handle deterministically instead of relying on GC.
    with open(ncx_path, 'wb') as stream:
        stream.write(compressed)
|
|
||||||
|
|
||||||
def convert(htmlfile, opts, notification=None, create_epub=True,
            oeb_cover=False, extract_to=None):
    '''
    Convert an HTML or OPF file to EPUB.

    :param htmlfile: Path to an HTML file, or to an OPF file (detected by a
                     ``.opf`` suffix).
    :param opts: Options object; several attributes are normalised in place
                 (paths made absolute, XPath expressions compiled, the
                 profile name replaced by the entry from ``PROFILES``).
    :param notification: Accepted but not used in this function.
    :param create_epub: If True, write the EPUB container to ``opts.output``
                        and run the postprocess plugins on it.
    :param oeb_cover: If True, the generated title page is not added to the
                      spine and the guide's cover entry points at the image
                      found by :func:`find_oeb_cover` instead.
    :param extract_to: If not None, the working directory is copied there
                       (replacing any existing directory).
    '''
    htmlfile = os.path.abspath(htmlfile)
    if opts.output is None:
        opts.output = os.path.splitext(os.path.basename(htmlfile))[0] + '.epub'
    opts.profile = PROFILES[opts.profile]
    opts.output = os.path.abspath(opts.output)
    if opts.override_css is not None:
        # override_css is either a path to a CSS file or literal CSS text:
        # try to read it as a file first, fall back to treating it as text.
        try:
            opts.override_css = open(opts.override_css, 'rb').read().decode(preferred_encoding, 'replace')
        except:
            opts.override_css = opts.override_css.decode(preferred_encoding, 'replace')
    if opts.from_opf:
        opts.from_opf = os.path.abspath(opts.from_opf)
    if opts.from_ncx:
        opts.from_ncx = os.path.abspath(opts.from_ncx)
    if htmlfile.lower().endswith('.opf'):
        opf = OPF(htmlfile, os.path.dirname(os.path.abspath(htmlfile)))
        filelist = opf_traverse(opf, verbose=opts.verbose, encoding=opts.encoding)
        if not filelist:
            # Bad OPF look for a HTML file instead
            htmlfile = find_html_index(walk(os.path.dirname(htmlfile)))[0]
            # NOTE(review): find_html_index raises instead of returning
            # None, so this guard looks unreachable.
            if htmlfile is None:
                raise ValueError('Could not find suitable file to convert.')
            filelist = get_filelist(htmlfile, opts)[1]
        mi = merge_metadata(None, opf, opts)
    else:
        opf, filelist = get_filelist(htmlfile, opts)
        mi = merge_metadata(htmlfile, opf, opts)
    # Compile the chapter and per-level TOC expressions once, with the EXSLT
    # regular-expressions namespace bound to the prefix "re".
    opts.chapter = XPath(opts.chapter,
                    namespaces={'re':'http://exslt.org/regular-expressions'})
    for x in (1, 2, 3):
        attr = 'level%d_toc'%x
        if getattr(opts, attr):
            setattr(opts, attr, XPath(getattr(opts, attr),
                  namespaces={'re':'http://exslt.org/regular-expressions'}))
        else:
            setattr(opts, attr, None)

    with TemporaryDirectory(suffix='_html2epub', keep=opts.keep_intermediate) as tdir:
        if opts.keep_intermediate:
            print 'Intermediate files in', tdir
        resource_map, htmlfile_map, generated_toc, stylesheet_map = \
                                    parse_content(filelist, opts, tdir)
        logger = logging.getLogger('html2epub')
        resources = [os.path.join(tdir, 'content', f) for f in resource_map.values()]


        title_page, has_title_page = process_title_page(mi, filelist, htmlfile_map, opts, tdir)
        spine = [htmlfile_map[f.path] for f in filelist]
        if not oeb_cover and title_page is not None:
            spine = [title_page] + spine
        # The cover now lives in the title page, so drop it from the metadata.
        mi.cover = None
        mi.cover_data = (None, None)


        mi = create_metadata(tdir, mi, spine, resources)
        buf = cStringIO.StringIO()
        if mi.toc:
            rebase_toc(mi.toc, htmlfile_map, tdir)
        # Fall back to the auto-generated TOC when asked to, or when the
        # existing one is missing or has fewer than two entries.
        if opts.use_auto_toc or mi.toc is None or len(list(mi.toc.flat())) < 2:
            mi.toc = generated_toc
        # An explicitly supplied NCX overrides everything else.
        if opts.from_ncx:
            toc = TOC()
            toc.read_ncx_toc(opts.from_ncx)
            mi.toc = toc
        for item in mi.manifest:
            if getattr(item, 'mime_type', None) == 'text/html':
                item.mime_type = 'application/xhtml+xml'
        opf_path = os.path.join(tdir, 'metadata.opf')
        with open(opf_path, 'wb') as f:
            # Renders the OPF into f and the NCX into buf.
            mi.render(f, buf, 'toc.ncx')
        toc = buf.getvalue()
        if toc:
            with open(os.path.join(tdir, 'toc.ncx'), 'wb') as f:
                f.write(toc)
            if opts.show_ncx:
                print toc
        split(opf_path, opts, stylesheet_map)
        if opts.page:
            logger.info('\tBuilding page map...')
            add_page_map(opf_path, opts)
        check_links(opf_path, opts.pretty_print)

        # Re-read the OPF and rebuild its guide.
        opf = OPF(opf_path, tdir)
        opf.remove_guide()
        oeb_cover_file = None
        if oeb_cover and title_page is not None:
            oeb_cover_file = find_oeb_cover(os.path.join(tdir, 'content', title_page))
        if has_title_page or (oeb_cover and oeb_cover_file):
            opf.create_guide_element()
        if has_title_page and not oeb_cover:
            opf.add_guide_item('cover', 'Cover', 'content/'+spine[0])
        if oeb_cover and oeb_cover_file:
            opf.add_guide_item('cover', 'Cover', 'content/'+oeb_cover_file)

        cpath = os.path.join(tdir, 'content', 'resources', '_cover_.jpg')
        if os.path.exists(cpath):
            opf.add_path_to_manifest(cpath, 'image/jpeg')
        with open(opf_path, 'wb') as f:
            f.write(opf.render())
        ncx_path = os.path.join(os.path.dirname(opf_path), 'toc.ncx')
        # Condense the NCX if it exceeds the profile's flow size, and warn
        # if it is still too large afterwards.
        if os.path.exists(ncx_path) and os.stat(ncx_path).st_size > opts.profile.flow_size:
            logger.info('Condensing NCX from %d bytes...'%os.stat(ncx_path).st_size)
            condense_ncx(ncx_path)
            if os.stat(ncx_path).st_size > opts.profile.flow_size:
                logger.warn('NCX still larger than allowed size at %d bytes. Menu based Table of Contents may not work on device.'%os.stat(ncx_path).st_size)

        if opts.profile.screen_size is not None:
            rescale_images(os.path.join(tdir, 'content', 'resources'),
                           opts.profile.screen_size, logger)

        if create_epub:
            epub = initialize_container(opts.output)
            epub.add_dir(tdir)
            epub.close()
            run_plugins_on_postprocess(opts.output, 'epub')
            logger.info(_('Output written to ')+opts.output)

        if opts.show_opf:
            print open(opf_path, 'rb').read()

        # Both the option and the keyword argument can request a copy of the
        # working directory; each is honoured independently.
        if opts.extract_to is not None:
            if os.path.exists(opts.extract_to):
                shutil.rmtree(opts.extract_to)
            shutil.copytree(tdir, opts.extract_to)

        if extract_to is not None:
            if os.path.exists(extract_to):
                shutil.rmtree(extract_to)
            shutil.copytree(tdir, extract_to)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def main(args=sys.argv):
|
|
||||||
parser = option_parser()
|
|
||||||
opts, args = parser.parse_args(args)
|
|
||||||
if len(args) < 2:
|
|
||||||
parser.print_help()
|
|
||||||
print _('You must specify an input HTML file')
|
|
||||||
return 1
|
|
||||||
convert(args[1], opts)
|
|
||||||
return 0
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
sys.exit(main())
|
|
@ -6,9 +6,15 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os
|
||||||
|
from urllib import unquote
|
||||||
|
|
||||||
from calibre.customize.conversion import OutputFormatPlugin
|
from calibre.customize.conversion import OutputFormatPlugin
|
||||||
from calibre import CurrentDir
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
|
from calibre.constants import __appname__, __version__
|
||||||
|
from calibre import strftime, guess_type
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
|
||||||
class EPUBOutput(OutputFormatPlugin):
|
class EPUBOutput(OutputFormatPlugin):
|
||||||
|
|
||||||
@ -16,7 +22,218 @@ class EPUBOutput(OutputFormatPlugin):
|
|||||||
author = 'Kovid Goyal'
|
author = 'Kovid Goyal'
|
||||||
file_type = 'epub'
|
file_type = 'epub'
|
||||||
|
|
||||||
|
TITLEPAGE_COVER = '''\
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
||||||
|
<head>
|
||||||
|
<title>Cover</title>
|
||||||
|
<style type="text/css" title="override_css">
|
||||||
|
@page {padding: 0pt; margin:0pt}
|
||||||
|
body { text-align: center; padding:0pt; margin: 0pt; }
|
||||||
|
div { margin: 0pt; padding: 0pt; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div>
|
||||||
|
<img src="%s" alt="cover" style="height: 100%%" />
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
'''
|
||||||
|
|
||||||
|
# Template for the generic title page used when a book has no cover. It is
# filled with %-formatting (keys: title, img, date, author, app) and must
# stay well-formed XML because the result is parsed with etree.fromstring().
TITLEPAGE = '''\
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<style type="text/css">
body {
background: white no-repeat fixed center center;
text-align: center;
vertical-align: center;
overflow: hidden;
font-size: 18px;
}
h1 { font-family: serif; }
h2, h4 { font-family: monospace; }
</style>
</head>
<body>
<h1>%(title)s</h1>
<br/><br/>
<div style="position:relative">
<div style="position: absolute; left: 0; top: 0; width:100%%; height:100%%; vertical-align:center">
<img src="%(img)s" alt="calibre" style="opacity:0.3"/>
</div>
<div style="position: absolute; left: 0; top: 0; width:100%%; height:100%%; vertical-align:center">
<h2>%(date)s</h2>
<br/><br/><br/><br/><br/>
<h3>%(author)s</h3>
<br/><br/><br/><br/><br/><br/><br/><br/><br/>
<h4>Produced by %(app)s</h4>
</div>
</div>
</body>
</html>
'''
|
||||||
|
|
||||||
def convert(self, oeb, output_path, input_plugin, opts, log):
    '''
    Write `oeb` to `output_path` as an EPUB file.

    The book is first adjusted for Adobe Digital Editions, its images are
    rescaled and a cover/title page is inserted. It is then written out with
    the OEB output plugin into a temporary directory, the NCX is condensed,
    and the directory is packed into the EPUB container.
    '''
    self.log, self.opts, self.oeb = log, opts, oeb

    self.workaround_ade_quirks()

    from calibre.ebooks.oeb.transforms.rescale import RescaleImages
    RescaleImages()(oeb, opts)
    self.insert_cover()

    with TemporaryDirectory('_epub_output') as tdir:
        from calibre.customize.ui import plugin_for_output_format
        oeb_output = plugin_for_output_format('oeb')
        oeb_output.convert(oeb, tdir, input_plugin, opts, log)
        produced = os.listdir(tdir)
        opf = [x for x in produced if x.endswith('.opf')][0]
        ncx = [x for x in produced if x.endswith('.ncx')][0]
        self.condense_ncx(os.path.join(tdir, ncx))

        # initialize_container lives in calibre.ebooks.epub, not calibre.epub.
        from calibre.ebooks.epub import initialize_container
        epub = initialize_container(output_path, os.path.basename(opf))
        epub.add_dir(tdir)
        epub.close()
|
||||||
|
|
||||||
|
def default_cover(self):
    '''
    Create a generic cover for books that dont have a cover

    Adds the calibre logo and a title page built from ``self.TITLEPAGE`` to
    the manifest.

    :return: The manifest item of the new title page, or None if the GUI
             resources or PyQt4 cannot be imported.
    '''
    try:
        from calibre.gui2 import images_rc # Needed for access to logo
        from PyQt4.Qt import QApplication, QFile, QIODevice
    except Exception:
        return None
    from calibre.ebooks.metadata import authors_to_string
    from xml.sax.saxutils import escape
    images_rc
    m = self.oeb.metadata
    title = unicode(m.title[0])
    # Filter on each creator's own role (the original tested `m.role`).
    # NOTE(review): confirm the metadata attribute is `creators`, not
    # `creator`.
    a = [unicode(x) for x in m.creators if x.role == 'aut']
    author = authors_to_string(a)
    if QApplication.instance() is None: QApplication([])
    f = QFile(':/library')
    f.open(QIODevice.ReadOnly)
    img_data = str(f.readAll())
    id, href = self.oeb.manifest.generate('calibre-logo',
            'calibre-logo.png')
    self.oeb.manifest.add(id, href, 'image/png', data=img_data)
    # Title and author are free text; escape them so characters such as
    # & and < cannot break the XML parse below.
    html = self.TITLEPAGE%dict(title=escape(title), author=escape(author),
            date=strftime('%d %b, %Y'),
            app=__appname__ +' '+__version__,
            img=href)
    id, href = self.oeb.manifest.generate('calibre-titlepage',
            'calibre-titlepage.xhtml')
    return self.oeb.manifest.add(id, href, guess_type('t.xhtml')[0],
            data=etree.fromstring(html))
|
||||||
|
|
||||||
|
|
||||||
|
def insert_cover(self):
    '''
    Make sure the book starts with a title page.

    If the guide has no ``titlepage`` entry, a page is created: one showing
    the guide's ``cover`` image if there is one, otherwise the generic page
    from :meth:`default_cover`. The resulting item is inserted at the front
    of the spine, and any existing ``cover``/``titlepage`` guide references
    are pointed at it.
    '''
    from calibre.ebooks.oeb.base import urldefrag
    from calibre import guess_type
    g, m = self.oeb.guide, self.oeb.manifest
    if 'titlepage' not in g:
        if 'cover' in g:
            tp = self.TITLEPAGE_COVER%unquote(g['cover'].href)
            id, href = m.generate('titlepage', 'titlepage.xhtml')
            # guess_type returns a tuple; the media type is its first
            # element (as used in default_cover).
            item = m.add(id, href, guess_type('t.xhtml')[0],
                    data=etree.fromstring(tp))
        else:
            item = self.default_cover()
    else:
        item = m.hrefs[urldefrag(g['titlepage'].href)[0]]
    if item is not None:
        self.oeb.spine.insert(0, item, True)
        # Only update references that exist: a book may have neither.
        for ref_type in ('cover', 'titlepage'):
            if ref_type in g:
                g.refs[ref_type].href = item.href
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def condense_ncx(self, ncx_path):
    '''
    Unless pretty printing was requested, shrink the NCX file at `ncx_path`
    in place by stripping leading and trailing whitespace from the text and
    tail of every element.
    '''
    if self.opts.pretty_print:
        return
    tree = etree.parse(ncx_path)
    for tag in tree.getroot().iter(tag=etree.Element):
        if tag.text:
            tag.text = tag.text.strip()
        if tag.tail:
            tag.tail = tag.tail.strip()
    compressed = etree.tostring(tree.getroot(), encoding='utf-8')
    # Close the handle deterministically instead of relying on GC.
    with open(ncx_path, 'wb') as stream:
        stream.write(compressed)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def workaround_ade_quirks(self):
    '''
    Perform various markup transforms to get the output to render correctly
    in the quirky ADE.

    Applied to every spine item; afterwards two link styling rules are added
    to ``stylesheet.css``.
    '''
    from calibre.ebooks.oeb.base import XPNSMAP, XHTML
    from lxml.etree import XPath as _XPath
    from functools import partial
    # All expressions use the "h:" prefix, so every one of them must be
    # evaluated through this namespace-aware XPath factory.
    XPath = partial(_XPath, namespaces=XPNSMAP)

    def remove_all(root, expr, keep=None):
        # Remove every element matching expr unless keep(element) is true.
        for tag in XPath(expr)(root):
            if keep is not None and keep(tag):
                continue
            tag.getparent().remove(tag)

    for x in self.oeb.spine:
        root = x.data
        body = XPath('//h:body')(root)
        if body:
            body = body[0]
        # Replace <br> that are children of <body> as ADE doesn't handle them
        if hasattr(body, 'xpath'):
            for br in XPath('./h:br')(body):
                # Already removed as the sibling of an earlier <br>.
                if br.getparent() is None:
                    continue
                try:
                    sibling = br.itersiblings().next()
                except StopIteration:
                    sibling = None
                br.tag = XHTML('p')
                br.text = u'\u00a0'
                if (br.tail and br.tail.strip()) or sibling is None or \
                        getattr(sibling, 'tag', '') != XHTML('br'):
                    # Lone <br>: turn it into a zero-height paragraph.
                    style = br.get('style', '').split(';')
                    style = [s.strip() for s in style if s.strip()]
                    style.append('margin: 0pt; border:0pt; height:0pt')
                    br.set('style', '; '.join(style))
                else:
                    # Two consecutive <br>: merge into one paragraph and
                    # keep the tail text of the removed one.
                    sibling.getparent().remove(sibling)
                    if sibling.tail:
                        if not br.tail:
                            br.tail = ''
                        br.tail += sibling.tail

        if self.opts.output_profile.remove_object_tags:
            remove_all(root, '//h:embed')
            # SVG objects are kept.
            remove_all(root, '//h:object',
                    keep=lambda t: t.get('type', '').lower().strip() in
                        ('image/svg+xml',))

        # Empty <title>/<style> and empty, src-less <script> elements.
        remove_all(root, '//h:title|//h:style', keep=lambda t: t.text)
        remove_all(root, '//h:script',
                keep=lambda t: t.text or t.get('src', False))

        remove_all(root, '//h:form')

        for tag in XPath('//h:center')(root):
            tag.tag = XHTML('div')
            tag.set('style', 'text-align:center')

        # ADE can't handle & in an img url
        for tag in XPath('//h:img[@src]')(root):
            tag.set('src', tag.get('src', '').replace('&', ''))

    stylesheet = self.oeb.manifest.hrefs['stylesheet.css']
    stylesheet.data.add('a { color: inherit; text-decoration: inherit; '
            'cursor: default; }')
    stylesheet.data.add('a[href] { color: blue; '
            'text-decoration: underline; cursor:pointer; }')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -12,13 +12,15 @@ from cStringIO import StringIO
|
|||||||
from PyQt4.Qt import QFontDatabase
|
from PyQt4.Qt import QFontDatabase
|
||||||
|
|
||||||
from calibre.customize.ui import available_input_formats
|
from calibre.customize.ui import available_input_formats
|
||||||
from calibre.ebooks.epub.from_html import TITLEPAGE
|
|
||||||
from calibre.ebooks.metadata.opf2 import OPF
|
from calibre.ebooks.metadata.opf2 import OPF
|
||||||
from calibre.ptempfile import TemporaryDirectory
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
from calibre.ebooks.chardet import xml_to_unicode
|
from calibre.ebooks.chardet import xml_to_unicode
|
||||||
from calibre.utils.zipfile import safe_replace, ZipFile
|
from calibre.utils.zipfile import safe_replace, ZipFile
|
||||||
from calibre.utils.config import DynamicConfig
|
from calibre.utils.config import DynamicConfig
|
||||||
from calibre.utils.logging import Log
|
from calibre.utils.logging import Log
|
||||||
|
from calibre.ebooks.epub.output import EPUBOutput
|
||||||
|
|
||||||
|
TITLEPAGE = EPUBOutput.TITLEPAGE_COVER
|
||||||
|
|
||||||
def character_count(html):
|
def character_count(html):
|
||||||
'''
|
'''
|
||||||
|
@ -14,7 +14,10 @@ class Clean(object):
|
|||||||
from calibre.ebooks.oeb.base import urldefrag
|
from calibre.ebooks.oeb.base import urldefrag
|
||||||
self.oeb, self.log, self.opts = oeb, oeb.log, opts
|
self.oeb, self.log, self.opts = oeb, oeb.log, opts
|
||||||
|
|
||||||
cover_href = ''
|
protected_hrefs = set([])
|
||||||
|
if 'titlepage' in self.oeb.guide:
|
||||||
|
protected_hrefs.add(urldefrag(
|
||||||
|
self.oeb.guide['titlepage'].href)[0])
|
||||||
if 'cover' not in self.oeb.guide:
|
if 'cover' not in self.oeb.guide:
|
||||||
covers = []
|
covers = []
|
||||||
for x in ('other.ms-coverimage-standard',
|
for x in ('other.ms-coverimage-standard',
|
||||||
@ -32,15 +35,15 @@ class Clean(object):
|
|||||||
self.log('Choosing %s:%s as the cover'%(ref.type, ref.href))
|
self.log('Choosing %s:%s as the cover'%(ref.type, ref.href))
|
||||||
ref.type = 'cover'
|
ref.type = 'cover'
|
||||||
self.oeb.guide.refs['cover'] = ref
|
self.oeb.guide.refs['cover'] = ref
|
||||||
cover_href = urldefrag(ref.href)[0]
|
protected_hrefs.add(urldefrag(ref.href)[0])
|
||||||
else:
|
else:
|
||||||
cover_href = urldefrag(self.oeb.guide.refs['cover'].href)[0]
|
protected_hrefs.add(urldefrag(self.oeb.guide.refs['cover'].href)[0])
|
||||||
|
|
||||||
for x in list(self.oeb.guide):
|
for x in list(self.oeb.guide):
|
||||||
href = urldefrag(self.oeb.guide[x].href)[0]
|
href = urldefrag(self.oeb.guide[x].href)[0]
|
||||||
if x.lower() != 'cover':
|
if x.lower() != ('cover', 'titlepage'):
|
||||||
try:
|
try:
|
||||||
if href != cover_href:
|
if href not in protected_hrefs:
|
||||||
self.oeb.manifest.remove(self.oeb.manifest.hrefs[href])
|
self.oeb.manifest.remove(self.oeb.manifest.hrefs[href])
|
||||||
except KeyError:
|
except KeyError:
|
||||||
pass
|
pass
|
||||||
|
37
src/calibre/ebooks/oeb/transforms/rescale.py
Normal file
37
src/calibre/ebooks/oeb/transforms/rescale.py
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from calibre import fit_image
|
||||||
|
|
||||||
|
class RescaleImages(object):
    '''
    Rescale all images to fit inside given screen size.

    Instances are callable: calling one with an OEB book and the conversion
    options walks the book's manifest and shrinks image items in place.
    '''

    def __call__(self, oeb, opts):
        '''
        Rescale the image items of `oeb.manifest`.

        :param oeb: the book; its ``manifest`` is iterated and its ``log``
                    is used for progress messages.
        :param opts: conversion options; ``opts.dest.width`` and
                     ``opts.dest.height`` give the bounding box that is
                     passed to `fit_image`.
        '''
        from PyQt4.Qt import QApplication, QImage, Qt
        from calibre.gui2 import pixmap_to_data

        self.oeb, self.opts, self.log = oeb, opts, oeb.log
        max_width, max_height = opts.dest.width, opts.dest.height

        for entry in oeb.manifest:
            # Only manifest items whose media type starts with 'image'
            # are considered.
            if not entry.media_type.startswith('image'):
                continue

            payload = entry.data
            if not payload:
                continue

            # Create a QApplication if none exists yet. This happens lazily,
            # i.e. only once an image item with data has been found.
            if QApplication.instance() is None:
                QApplication([])

            picture = QImage(10, 10, QImage.Format_ARGB32_Premultiplied)
            if not picture.loadFromData(payload):
                # Data that cannot be loaded as an image is left untouched.
                continue

            # fit_image yields a flag saying whether rescaling is needed,
            # followed by the dimensions to rescale to.
            needs_rescale, target_width, target_height = fit_image(
                    picture.width(), picture.height(),
                    max_width, max_height)
            if not needs_rescale:
                continue

            self.log('Rescaling image', entry.href)
            # The target dimensions come from fit_image, so the scaling
            # itself is told to ignore the aspect ratio.
            picture = picture.scaled(target_width, target_height,
                    Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
            # NOTE(review): the output format used by pixmap_to_data is not
            # visible here -- confirm it matches entry.media_type.
            entry.data = pixmap_to_data(picture)
|
||||||
|
|
||||||
|
|
@ -17,7 +17,7 @@ from lxml.cssselect import CSSSelector
|
|||||||
|
|
||||||
from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP as NAMESPACES, \
|
from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP as NAMESPACES, \
|
||||||
urldefrag, rewrite_links, urlunquote
|
urldefrag, rewrite_links, urlunquote
|
||||||
from calibre.ebooks.epub import tostring, rules
|
from calibre.ebooks.epub import rules
|
||||||
|
|
||||||
|
|
||||||
XPath = functools.partial(_XPath, namespaces=NAMESPACES)
|
XPath = functools.partial(_XPath, namespaces=NAMESPACES)
|
||||||
@ -25,6 +25,9 @@ XPath = functools.partial(_XPath, namespaces=NAMESPACES)
|
|||||||
SPLIT_ATTR = 'cs'
|
SPLIT_ATTR = 'cs'
|
||||||
SPLIT_POINT_ATTR = 'csp'
|
SPLIT_POINT_ATTR = 'csp'
|
||||||
|
|
||||||
|
def tostring(root):
    '''Serialize `root` via etree.tostring, encoded as UTF-8.'''
    serialized = etree.tostring(root, encoding='utf-8')
    return serialized
|
||||||
|
|
||||||
class SplitError(ValueError):
|
class SplitError(ValueError):
|
||||||
|
|
||||||
def __init__(self, path, root):
|
def __init__(self, path, root):
|
||||||
|
@ -11,7 +11,7 @@ import re
|
|||||||
from lxml import etree
|
from lxml import etree
|
||||||
from urlparse import urlparse
|
from urlparse import urlparse
|
||||||
|
|
||||||
from calibre.ebooks.oeb.base import XPNSMAP, TOC
|
from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML
|
||||||
XPath = lambda x: etree.XPath(x, namespaces=XPNSMAP)
|
XPath = lambda x: etree.XPath(x, namespaces=XPNSMAP)
|
||||||
|
|
||||||
class DetectStructure(object):
|
class DetectStructure(object):
|
||||||
@ -63,11 +63,11 @@ class DetectStructure(object):
|
|||||||
if chapter_mark == 'none':
|
if chapter_mark == 'none':
|
||||||
continue
|
continue
|
||||||
elif chapter_mark == 'rule':
|
elif chapter_mark == 'rule':
|
||||||
mark = etree.Element('hr')
|
mark = etree.Element(XHTML('hr'))
|
||||||
elif chapter_mark == 'pagebreak':
|
elif chapter_mark == 'pagebreak':
|
||||||
mark = etree.Element('div', style=page_break_after)
|
mark = etree.Element(XHTML('div'), style=page_break_after)
|
||||||
else: # chapter_mark == 'both':
|
else: # chapter_mark == 'both':
|
||||||
mark = etree.Element('hr', style=page_break_before)
|
mark = etree.Element(XHTML('hr'), style=page_break_before)
|
||||||
elem.addprevious(mark)
|
elem.addprevious(mark)
|
||||||
|
|
||||||
def create_level_based_toc(self):
|
def create_level_based_toc(self):
|
||||||
@ -114,12 +114,13 @@ class DetectStructure(object):
|
|||||||
def add_leveled_toc_items(self, item):
|
def add_leveled_toc_items(self, item):
|
||||||
level1 = XPath(self.opts.level1_toc)(item.data)
|
level1 = XPath(self.opts.level1_toc)(item.data)
|
||||||
level1_order = []
|
level1_order = []
|
||||||
|
document = item
|
||||||
|
|
||||||
counter = 1
|
counter = 1
|
||||||
if level1:
|
if level1:
|
||||||
added = {}
|
added = {}
|
||||||
for elem in level1:
|
for elem in level1:
|
||||||
text, _href = self.elem_to_link(item, elem, counter)
|
text, _href = self.elem_to_link(document, elem, counter)
|
||||||
counter += 1
|
counter += 1
|
||||||
if text:
|
if text:
|
||||||
node = self.oeb.toc.add(text, _href,
|
node = self.oeb.toc.add(text, _href,
|
||||||
@ -132,11 +133,11 @@ class DetectStructure(object):
|
|||||||
level2 = list(XPath(self.opts.level2_toc)(item.data))
|
level2 = list(XPath(self.opts.level2_toc)(item.data))
|
||||||
for elem in level2:
|
for elem in level2:
|
||||||
level1 = None
|
level1 = None
|
||||||
for item in item.data.iterdescendants():
|
for item in document.data.iterdescendants():
|
||||||
if item in added.keys():
|
if item in added.keys():
|
||||||
level1 = added[item]
|
level1 = added[item]
|
||||||
elif item == elem and level1 is not None:
|
elif item == elem and level1 is not None:
|
||||||
text, _href = self.elem_to_link(item, elem, counter)
|
text, _href = self.elem_to_link(document, elem, counter)
|
||||||
counter += 1
|
counter += 1
|
||||||
if text:
|
if text:
|
||||||
added2[elem] = level1.add(text, _href,
|
added2[elem] = level1.add(text, _href,
|
||||||
@ -145,12 +146,12 @@ class DetectStructure(object):
|
|||||||
level3 = list(XPath(self.opts.level3_toc)(item.data))
|
level3 = list(XPath(self.opts.level3_toc)(item.data))
|
||||||
for elem in level3:
|
for elem in level3:
|
||||||
level2 = None
|
level2 = None
|
||||||
for item in item.data.iterdescendants():
|
for item in document.data.iterdescendants():
|
||||||
if item in added2.keys():
|
if item in added2.keys():
|
||||||
level2 = added2[item]
|
level2 = added2[item]
|
||||||
elif item == elem and level2 is not None:
|
elif item == elem and level2 is not None:
|
||||||
text, _href = \
|
text, _href = \
|
||||||
self.elem_to_link(item, elem, counter)
|
self.elem_to_link(document, elem, counter)
|
||||||
counter += 1
|
counter += 1
|
||||||
if text:
|
if text:
|
||||||
level2.add(text, _href,
|
level2.add(text, _href,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user