Sync to pluginize

This commit is contained in:
John Schember 2009-04-24 20:24:15 -04:00
commit 202958cb4d
30 changed files with 666 additions and 4075 deletions

View File

@ -287,6 +287,7 @@ from calibre.ebooks.odt.input import ODTInput
from calibre.ebooks.rtf.input import RTFInput
from calibre.ebooks.html.input import HTMLInput
from calibre.ebooks.oeb.output import OEBOutput
from calibre.ebooks.epub.output import EPUBOutput
from calibre.ebooks.txt.output import TXTOutput
from calibre.ebooks.pdf.output import PDFOutput
from calibre.ebooks.pdb.ereader.output import EREADEROutput
@ -294,7 +295,7 @@ from calibre.customize.profiles import input_profiles, output_profiles
plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
FB2Input, ODTInput, RTFInput, EREADEROutput]
FB2Input, ODTInput, RTFInput, EPUBOutput, EREADEROutput]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
x.__name__.endswith('MetadataReader')]
plugins += [x for x in list(locals().values()) if isinstance(x, type) and \

View File

@ -3,7 +3,7 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, re
import re
from itertools import izip
from calibre.customize import Plugin as _Plugin
@ -22,7 +22,7 @@ class Plugin(_Plugin):
fbase = 12
fsizes = [5, 7, 9, 12, 13.5, 17, 20, 22, 24]
screen_size = (800, 600)
screen_size = (1600, 1200)
dpi = 100
def __init__(self, *args, **kwargs):

View File

@ -117,6 +117,9 @@ def add_pipeline_options(parser, plumber):
'line_height',
'linearize_tables',
'extra_css',
'margin_top', 'margin_left', 'margin_right',
'margin_bottom', 'dont_justify',
'insert_blank_line', 'remove_paragraph_spacing',
]
),
@ -124,6 +127,8 @@ def add_pipeline_options(parser, plumber):
_('Control auto-detection of document structure.'),
[
'dont_split_on_page_breaks', 'chapter', 'chapter_mark',
'prefer_metadata_cover', 'remove_first_image',
'insert_comments',
]
),

View File

@ -195,7 +195,7 @@ OptionRecommendation(name='toc_filter',
OptionRecommendation(name='chapter',
recommended_value="//*[((name()='h1' or name()='h2') and "
"re:test(., 'chapter|book|section|part', 'i')) or @class "
r"re:test(., 'chapter|book|section|part\s+', 'i')) or @class "
"= 'chapter']", level=OptionRecommendation.LOW,
help=_('An XPath expression to detect chapter titles. The default '
'is to consider <h1> or <h2> tags that contain the words '
@ -227,6 +227,64 @@ OptionRecommendation(name='extra_css',
'rules.')
),
OptionRecommendation(name='margin_top',
recommended_value=5.0, level=OptionRecommendation.LOW,
help=_('Set the top margin in pts. Default is %default')),
OptionRecommendation(name='margin_bottom',
recommended_value=5.0, level=OptionRecommendation.LOW,
help=_('Set the bottom margin in pts. Default is %default')),
OptionRecommendation(name='margin_left',
recommended_value=5.0, level=OptionRecommendation.LOW,
help=_('Set the left margin in pts. Default is %default')),
OptionRecommendation(name='margin_right',
recommended_value=5.0, level=OptionRecommendation.LOW,
help=_('Set the right margin in pts. Default is %default')),
OptionRecommendation(name='dont_justify',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Do not force text to be justified in output. Whether text '
'is actually displayed justified or not depends on whether '
'the ebook format and reading device support justification.')
),
OptionRecommendation(name='remove_paragraph_spacing',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Remove spacing between paragraphs. Also sets an indent on '
'paragraphs of 1.5em. Spacing removal will not work '
'if the source file does not use paragraphs (<p> or <div> tags).')
),
OptionRecommendation(name='prefer_metadata_cover',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Use the cover detected from the source file in preference '
'to the specified cover.')
),
OptionRecommendation(name='insert_blank_line',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Insert a blank line between paragraphs. Will not work '
'if the source file does not use paragraphs (<p> or <div> tags).'
)
),
OptionRecommendation(name='remove_first_image',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Remove the first image from the input ebook. Useful if the '
'first image in the source file is a cover and you are specifying '
'an external cover.'
)
),
OptionRecommendation(name='insert_comments',
recommended_value=False, level=OptionRecommendation.LOW,
help=_('Insert the comments/summary from the book metadata at the start of '
'the book. This is useful if your ebook reader does not support '
'displaying the comments from the metadata.'
)
),
OptionRecommendation(name='read_metadata_from_opf',
@ -244,7 +302,8 @@ OptionRecommendation(name='title',
OptionRecommendation(name='authors',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the authors. Multiple authors should be separated ')),
help=_('Set the authors. Multiple authors should be separated by '
'ampersands.')),
OptionRecommendation(name='title_sort',
recommended_value=None, level=OptionRecommendation.LOW,
@ -428,7 +487,6 @@ OptionRecommendation(name='language',
mi.cover = None
self.user_metadata = mi
def setup_options(self):
'''
Setup the `self.opts` object.
@ -479,9 +537,16 @@ OptionRecommendation(name='language',
if not hasattr(self.oeb, 'manifest'):
self.oeb = create_oebbook(self.log, self.oeb, self.opts)
from calibre.ebooks.oeb.transforms.guide import Clean
Clean()(self.oeb, self.opts)
self.opts.source = self.opts.input_profile
self.opts.dest = self.opts.output_profile
from calibre.ebooks.oeb.transforms.metadata import MergeMetadata
MergeMetadata()(self.oeb, self.user_metadata,
self.opts.prefer_metadata_cover)
from calibre.ebooks.oeb.transforms.structure import DetectStructure
DetectStructure()(self.oeb, self.opts)
@ -495,6 +560,9 @@ OptionRecommendation(name='language',
else:
fkey = map(float, fkey.split(','))
from calibre.ebooks.oeb.transforms.jacket import Jacket
Jacket()(self.oeb, self.opts)
if self.opts.extra_css and os.path.exists(self.opts.extra_css):
self.opts.extra_css = open(self.opts.extra_css, 'rb').read()

View File

@ -6,32 +6,7 @@ __docformat__ = 'restructuredtext en'
'''
Conversion to EPUB.
'''
import sys, textwrap, re, os, uuid
from itertools import cycle
from calibre.utils.config import Config, StringConfig
from calibre.utils.zipfile import ZipFile, ZIP_STORED
from calibre.ebooks.html import tostring
from lxml import etree
class DefaultProfile(object):
    '''
    Device independent profile (selected with the profile name 'None').
    All device specific restrictions are switched off.
    '''

    # sys.maxint only exists on Python 2; fall back to sys.maxsize elsewhere
    # so that importing this module does not fail.
    flow_size = getattr(sys, 'maxint', sys.maxsize)
    screen_size = None
    remove_special_chars = False
    remove_object_tags = False


class PRS505(DefaultProfile):
    '''
    Profile for the PRS505 target device.
    '''

    flow_size = 270000
    screen_size = (590, 765)
    # Matches U+200B (zero width space) and U+00AD (soft hyphen)
    remove_special_chars = re.compile(u'[\u200b\u00ad]')
    remove_object_tags = True


# Maps the value of the --profile option to the class describing the device
PROFILES = {
    'PRS505' : PRS505,
    'None' : DefaultProfile,
}
def rules(stylesheets):
for s in stylesheets:
@ -58,152 +33,4 @@ def initialize_container(path_to_container, opf_name='metadata.opf'):
zf.writestr('META-INF/container.xml', CONTAINER)
return zf
def config(defaults=None, name='epub'):
    '''
    Build the set of options that control the conversion to EPUB.

    :param defaults: If None, a :class:`Config` called `name` is created,
                     otherwise a :class:`StringConfig` is created from `defaults`.
    :param name: Name of the :class:`Config` object (only used when
                 `defaults` is None).
    :return: The populated configuration object.
    '''
    desc = _('Options to control the conversion to EPUB')
    if defaults is None:
        c = Config(name, desc)
    else:
        c = StringConfig(defaults, desc)

    c.update(common_config())
    # The common 'output' and 'zip' options are dropped, 'output' is
    # re-added below with EPUB specific help
    c.remove_opt('output')
    c.remove_opt('zip')

    c.add_opt('output', ['-o', '--output'], default=None,
            help=_('The output EPUB file. If not specified, it is '
                   'derived from the input file name.'))
    c.add_opt('profile', ['--profile'], default='PRS505', choices=list(PROFILES.keys()),
            help=_('Profile of the target device this EPUB is meant for. '
                   'Set to None to create a device independent EPUB. '
                   'The profile is used for device specific restrictions '
                   'on the EPUB. Choices are: ')+str(list(PROFILES.keys())))
    c.add_opt('override_css', ['--override-css'], default=None,
            help=_('Either the path to a CSS stylesheet or raw CSS. '
                   'This CSS will override any existing CSS '
                   'declarations in the source files.'))

    structure = c.add_group('structure detection',
            _('Control auto-detection of document structure.'))
    structure('chapter', ['--chapter'],
            default="//*[re:match(name(), 'h[1-2]') and "
                    "re:test(., 'chapter|book|section|part', 'i')] | "
                    "//*[@class = 'chapter']",
            help=_('''\
An XPath expression to detect chapter titles. The default is to consider <h1> or
<h2> tags that contain the words "chapter","book","section" or "part" as chapter titles as
well as any tags that have class="chapter".
The expression used must evaluate to a list of elements. To disable chapter detection,
use the expression "/". See the XPath Tutorial in the calibre User Manual for further
help on using this feature.
''').replace('\n', ' '))
    structure('chapter_mark', ['--chapter-mark'], choices=['pagebreak', 'rule', 'both', 'none'],
            default='pagebreak',
            help=_('Specify how to mark detected chapters. A value of '
                   '"pagebreak" will insert page breaks before chapters. '
                   'A value of "rule" will insert a line before chapters. '
                   'A value of "none" will disable chapter marking and a '
                   'value of "both" will use both page breaks and lines '
                   'to mark chapters.'))
    structure('cover', ['--cover'], default=None,
            help=_('Path to the cover to be used for this book'))
    structure('prefer_metadata_cover', ['--prefer-metadata-cover'], default=False,
            action='store_true',
            help=_('Use the cover detected from the source file in preference '
                   'to the specified cover.'))
    structure('remove_first_image', ['--remove-first-image'], default=False,
            help=_('Remove the first image from the input ebook. Useful if '
                   'the first image in the source file is a cover and you '
                   'are specifying an external cover.'))
    structure('dont_split_on_page_breaks', ['--dont-split-on-page-breaks'], default=False,
            help=_('Turn off splitting at page breaks. Normally, input files '
                   'are automatically split at every page break into '
                   'two files. This gives an output ebook that can be parsed '
                   'faster and with less resources. However, splitting is '
                   'slow and if your source file contains a very large '
                   'number of page breaks, you should turn off splitting '
                   'on page breaks.'))
    structure('page', ['--page'], default=None,
            help=_('XPath expression to detect page boundaries for building '
                   'a custom pagination map, as used by AdobeDE. Default is '
                   'not to build an explicit pagination map.'))
    structure('page_names', ['--page-names'], default=None,
            help=_('XPath expression to find the name of each page in the '
                   'pagination map relative to its boundary element. '
                   'Default is to number all pages starting with 1.'))

    toc = c.add_group('toc',
            _('''\
Control the automatic generation of a Table of Contents. If an OPF file is detected
and it specifies a Table of Contents, then that will be used rather than trying
to auto-generate a Table of Contents.
''').replace('\n', ' '))
    toc('max_toc_links', ['--max-toc-links'], default=50,
            help=_('Maximum number of links to insert into the TOC. Set to 0 '
                   'to disable. Default is: %default. Links are only added to the '
                   'TOC if less than the --toc-threshold number of chapters were detected.'))
    toc('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
            help=_("Don't add auto-detected chapters to the Table of Contents."))
    toc('toc_threshold', ['--toc-threshold'], default=6,
            help=_('If fewer than this number of chapters is detected, then links '
                   'are added to the Table of Contents. Default: %default'))
    toc('level1_toc', ['--level1-toc'], default=None,
            help=_('XPath expression that specifies all tags that should be added '
                   'to the Table of Contents at level one. If this is specified, '
                   'it takes precedence over other forms of auto-detection.'))
    toc('level2_toc', ['--level2-toc'], default=None,
            help=_('XPath expression that specifies all tags that should be added '
                   'to the Table of Contents at level two. Each entry is added '
                   'under the previous level one entry.'))
    toc('level3_toc', ['--level3-toc'], default=None,
            help=_('XPath expression that specifies all tags that should be added '
                   'to the Table of Contents at level three. Each entry is added '
                   'under the previous level two entry.'))
    toc('from_ncx', ['--from-ncx'], default=None,
            help=_('Path to a .ncx file that contains the table of contents to use '
                   'for this ebook. The NCX file should contain links relative to '
                   'the directory it is placed in. See '
                   'http://www.niso.org/workrooms/daisy/Z39-86-2005.html#NCX for '
                   'an overview of the NCX format.'))
    toc('use_auto_toc', ['--use-auto-toc'], default=False,
            help=_('Normally, if the source file already has a Table of Contents, '
                   'it is used in preference to the auto-generated one. '
                   'With this option, the auto-generated one is always used.'))

    layout = c.add_group('page layout', _('Control page layout'))
    layout('margin_top', ['--margin-top'], default=5.0,
            help=_('Set the top margin in pts. Default is %default'))
    layout('margin_bottom', ['--margin-bottom'], default=5.0,
            help=_('Set the bottom margin in pts. Default is %default'))
    layout('margin_left', ['--margin-left'], default=5.0,
            help=_('Set the left margin in pts. Default is %default'))
    layout('margin_right', ['--margin-right'], default=5.0,
            help=_('Set the right margin in pts. Default is %default'))
    layout('base_font_size2', ['--base-font-size'], default=12.0,
            help=_('The base font size in pts. Default is %defaultpt. '
                   'Set to 0 to disable rescaling of fonts.'))
    layout('remove_paragraph_spacing', ['--remove-paragraph-spacing'], default=False,
            help=_('Remove spacing between paragraphs. '
                   'Also sets an indent on paragraphs of 1.5em. '
                   'You can override this by adding p {text-indent: 0cm} to '
                   '--override-css. Spacing removal will not work if the source '
                   'file forces inter-paragraph spacing.'))
    layout('no_justification', ['--no-justification'], default=False,
            help=_('Do not force text to be justified in output.'))
    layout('linearize_tables', ['--linearize-tables'], default=False,
            help=_('Remove table markup, converting it into paragraphs. '
                   'This is useful if your source file uses a table to manage layout.'))
    layout('preserve_tag_structure', ['--preserve-tag-structure'], default=False,
            help=_('Preserve the HTML tag structure while splitting large HTML files. '
                   'This is only necessary if the HTML files contain CSS that '
                   'uses sibling selectors. Enabling this greatly slows down '
                   'processing of large HTML files.'))

    c.add_opt('show_opf', ['--show-opf'], default=False, group='debug',
            help=_('Print generated OPF file to stdout'))
    c.add_opt('show_ncx', ['--show-ncx'], default=False, group='debug',
            help=_('Print generated NCX file to stdout'))
    c.add_opt('keep_intermediate', ['--keep-intermediate-files'], group='debug',
            default=False,
            help=_('Keep intermediate files during processing by html2epub'))
    c.add_opt('extract_to', ['--extract-to'], group='debug', default=None,
            help=_('Extract the contents of the produced EPUB file to the '
                   'specified directory.'))
    return c

View File

@ -1,300 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Font size rationalization. See :function:`relativize`.
'''
import logging, re, operator, functools, collections, unittest, copy, sys
from xml.dom import SyntaxErr
from lxml.cssselect import CSSSelector
from lxml import etree
from lxml.html import HtmlElement
from calibre.ebooks.html_old import fromstring
from calibre.ebooks.epub import rules
from cssutils import CSSParser
# A (possibly negative) integer or decimal number
num = r'[-]?\d+|[-]?\d*\.\d+'
# A CSS length. The number+unit alternative must be tried before the bare
# zero, otherwise a value such as "0.5em" is matched as a plain "0".
length = r'(?P<num>{num})(?P<unit>%|em|ex|px|in|cm|mm|pt|pc)|(?P<zero>0)'.replace('{num}', num)
absolute_size = r'(?P<abs>(x?x-)?(small|large)|medium)'
relative_size = r'(?P<rel>smaller|larger)'

# Matches any font size specification (relative keyword, absolute keyword or length)
font_size_pat = re.compile('|'.join((relative_size, absolute_size, length)), re.I)
# Matches line heights given in absolute units only
line_height_pat = re.compile(r'({num})(px|in|cm|mm|pt|pc)'.replace('{num}', num))

# Number of points in one of each absolute unit
PTU = {
    'in' : 72.,
    'cm' : 72/2.54,
    'mm' : 72/25.4,
    'pt' : 1.0,
    'pc' : 12.,      # one pica is twelve points
    'px' : 72/96.,   # CSS defines 96 pixels to the inch
    }

DEFAULT_FONT_SIZE = 12
class Rationalizer(object):
    '''
    Font size rationalization: computes the effective font size of every
    element, rescales the sizes relative to a base font size and expresses
    the result as percentage based CSS. All functionality is exposed via
    class methods, the entry point is :method:`rationalize`.
    '''

    @classmethod
    def specificity(cls, s):
        '''Map CSS specificity tuple to a single integer'''
        # Every component is *weighted* by a decreasing power of ten, so that
        # an earlier component dominates the later ones.
        return sum(10**(4-i) * x for i, x in enumerate(s))

    @classmethod
    def compute_font_size(cls, elem):
        '''
        Calculate the effective font size of an element traversing its ancestors as far as
        necessary. The result is stored in `elem.computed_font_size`.
        '''
        if elem.computed_font_size is not None:
            return
        sfs = elem.specified_font_size
        if callable(sfs):
            # Relative size: needs the computed size of the parent first
            parent = elem.getparent()
            cls.compute_font_size(parent)
            elem.computed_font_size = sfs(parent.computed_font_size)
        else:
            elem.computed_font_size = sfs

    @classmethod
    def calculate_font_size(cls, style):
        'Return font size in pts from style object. For relative units returns a callable'
        match = font_size_pat.search(style.font)
        fs = ''
        if match:
            fs = match.group()
        # An explicit font-size takes precedence over the font shorthand
        if style.fontSize:
            fs = style.fontSize

        match = font_size_pat.search(fs)
        if match is None:
            return None
        match = match.groupdict()

        unit = match.get('unit', '')
        if unit:
            unit = unit.lower()
        if unit in PTU:
            return PTU[unit] * float(match['num'])
        if unit in ('em', 'ex'):
            return functools.partial(operator.mul, float(match['num']))
        if unit == '%':
            return functools.partial(operator.mul, float(match['num'])/100.)

        abs_size = match.get('abs', '')
        if abs_size:
            abs_size = abs_size.lower()
        if abs_size:
            # Every step away from medium scales by 1.2: small/large are one
            # step, x-small/x-large two steps and xx-small/xx-large three.
            steps = 0 if abs_size == 'medium' else abs_size.count('x') + 1
            if 'small' in abs_size:
                steps = -steps
            return DEFAULT_FONT_SIZE * (1.2)**steps

        if match.get('zero', False):
            return 0.

        # Only the relative keywords are left
        factor = 1.2 if 'larger' in fs.lower() else 0.8
        return functools.partial(operator.mul, factor)

    @classmethod
    def resolve_rules(cls, stylesheets):
        '''
        Add `fs_rules` and `lh_rules` lists to every stylesheet. They hold
        [selector, value] pairs for the font sizes and the absolute line heights
        specified in the stylesheet. Already resolved stylesheets are skipped.
        '''
        for sheet in stylesheets:
            if hasattr(sheet, 'fs_rules'):
                continue
            sheet.fs_rules = []
            sheet.lh_rules = []
            for r in sheet:
                if r.type != r.STYLE_RULE:
                    continue
                font_size = cls.calculate_font_size(r.style)
                if font_size is not None:
                    for s in r.selectorList:
                        sheet.fs_rules.append([CSSSelector(s.selectorText), font_size])
                orig = line_height_pat.search(r.style.lineHeight)
                if orig is not None:
                    lh = float(orig.group(1)) * PTU[orig.group(2).lower()]
                    for s in r.selectorList:
                        sheet.lh_rules.append([CSSSelector(s.selectorText), lh])

    @classmethod
    def apply_font_size_rules(cls, stylesheets, root):
        'Add a ``specified_font_size`` attribute to every element that has a specified font size'
        cls.resolve_rules(stylesheets)
        for sheet in stylesheets:
            for selector, font_size in sheet.fs_rules:
                for elem in selector(root):
                    elem.specified_font_size = font_size

    @classmethod
    def remove_font_size_information(cls, stylesheets):
        '''
        Strip font sizes (font-size property and the size part of the font
        shorthand) and absolute line heights from all rules in `stylesheets`.
        '''
        for r in rules(stylesheets):
            r.style.removeProperty('font-size')
            try:
                new = font_size_pat.sub('', r.style.font).strip()
                if new:
                    r.style.font = new
                else:
                    r.style.removeProperty('font')
            except SyntaxErr:
                # What is left of the shorthand is not valid CSS, drop it
                r.style.removeProperty('font')
            if line_height_pat.search(r.style.lineHeight) is not None:
                r.style.removeProperty('line-height')

    @classmethod
    def compute_font_sizes(cls, root, stylesheets, base=12):
        '''
        Compute the font size of every element in `root` and return a
        stylesheet that implements the sizes, rescaled so that the most common
        font size in the body becomes `base`, as percentages.

        :param root: Root element of the document. Its elements are expected to
                     provide the `specified_font_size` and `computed_font_size`
                     attributes.
        :param stylesheets: The stylesheets of the document, objects without
                            a `cssText` attribute are ignored.
        :param base: The base font size in pts. If it is not positive, nothing
                     is rescaled and an empty stylesheet is returned.
        '''
        stylesheets = [s for s in stylesheets if hasattr(s, 'cssText')]
        cls.apply_font_size_rules(stylesheets, root)

        # Compute the effective font size of all tags
        root.computed_font_size = DEFAULT_FONT_SIZE
        for elem in root.iter(etree.Element):
            cls.compute_font_size(elem)

        extra_css = {}
        if base > 0:
            # Calculate the "base" (i.e. most common) font size
            font_sizes = collections.defaultdict(int)
            body = root.xpath('//body')[0]
            IGNORE = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
            for elem in body.iter(etree.Element):
                if elem.tag not in IGNORE:
                    t = getattr(elem, 'text', '')
                    if t:
                        t = t.strip()
                    if t:
                        font_sizes[elem.computed_font_size] += len(t)

                # The tail is rendered with the font size of the parent
                t = getattr(elem, 'tail', '')
                if t:
                    t = t.strip()
                if t:
                    parent = elem.getparent()
                    if parent.tag not in IGNORE:
                        font_sizes[parent.computed_font_size] += len(t)

            try:
                most_common = max(font_sizes.items(), key=operator.itemgetter(1))[0]
                # float() prevents integer division when both values are ints
                scale = float(base)/most_common if most_common > 0 else 1.
            except ValueError:
                # No text at all in the body
                scale = 1.

            # rescale absolute line-heights
            counter = 0
            for sheet in stylesheets:
                for selector, lh in sheet.lh_rules:
                    for elem in selector(root):
                        elem.set('id', elem.get('id', 'cfs_%d'%counter))
                        counter += 1
                        extra_css.setdefault(elem.get('id'), []).append(
                                'line-height:%fpt'%(lh*scale))

            # Rescale all computed font sizes
            for elem in body.iter(etree.Element):
                if isinstance(elem, HtmlElement):
                    elem.computed_font_size *= scale

            # Remove all font size specifications from the last stylesheet
            cls.remove_font_size_information(stylesheets[-1:])

            # Create the CSS to implement the rescaled font sizes
            for elem in body.iter(etree.Element):
                cfs, pcfs = map(operator.attrgetter('computed_font_size'),
                        (elem, elem.getparent()))
                # Only emit a rule when the size differs noticeably from the parent
                if abs(cfs-pcfs) > 1/12. and abs(pcfs) > 1/12.:
                    elem.set('id', elem.get('id', 'cfs_%d'%counter))
                    counter += 1
                    extra_css.setdefault(elem.get('id'), []).append(
                            'font-size: %f%%'%(100*(cfs/pcfs)))

        css = CSSParser(loglevel=logging.ERROR).parseString('')
        for elem_id, declarations in extra_css.items():
            css.add('#%s {%s}'%(elem_id, ';'.join(declarations)))
        return css

    @classmethod
    def rationalize(cls, stylesheets, root, opts):
        '''
        Rationalize the font sizes in `root` if `opts.base_font_size2` is
        positive. Failures are logged, not raised.

        :return: The stylesheet created by :method:`compute_font_sizes` or None
                 if rationalization was disabled or failed.
        '''
        logger = logging.getLogger('html2epub')
        logger.info('\t\tRationalizing fonts...')
        extra_css = None
        if opts.base_font_size2 > 0:
            try:
                extra_css = cls.compute_font_sizes(root, stylesheets,
                        base=opts.base_font_size2)
            except Exception:
                # Best effort: never abort the conversion because of fonts, but
                # do not swallow KeyboardInterrupt/SystemExit either
                logger.warning('Failed to rationalize font sizes.')
                if opts.verbose > 1:
                    logger.exception('')
            finally:
                # NOTE(review): relies on root providing
                # remove_font_size_information(), this is not a standard lxml
                # method -- confirm against the element class used by the parser
                root.remove_font_size_information()
        logger.debug('\t\tDone rationalizing')
        return extra_css
################################################################################
############## Testing
################################################################################
class FontTest(unittest.TestCase):
    '''
    Tests for :class:`Rationalizer`, run against a small HTML document.
    '''

    def setUp(self):
        from calibre.ebooks.epub import config
        self.opts = config(defaults='').parse()
        self.html = '''
<html>
<head>
<title>Test document</title>
</head>
<body>
<div id="div1">
<!-- A comment -->
<p id="p1">Some <b>text</b></p>
</div>
<p id="p2">Some other <span class="it">text</span>.</p>
<p id="longest">The longest piece of single font size text in this entire file. Used to test resizing.</p>
</body>
</html>
'''
        self.root = fromstring(self.html)

    def do_test(self, css, base=DEFAULT_FONT_SIZE, scale=1):
        '''
        Rationalize a copy of the test document using `css`, then apply the
        generated stylesheet to a second, stripped copy and check that every
        element in the body has the same computed font size in both copies.

        :return: The text of the generated stylesheet.
        '''
        root1 = copy.deepcopy(self.root)
        root1.computed_font_size = DEFAULT_FONT_SIZE
        css_parser = CSSParser(loglevel=logging.ERROR)
        stylesheet = css_parser.parseString(css)
        stylesheet2 = Rationalizer.compute_font_sizes(root1, [stylesheet], base)

        # Second copy: only the generated stylesheet is applied to it
        root2 = copy.deepcopy(root1)
        root2.remove_font_size_information()
        root2.computed_font_size = DEFAULT_FONT_SIZE
        Rationalizer.apply_font_size_rules([stylesheet2], root2)
        for elem in root2.iter(etree.Element):
            Rationalizer.compute_font_size(elem)

        body1 = root1.xpath('//body')[0]
        body2 = root2.xpath('//body')[0]
        tree1 = root1.getroottree()
        for e1, e2 in zip(body1.iter(etree.Element), body2.iter(etree.Element)):
            details = (tree1.getpath(e1), e1.computed_font_size, e2.computed_font_size)
            self.assertAlmostEqual(e1.computed_font_size, e2.computed_font_size,
                    msg='Computed font sizes for %s not equal. Original: %f Processed: %f'%\
                    details)
        return stylesheet2.cssText

    def testStripping(self):
        'Test that any original entries are removed from the CSS'
        raw = 'p { font: bold 10px italic smaller; font-size: x-large} \na { font-size: 0 }'
        css = CSSParser(loglevel=logging.ERROR).parseString(raw)
        Rationalizer.compute_font_sizes(copy.deepcopy(self.root), [css])
        stripped = css.cssText.replace(' ', '').replace('\n', '')
        self.assertEqual(stripped, 'p{font:bolditalic}')

    def testIdentity(self):
        'Test that no unnecessary font size changes are made'
        extra_css = self.do_test('div {font-size:12pt} \nspan {font-size:100%}')
        self.assertEqual(extra_css.strip(), '')

    def testRelativization(self):
        'Test conversion of absolute to relative sizes'
        self.do_test('#p1 {font: 24pt} b {font: 12pt} .it {font: 48pt} #p2 {font: 100%}')

    def testResizing(self):
        'Test resizing of fonts'
        self.do_test('#longest {font: 24pt} .it {font:20pt; line-height:22pt}')
def suite():
    'Return a test suite with all the tests from :class:`FontTest`'
    loader = unittest.TestLoader()
    return loader.loadTestsFromTestCase(FontTest)
def test():
    '''
    Run the font tests with verbose output.

    :return: 0 if all tests passed, 1 otherwise, so that the result can be
             used as the exit code of the process.
    '''
    result = unittest.TextTestRunner(verbosity=2).run(suite())
    return 0 if result.wasSuccessful() else 1

if __name__ == '__main__':
    sys.exit(test())

View File

@ -1,93 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Convert any ebook format to epub.
'''
import sys, os, re
from contextlib import nested
from calibre import extract, walk
from calibre.ebooks import DRMError
from calibre.ebooks.epub import config as common_config
from calibre.ebooks.epub.from_html import convert as html2epub, find_html_index
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile
from calibre.customize.ui import run_plugins_on_preprocess
SOURCE_FORMATS = ['lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf',
'txt', 'pdf', 'rar', 'zip', 'oebzip', 'htm', 'html', 'epub']
def unarchive(path, tdir):
    '''
    Extract the archive at `path` into `tdir` and locate the ebook inside it.
    OPF files are looked for first, then the formats in MAP (in the order MAP
    yields them). Falls back to :function:`find_html_index`.

    :return: (path to the file found, its extension)
    '''
    extract(path, tdir)
    files = list(walk(tdir))
    candidates = ['opf'] + list(MAP.keys())
    for ext in candidates:
        suffix = '.'+ext
        for f in files:
            if not f.lower().endswith(suffix):
                continue
            # Tiny txt/rtf files are skipped
            too_small = ext in ['txt', 'rtf'] and os.stat(f).st_size < 2048
            if not too_small:
                return f, ext
    return find_html_index(files)
def any2epub(opts, path, notification=None, create_epub=True,
oeb_cover=False, extract_to=None):
path = run_plugins_on_preprocess(path)
ext = os.path.splitext(path)[1]
if not ext:
raise ValueError('Unknown file type: '+path)
ext = ext.lower()[1:]
if opts.output is None:
opts.output = os.path.splitext(os.path.basename(path))[0]+'.epub'
with nested(TemporaryDirectory('_any2epub1'), TemporaryDirectory('_any2epub2')) as (tdir1, tdir2):
if ext in ['rar', 'zip', 'oebzip']:
path, ext = unarchive(path, tdir1)
print 'Found %s file in archive'%(ext.upper())
if ext in MAP.keys():
path = MAP[ext](path, tdir2, opts)
ext = 'opf'
if re.match(r'((x){0,1}htm(l){0,1})|opf', ext) is None:
raise ValueError('Conversion from %s is not supported'%ext.upper())
print 'Creating EPUB file...'
html2epub(path, opts, notification=notification,
create_epub=create_epub, oeb_cover=oeb_cover,
extract_to=extract_to)
def config(defaults=None):
    'Return the common EPUB conversion options, see calibre.ebooks.epub.config'
    c = common_config(defaults=defaults)
    return c
def formats():
    'Return the list of input formats: html, the archive formats and the keys of MAP'
    supported = ['html', 'rar', 'zip', 'oebzip']
    supported.extend(MAP.keys())
    return supported
# Usage template: the first %s is the name of the output format, the second
# the list of supported input formats
USAGE = _('''\
%%prog [options] filename
Convert any of a large number of ebook formats to a %s file. Supported formats are: %s
''')

def option_parser(usage=USAGE):
    'Return the command line option parser, `usage` is filled in with EPUB and the input formats'
    filled_in = usage%('EPUB', formats())
    return config().option_parser(usage=filled_in)
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) < 2:
parser.print_help()
print 'No input file specified.'
return 1
any2epub(opts, args[1])
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -1,71 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Convert periodical content into EPUB ebooks.
'''
import sys, glob, os
from calibre.web.feeds.main import config as feeds2disk_config, USAGE, run_recipe
from calibre.ebooks.epub.from_html import config as html2epub_config
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.epub.from_html import convert as html2epub
from calibre import strftime, sanitize_file_name
def config(defaults=None):
    '''
    Return the feeds2disk options merged with the html2epub options. The
    lrf, epub, output_dir and chapter_mark options are removed.
    '''
    c = feeds2disk_config(defaults=defaults)
    for name in ('lrf', 'epub', 'output_dir'):
        c.remove(name)
    c.update(html2epub_config(defaults=defaults))
    c.remove('chapter_mark')
    return c
def option_parser():
    'Return the command line option parser built from :function:`config`'
    return config().option_parser(usage=USAGE)
def convert(opts, recipe_arg, notification=None):
opts.lrf = False
opts.epub = True
if opts.debug:
opts.verbose = 2
parser = option_parser()
with TemporaryDirectory('_feeds2epub') as tdir:
opts.output_dir = tdir
recipe = run_recipe(opts, recipe_arg, parser, notification=notification)
c = config()
recipe_opts = c.parse_string(recipe.html2epub_options)
c.smart_update(recipe_opts, opts)
opts = recipe_opts
opts.chapter_mark = 'none'
opts.dont_split_on_page_breaks = True
opf = glob.glob(os.path.join(tdir, '*.opf'))
if not opf:
raise Exception('Downloading of recipe: %s failed'%recipe_arg)
opf = opf[0]
if opts.output is None:
fname = recipe.title + strftime(recipe.timefmt) + '.epub'
opts.output = os.path.join(os.getcwd(), sanitize_file_name(fname))
print 'Generating epub...'
opts.encoding = 'utf-8'
opts.remove_paragraph_spacing = True
html2epub(opf, opts, notification=notification)
def main(args=sys.argv, notification=None, handler=None):
    '''
    Command line entry point. Expects exactly one positional argument (the
    recipe) unless feeds were specified via the options. `handler` is unused.

    :return: 0 on success, 1 if the arguments are invalid.
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if opts.feeds is None and len(args) != 2:
        parser.print_help()
        return 1
    if len(args) > 1:
        recipe_arg = args[1]
    else:
        recipe_arg = None
    convert(opts, recipe_arg, notification=notification)
    return 0

if __name__ == '__main__':
    sys.exit(main())

View File

@ -1,547 +0,0 @@
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Conversion of HTML/OPF files follows several stages:
* All links in the HTML files or in the OPF manifest are
followed to build up a list of HTML files to be converted.
This stage is implemented by
:function:`calibre.ebooks.html.traverse` and
:class:`calibre.ebooks.html.HTMLFile`.
* The HTML is pre-processed to make it more semantic.
All links in the HTML files to other resources like images,
stylesheets, etc. are relativized. The resources are copied
into the `resources` sub directory. This is accomplished by
:class:`calibre.ebooks.html.PreProcessor` and
:class:`calibre.ebooks.html.Parser`.
* The HTML is processed. Various operations are performed.
All style declarations are extracted and consolidated into
a single style sheet. Chapters are auto-detected and marked.
Various font related manipulations are performed. See
:class:`HTMLProcessor`.
* The processed HTML is saved and the
:module:`calibre.ebooks.epub.split` module is used to split up
large HTML files into smaller chunks.
* The EPUB container is created.
'''
import os, sys, cStringIO, logging, re, functools, shutil
from lxml.etree import XPath
from lxml import html, etree
from PyQt4.Qt import QApplication, QPixmap, Qt
from calibre.ebooks.html_old import Processor, merge_metadata, get_filelist,\
opf_traverse, create_metadata, rebase_toc, Link, parser
from calibre.ebooks.epub import config as common_config, tostring
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.epub import initialize_container, PROFILES
from calibre.ebooks.epub.split import split
from calibre.ebooks.epub.pages import add_page_map
from calibre.ebooks.epub.fonts import Rationalizer
from calibre.constants import preferred_encoding
from calibre.customize.ui import run_plugins_on_postprocess
from calibre import walk, CurrentDir, to_unicode, fit_image
content = functools.partial(os.path.join, u'content')
def remove_bad_link(element, attribute, link, pos):
    '''
    Remove a broken link from the tree. <link> elements are removed
    entirely, for all other elements only `attribute` is deleted. Nothing is
    done if `attribute` is None. `link` and `pos` are not used.
    '''
    if attribute is None:
        return
    if element.tag not in ['link']:
        element.set(attribute, '')
        del element.attrib[attribute]
        return
    element.getparent().remove(element)
def check_links(opf_path, pretty_print):
    '''
    Find and remove all invalid links in the HTML files

    All HTML files listed in the manifest of the OPF are looked up in the
    `content` directory next to the OPF. Links whose target does not exist
    are removed via :function:`remove_bad_link` and the files are re-written.

    :param opf_path: Path to the OPF file.
    :param pretty_print: Passed on to `tostring` when serializing the files.
    '''
    logger = logging.getLogger('html2epub')
    logger.info('\tChecking files for bad links...')
    pathtoopf = os.path.abspath(opf_path)
    with CurrentDir(os.path.dirname(pathtoopf)):
        # NOTE(review): this stream is never closed explicitly; whether OPF
        # has finished reading it when the constructor returns is not visible
        # here -- confirm before wrapping it in a with statement
        opf = OPF(open(pathtoopf, 'rb'), os.path.dirname(pathtoopf))
        html_files = []
        for item in opf.itermanifest():
            if 'html' in item.get('media-type', '').lower():
                f = item.get('href').split('/')[-1]
                if isinstance(f, str):
                    f = f.decode('utf-8')
                html_files.append(os.path.abspath(content(f)))

        for path in html_files:
            if not os.access(path, os.R_OK):
                continue
            base = os.path.dirname(path)
            with open(content(path), 'rb') as src:
                raw = src.read()
            root = html.fromstring(raw, parser=parser)
            # list() as the tree is modified while iterating over the links
            for element, attribute, link, pos in list(root.iterlinks()):
                link = to_unicode(link)
                plink = Link(link, base)
                if plink.path is not None and not os.path.exists(plink.path):
                    remove_bad_link(element, attribute, link, pos)
            # Serialize before opening the file for writing, so that a failure
            # in tostring() does not leave a truncated file behind
            data = tostring(root, pretty_print)
            with open(content(path), 'wb') as dest:
                dest.write(data)
def find_html_index(files):
    '''
    Given a list of files, find the most likely root HTML file in the
    list.

    Candidates are the paths ending in ``.htm``, ``.html``, ``.xhtm`` or
    ``.xhtml`` (case insensitive). A file whose base name is ``toc`` is
    preferred, then one named ``index``; when several match, the smallest
    wins. Otherwise the largest HTML file is chosen.

    :param files: iterable of paths to existing files.
    :return: tuple ``(path, extension)`` with the extension lower-cased and
             without the leading dot.
    :raises ValueError: if ``files`` contains no HTML file.
    '''
    html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}$', re.IGNORECASE)
    html_files = [f for f in files if html_pat.search(f) is not None]
    if not html_files:
        raise ValueError(_('Could not find an ebook inside the archive'))
    # Stable sort by size, smallest first, so the largest file ends up last.
    html_files.sort(key=lambda f: os.stat(f).st_size)
    for q in ('toc', 'index'):
        for f in html_files:
            if os.path.splitext(os.path.basename(f))[0].lower() == q:
                return f, os.path.splitext(f)[1].lower()[1:]
    largest = html_files[-1]
    return largest, os.path.splitext(largest)[1].lower()[1:]
def rescale_images(imgdir, screen_size, log):
    '''
    Shrink, in place, every image in ``imgdir`` that fit_image() reports as
    needing scaling for ``screen_size``.

    Entries with a ``.css`` or ``.js`` extension and entries Qt cannot load
    as an image are skipped. Rescaled images are saved over the original
    path in JPEG format.

    :param imgdir: directory whose entries are examined (not recursive).
    :param screen_size: ``(width, height)`` passed to fit_image().
    :param log: logger; ``log.info`` is called once per rescaled image.
    '''
    max_width, max_height = screen_size
    if QApplication.instance() is None:
        QApplication([])
    skipped_extensions = ('.css', '.js')
    for name in os.listdir(imgdir):
        path = os.path.join(imgdir, name)
        extension = os.path.splitext(name)[1]
        if extension in skipped_extensions:
            continue
        pixmap = QPixmap()
        pixmap.load(path)
        if pixmap.isNull():
            continue
        fit = fit_image(pixmap.width(), pixmap.height(), max_width,
                        max_height)
        scaled, new_width, new_height = fit
        if not scaled:
            continue
        log.info('Rescaling image: '+name)
        resized = pixmap.scaled(new_width, new_height, Qt.IgnoreAspectRatio,
                                Qt.SmoothTransformation)
        resized.save(path, 'JPEG')
class HTMLProcessor(Processor, Rationalizer):
    '''
    Processor for one HTML file that is being converted to EPUB.

    On construction the file is parsed by ``Processor``, chapters are
    detected, CSS is extracted (and font sizes rationalized when
    ``opts.base_font_size2`` is positive), ``<script>`` tags under the body
    are dropped and fix_markup() is applied.
    '''

    def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, stylesheets):
        Processor.__init__(self, htmlfile, opts, tdir, resource_map, htmlfiles,
                           name='html2epub')
        if opts.verbose > 2:
            self.debug_tree('parsed')
        self.detect_chapters()

        self.extract_css(stylesheets)
        if self.opts.base_font_size2 > 0:
            self.font_css = self.rationalize(
                self.external_stylesheets + [self.stylesheet],
                self.root, self.opts)
        if opts.verbose > 2:
            self.debug_tree('nocss')

        if hasattr(self.body, 'xpath'):
            for script in list(self.body.xpath('descendant::script')):
                script.getparent().remove(script)

        self.fix_markup()

    def convert_image(self, img):
        '''
        Re-save the image referenced by ``img`` as
        ``<original name>_calibre_converted.jpg`` and repoint ``img`` and
        the resource map at the new file.

        Nothing is changed when the file is missing, cannot be loaded by Qt
        or cannot be saved; in particular the original is only deleted once
        the converted copy has been written successfully.
        '''
        rpath = img.get('src', '')
        path = os.path.join(os.path.dirname(self.save_path()), *rpath.split('/'))
        if not os.path.isfile(path):
            return
        app = None
        if QApplication.instance() is None:
            # Hold a reference for the duration of the call so the
            # application object outlives the pixmap operations below.
            app = QApplication([])
        pixmap = QPixmap()
        pixmap.load(path)
        if pixmap.isNull():
            return
        suffix = '_calibre_converted.jpg'
        # QPixmap.save() returns False on failure; do not delete the source
        # image unless the converted copy really exists.
        if not pixmap.save(path + suffix):
            return
        os.remove(path)
        for key, val in list(self.resource_map.items()):
            if val == rpath:
                self.resource_map[key] = rpath + suffix
        img.set('src', rpath + suffix)
        del app

    def fix_markup(self):
        '''
        Perform various markup transforms to get the output to render correctly
        in the quirky ADE.
        '''
        # Replace <br> that are children of <body> as ADE doesn't handle them
        if hasattr(self.body, 'xpath'):
            for br in self.body.xpath('./br'):
                if br.getparent() is None:
                    # Already removed as the second half of a <br><br> pair
                    continue
                # Next sibling node, or None when <br> is the last child
                sibling = br.getnext()
                br.tag = 'p'
                br.text = u'\u00a0'
                if (br.tail and br.tail.strip()) or sibling is None or \
                        getattr(sibling, 'tag', '') != 'br':
                    # Single <br>: turn it into a zero-height paragraph
                    parts = [part.strip() for part in
                             br.get('style', '').split(';')]
                    style = [part for part in parts if part]
                    style.append('margin: 0pt; border:0pt; height:0pt')
                    br.set('style', '; '.join(style))
                else:
                    # <br><br>: merge the pair into one blank paragraph,
                    # keeping the text that followed the second <br>
                    sibling.getparent().remove(sibling)
                    if sibling.tail:
                        if not br.tail:
                            br.tail = ''
                        br.tail += sibling.tail

        if self.opts.profile.remove_object_tags:
            for tag in self.root.xpath('//embed'):
                tag.getparent().remove(tag)
            for tag in self.root.xpath('//object'):
                if tag.get('type', '').lower().strip() in ('image/svg+xml',):
                    continue
                tag.getparent().remove(tag)

        # Drop empty <title>/<style> and empty, source-less <script> tags
        for tag in self.root.xpath('//title|//style'):
            if not tag.text:
                tag.getparent().remove(tag)
        for tag in self.root.xpath('//script'):
            if not tag.text and not tag.get('src', False):
                tag.getparent().remove(tag)

        for tag in self.root.xpath('//form'):
            tag.getparent().remove(tag)

        for tag in self.root.xpath('//center'):
            tag.tag = 'div'
            tag.set('style', 'text-align:center')

        if self.opts.linearize_tables:
            for tag in self.root.xpath('//table | //tr | //th | //td'):
                tag.tag = 'div'

        # ADE can't handle &amp; in an img url
        for tag in self.root.xpath('//img[@src]'):
            tag.set('src', tag.get('src', '').replace('&', ''))

    def save(self):
        '''Remove all ``<meta>`` tags, then save with comments stripped.'''
        for meta in list(self.root.xpath('//meta')):
            meta.getparent().remove(meta)
        # Strip all comments since Adobe DE is petrified of them
        Processor.save(self, strip_comments=True)

    def remove_first_image(self):
        '''
        Remove the first ``<img>`` in the document.

        :return: True if an image was removed, False if there was none.
        '''
        images = self.root.xpath('//img')
        if images:
            images[0].getparent().remove(images[0])
            return True
        return False
def config(defaults=None):
    '''
    Return the configuration object for this converter, which is simply the
    common EPUB configuration built from ``defaults``.
    '''
    shared = common_config(defaults=defaults)
    return shared
def option_parser():
    '''
    Build the command line option parser for the converter from config(),
    with a translated usage message.
    '''
    make_parser = config().option_parser
    return make_parser(usage=_('''\
%prog [options] file.html|opf
Convert a HTML file to an EPUB ebook. Recursively follows links in the HTML file.
If you specify an OPF file instead of an HTML file, the list of links is takes from
the <spine> element of the OPF file.
'''))
def parse_content(filelist, opts, tdir):
    '''
    Run every file in ``filelist`` through HTMLProcessor, saving the results
    and their stylesheets below ``tdir/content``.

    :param filelist: the HTML files to process, in spine order.
    :param opts: conversion options (``remove_first_image``,
                 ``base_font_size2``, ``toc_threshold``, ``max_toc_links``
                 are read here).
    :param tdir: working directory; ``content/resources`` is created in it.
    :return: tuple ``(resource_map, htmlfile_map, toc, stylesheet_map)``.
             ``htmlfile_map`` comes from the last processed file and is an
             empty dict when ``filelist`` is empty.
    '''
    os.makedirs(os.path.join(tdir, 'content', 'resources'))
    logger = logging.getLogger('html2epub')
    resource_map, stylesheets = {}, {}
    toc = TOC(base_path=tdir, type='root')
    stylesheet_map = {}
    first_image_removed = False
    hp = None
    for htmlfile in filelist:
        logger.debug('Processing %s...'%htmlfile)
        hp = HTMLProcessor(htmlfile, opts, os.path.join(tdir, 'content'),
                           resource_map, filelist, stylesheets)
        if not first_image_removed and opts.remove_first_image:
            first_image_removed = hp.remove_first_image()
        hp.populate_toc(toc)
        hp.save()
        sheets = hp.external_stylesheets + [hp.stylesheet, hp.font_css,
                                            hp.override_css]
        stylesheet_map[os.path.basename(hp.save_path())] = \
            [s for s in sheets if s is not None]

    logger.debug('Saving stylesheets...')
    if opts.base_font_size2 > 0:
        Rationalizer.remove_font_size_information(stylesheets.values())
    for path, css in stylesheets.items():
        # Entries are either parsed sheets (with ``cssText``) or raw text
        raw = getattr(css, 'cssText', css)
        if isinstance(raw, unicode):
            raw = raw.encode('utf-8')
        with open(path, 'wb') as out:
            out.write(raw)

    # Progressively drop less important TOC entries when there are too many
    if toc.count('chapter') > opts.toc_threshold:
        toc.purge(['file', 'link', 'unknown'])
    if toc.count('chapter') + toc.count('file') > opts.toc_threshold:
        toc.purge(['link', 'unknown'])
    toc.purge(['link'], max=opts.max_toc_links)

    # ``hp`` stays None for an empty file list; avoid an unbound-name crash
    htmlfile_map = hp.htmlfile_map if hp is not None else {}
    return resource_map, htmlfile_map, toc, stylesheet_map
# XHTML template for the generated cover page. It takes a single ``%s``
# substitution: the value of the cover image's ``src`` attribute. Literal
# percent signs in the markup are therefore written as ``%%``.
TITLEPAGE = '''\
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<title>Cover</title>
<style type="text/css" title="override_css">
@page {padding: 0pt; margin:0pt}
body { text-align: center; padding:0pt; margin: 0pt; }
div { margin: 0pt; padding: 0pt; }
</style>
</head>
<body>
<div>
<img src="%s" alt="cover" style="height: 100%%" />
</div>
</body>
</html>
'''
def create_cover_image(src, dest, screen_size, rescale_cover=True):
    '''
    Load the image at ``src`` and save it to ``dest``, optionally enlarging
    it to fill the screen.

    When ``rescale_cover`` is true and ``screen_size`` is not None, an image
    smaller than the screen in both dimensions is scaled up by a single
    factor (so its proportions are kept) until one dimension reaches the
    screen size. Images are never shrunk here.

    :param src: path of the source image.
    :param dest: path the cover is written to; the format follows the
                 extension of ``dest``.
    :param screen_size: ``(width, height)`` or None.
    :return: True when the cover was written, False on any failure (the
             traceback is printed).
    '''
    try:
        from PyQt4.Qt import QApplication, QImage, Qt
        if QApplication.instance() is None:
            QApplication([])
        im = QImage()
        im.load(src)
        if im.isNull():
            raise ValueError('Invalid cover image')
        if rescale_cover and screen_size is not None:
            width, height = im.width(), im.height()
            # Relative growth needed along each axis; the smaller one is the
            # largest growth that still fits on the screen.
            dw = (screen_size[0]-width)/float(width)
            dh = (screen_size[1]-height)/float(height)
            delta = min(dw, dh)
            if delta > 0:
                nwidth = int(width + delta*(width))
                nheight = int(height + delta*(height))
                im = im.scaled(nwidth, nheight, Qt.IgnoreAspectRatio,
                               Qt.SmoothTransformation)
        # QImage.save() reports failure through its return value
        if not im.save(dest):
            raise ValueError('Failed to save cover image')
    except Exception:
        import traceback
        traceback.print_exc()
        return False
    return True
def process_title_page(mi, filelist, htmlfilemap, opts, tdir):
    '''
    Create the cover image and the cover (title) page of the book.

    Two cover candidates exist: the one from the metadata (``mi.cover``) and
    the one specified in the options (``opts.cover``). The specified cover
    wins unless ``opts.prefer_metadata_cover`` is set; if the preferred
    candidate is missing or cannot be converted, the other one is used.
    The chosen image is written to ``content/resources/_cover_.jpg`` and a
    page built from TITLEPAGE is written to the ``content`` directory.

    If ``mi.cover`` points at the first file of ``filelist``, that file is
    treated as an existing title page and is overwritten rather than a new
    page being added.

    :return: tuple ``(title_page, has_title_page)``. ``title_page`` is the
             name of a newly created page that must be added to the spine,
             or None. ``has_title_page`` tells whether the book has a title
             page at all.
    '''
    old_title_page = None
    norm = lambda x: os.path.normcase(os.path.normpath(x))
    if not isinstance(mi.cover, basestring):
        mi.cover = None
    if mi.cover:
        if norm(filelist[0].path) == norm(mi.cover):
            old_title_page = htmlfilemap[filelist[0].path]

    cpath = '/'.join(('resources', '_cover_.jpg'))
    cover_dest = os.path.join(tdir, 'content', *cpath.split('/'))

    # Try the candidates in order of preference and stop at the first one
    # that converts, so that the file on disk is always the chosen cover
    # (writing both unconditionally let the specified cover overwrite a
    # preferred metadata cover).
    candidates = [opts.cover, mi.cover]
    if opts.prefer_metadata_cover:
        candidates.reverse()
    cover = None
    for candidate in candidates:
        if not candidate or not os.path.exists(candidate):
            continue
        if create_cover_image(candidate, cover_dest,
                              opts.profile.screen_size):
            cover = candidate
            break

    if cover is not None:
        titlepage = TITLEPAGE%cpath
        tp = 'calibre_title_page.html' if old_title_page is None else old_title_page
        tppath = os.path.join(tdir, 'content', tp)
        with open(tppath, 'wb') as stream:
            stream.write(titlepage)
        return tp if old_title_page is None else None, True
    elif os.path.exists(cover_dest):
        # Remove the leftovers of a failed conversion
        os.remove(cover_dest)
    return None, old_title_page is not None
def find_oeb_cover(htmlfile):
    '''
    Return the ``src`` of the first ``<img>`` tag in ``htmlfile``.

    Files larger than 2048 bytes are not examined.

    :return: the ``src`` value, or None if the file is too large or contains
             no matching ``<img>`` tag.
    '''
    if os.stat(htmlfile).st_size > 2048:
        return None
    with open(htmlfile, 'rb') as stream:
        raw = stream.read()
    match = re.search(r'(?i)<img[^<>]+src\s*=\s*[\'"](.+?)[\'"]', raw)
    if match:
        return match.group(1)
    return None
def condense_ncx(ncx_path):
    '''
    Shrink the NCX file at ``ncx_path`` in place by stripping leading and
    trailing whitespace from the text and tail of every element, then
    re-serialising the tree as UTF-8.
    '''
    tree = etree.parse(ncx_path)
    for tag in tree.getroot().iter(tag=etree.Element):
        if tag.text:
            tag.text = tag.text.strip()
        if tag.tail:
            tag.tail = tag.tail.strip()
    compressed = etree.tostring(tree.getroot(), encoding='utf-8')
    with open(ncx_path, 'wb') as stream:
        stream.write(compressed)
def convert(htmlfile, opts, notification=None, create_epub=True,
            oeb_cover=False, extract_to=None):
    '''
    Convert ``htmlfile`` (an HTML file or an OPF file) to EPUB.

    :param htmlfile: path to the input; a name ending in ``.opf`` is
                     treated as an OPF file.
    :param opts: parsed options. Several attributes are normalised in
                 place (``output``, ``profile``, ``override_css``,
                 ``from_opf``, ``from_ncx``, ``chapter``, ``level*_toc``).
    :param notification: not used in this function.
    :param create_epub: when true the EPUB container is written to
                        ``opts.output`` and post-processing plugins run.
    :param oeb_cover: when true no generated title page is put in the spine
                      and the guide points at the image found in the title
                      page instead.
    :param extract_to: if not None, the working directory is copied here
                       (replacing anything already there).
    '''
    htmlfile = os.path.abspath(htmlfile)
    if opts.output is None:
        opts.output = os.path.splitext(os.path.basename(htmlfile))[0] + '.epub'
    opts.profile = PROFILES[opts.profile]
    opts.output = os.path.abspath(opts.output)
    if opts.override_css is not None:
        # override_css is either a path to a CSS file or the CSS text itself
        try:
            opts.override_css = open(opts.override_css, 'rb').read().decode(preferred_encoding, 'replace')
        except:
            opts.override_css = opts.override_css.decode(preferred_encoding, 'replace')
    if opts.from_opf:
        opts.from_opf = os.path.abspath(opts.from_opf)
    if opts.from_ncx:
        opts.from_ncx = os.path.abspath(opts.from_ncx)
    if htmlfile.lower().endswith('.opf'):
        opf = OPF(htmlfile, os.path.dirname(os.path.abspath(htmlfile)))
        filelist = opf_traverse(opf, verbose=opts.verbose, encoding=opts.encoding)
        if not filelist:
            # The OPF yielded no files: fall back to the most likely HTML
            # file in the same directory tree
            htmlfile = find_html_index(walk(os.path.dirname(htmlfile)))[0]
            # NOTE(review): find_html_index() raises ValueError rather than
            # returning None, so this check looks unreachable
            if htmlfile is None:
                raise ValueError('Could not find suitable file to convert.')
            filelist = get_filelist(htmlfile, opts)[1]
        mi = merge_metadata(None, opf, opts)
    else:
        opf, filelist = get_filelist(htmlfile, opts)
        mi = merge_metadata(htmlfile, opf, opts)
    # Compile the user supplied XPath expressions once, with EXSLT regular
    # expression support available under the ``re`` prefix
    opts.chapter = XPath(opts.chapter,
                         namespaces={'re':'http://exslt.org/regular-expressions'})
    for x in (1, 2, 3):
        attr = 'level%d_toc'%x
        if getattr(opts, attr):
            setattr(opts, attr, XPath(getattr(opts, attr),
                                      namespaces={'re':'http://exslt.org/regular-expressions'}))
        else:
            setattr(opts, attr, None)

    with TemporaryDirectory(suffix='_html2epub', keep=opts.keep_intermediate) as tdir:
        if opts.keep_intermediate:
            print 'Intermediate files in', tdir
        resource_map, htmlfile_map, generated_toc, stylesheet_map = \
            parse_content(filelist, opts, tdir)
        logger = logging.getLogger('html2epub')
        resources = [os.path.join(tdir, 'content', f) for f in resource_map.values()]

        title_page, has_title_page = process_title_page(mi, filelist, htmlfile_map, opts, tdir)
        spine = [htmlfile_map[f.path] for f in filelist]
        if not oeb_cover and title_page is not None:
            spine = [title_page] + spine
        # The cover has been handled by process_title_page() above
        mi.cover = None
        mi.cover_data = (None, None)

        mi = create_metadata(tdir, mi, spine, resources)
        buf = cStringIO.StringIO()
        if mi.toc:
            rebase_toc(mi.toc, htmlfile_map, tdir)
        # Use the auto-generated TOC when asked to, or when the existing
        # one is missing or has fewer than two entries
        if opts.use_auto_toc or mi.toc is None or len(list(mi.toc.flat())) < 2:
            mi.toc = generated_toc
        if opts.from_ncx:
            # An explicitly supplied NCX overrides any other TOC
            toc = TOC()
            toc.read_ncx_toc(opts.from_ncx)
            mi.toc = toc
        for item in mi.manifest:
            if getattr(item, 'mime_type', None) == 'text/html':
                item.mime_type = 'application/xhtml+xml'
        opf_path = os.path.join(tdir, 'metadata.opf')
        with open(opf_path, 'wb') as f:
            mi.render(f, buf, 'toc.ncx')
        toc = buf.getvalue()
        if toc:
            with open(os.path.join(tdir, 'toc.ncx'), 'wb') as f:
                f.write(toc)
            if opts.show_ncx:
                print toc
        split(opf_path, opts, stylesheet_map)
        if opts.page:
            logger.info('\tBuilding page map...')
            add_page_map(opf_path, opts)
        check_links(opf_path, opts.pretty_print)

        # Re-read the OPF and rebuild its guide so that it contains at most
        # a single reference to the cover
        opf = OPF(opf_path, tdir)
        opf.remove_guide()
        oeb_cover_file = None
        if oeb_cover and title_page is not None:
            oeb_cover_file = find_oeb_cover(os.path.join(tdir, 'content', title_page))
        if has_title_page or (oeb_cover and oeb_cover_file):
            opf.create_guide_element()
        if has_title_page and not oeb_cover:
            opf.add_guide_item('cover', 'Cover', 'content/'+spine[0])
        if oeb_cover and oeb_cover_file:
            opf.add_guide_item('cover', 'Cover', 'content/'+oeb_cover_file)

        cpath = os.path.join(tdir, 'content', 'resources', '_cover_.jpg')
        if os.path.exists(cpath):
            opf.add_path_to_manifest(cpath, 'image/jpeg')
        with open(opf_path, 'wb') as f:
            f.write(opf.render())
        ncx_path = os.path.join(os.path.dirname(opf_path), 'toc.ncx')
        # Condense the NCX when it is larger than the profile's flow size
        if os.path.exists(ncx_path) and os.stat(ncx_path).st_size > opts.profile.flow_size:
            logger.info('Condensing NCX from %d bytes...'%os.stat(ncx_path).st_size)
            condense_ncx(ncx_path)
            if os.stat(ncx_path).st_size > opts.profile.flow_size:
                logger.warn('NCX still larger than allowed size at %d bytes. Menu based Table of Contents may not work on device.'%os.stat(ncx_path).st_size)

        if opts.profile.screen_size is not None:
            rescale_images(os.path.join(tdir, 'content', 'resources'),
                           opts.profile.screen_size, logger)

        if create_epub:
            epub = initialize_container(opts.output)
            epub.add_dir(tdir)
            epub.close()
            run_plugins_on_postprocess(opts.output, 'epub')
            logger.info(_('Output written to ')+opts.output)

        if opts.show_opf:
            print open(opf_path, 'rb').read()

        # The working directory can be copied out twice: once to the
        # location given in the options and once to the ``extract_to``
        # argument of this function
        if opts.extract_to is not None:
            if os.path.exists(opts.extract_to):
                shutil.rmtree(opts.extract_to)
            shutil.copytree(tdir, opts.extract_to)

        if extract_to is not None:
            if os.path.exists(extract_to):
                shutil.rmtree(extract_to)
            shutil.copytree(tdir, extract_to)
def main(args=sys.argv):
    '''
    Command line entry point.

    :param args: argument vector; ``args[1]`` must be the input file.
    :return: 0 after a conversion, 1 (after printing the help text) when no
             input file was given.
    '''
    parser = option_parser()
    opts, positional = parser.parse_args(args)
    if len(positional) >= 2:
        convert(positional[1], opts)
        return 0
    parser.print_help()
    print(_('You must specify an input HTML file'))
    return 1


if __name__ == '__main__':
    sys.exit(main())

View File

@ -0,0 +1,239 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from urllib import unquote
from calibre.customize.conversion import OutputFormatPlugin
from calibre.ptempfile import TemporaryDirectory
from calibre.constants import __appname__, __version__
from calibre import strftime, guess_type
from lxml import etree
class EPUBOutput(OutputFormatPlugin):
    '''
    Output plugin that writes an OEB book as an EPUB file.

    The book is first adjusted for Adobe Digital Editions (ADE), images are
    rescaled, a cover/title page is inserted, and the result is written out
    through the OEB output plugin and zipped into an EPUB container.
    '''

    name = 'EPUB Output'
    author = 'Kovid Goyal'
    file_type = 'epub'

    # Title page shown when the book has a cover image. Takes a single
    # ``%s``: the (XML escaped) ``src`` of the cover image.
    TITLEPAGE_COVER = '''\
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<title>Cover</title>
<style type="text/css" title="override_css">
@page {padding: 0pt; margin:0pt}
body { text-align: center; padding:0pt; margin: 0pt; }
div { margin: 0pt; padding: 0pt; }
</style>
</head>
<body>
<div>
<img src="%s" alt="cover" style="height: 100%%" />
</div>
</body>
</html>
'''

    # Generic title page for books without a cover. Takes a mapping with
    # the keys ``title``, ``img``, ``date``, ``author`` and ``app``; the
    # values must already be XML escaped. Must be well-formed XML, as it is
    # parsed with etree.fromstring().
    TITLEPAGE = '''\
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<style type="text/css">
body {
background: white no-repeat fixed center center;
text-align: center;
vertical-align: center;
overflow: hidden;
font-size: 18px;
}
h1 { font-family: serif; }
h2, h4 { font-family: monospace; }
</style>
</head>
<body>
<h1>%(title)s</h1>
<br/><br/>
<div style="position:relative">
<div style="position: absolute; left: 0; top: 0; width:100%%; height:100%%; vertical-align:center">
<img src="%(img)s" alt="calibre" style="opacity:0.3"/>
</div>
<div style="position: absolute; left: 0; top: 0; width:100%%; height:100%%; vertical-align:center">
<h2>%(date)s</h2>
<br/><br/><br/><br/><br/>
<h3>%(author)s</h3>
<br/><br/><br/><br/><br/><br/><br/><br/><br/>
<h4>Produced by %(app)s</h4>
</div>
</div>
</body>
</html>
'''

    def convert(self, oeb, output_path, input_plugin, opts, log):
        '''
        Write ``oeb`` to ``output_path`` as an EPUB file.

        :param oeb: the OEB book; it is modified in place.
        :param output_path: path of the EPUB file to create.
        :param input_plugin: passed through to the OEB output plugin.
        :param opts: conversion options.
        :param log: logger.
        '''
        self.log, self.opts, self.oeb = log, opts, oeb

        self.workaround_ade_quirks()

        from calibre.ebooks.oeb.transforms.rescale import RescaleImages
        RescaleImages()(oeb, opts)
        self.insert_cover()

        with TemporaryDirectory('_epub_output') as tdir:
            from calibre.customize.ui import plugin_for_output_format
            oeb_output = plugin_for_output_format('oeb')
            oeb_output.convert(oeb, tdir, input_plugin, opts, log)
            entries = os.listdir(tdir)
            opf = [x for x in entries if x.endswith('.opf')][0]
            ncx = [x for x in entries if x.endswith('.ncx')][0]
            self.condense_ncx(os.path.join(tdir, ncx))

            # initialize_container lives in calibre.ebooks.epub
            from calibre.ebooks.epub import initialize_container
            epub = initialize_container(output_path, os.path.basename(opf))
            epub.add_dir(tdir)
            epub.close()

    def default_cover(self):
        '''
        Create a generic cover for books that don't have a cover

        The logo image and a title page built from TITLEPAGE are added to
        the manifest.

        :return: the manifest item of the new title page, or None when the
                 GUI resources/Qt needed for the logo are unavailable.
        '''
        try:
            from calibre.gui2 import images_rc # Needed for access to logo
            from PyQt4.Qt import QApplication, QFile, QIODevice
        except Exception:
            # Best effort: without Qt or the resources there is no cover
            return None
        from calibre.ebooks.metadata import authors_to_string
        from xml.sax.saxutils import escape
        images_rc
        m = self.oeb.metadata
        title = unicode(m.title[0])
        # Only creators whose role is "aut" are authors
        a = [unicode(x) for x in m.creator if x.role == 'aut']
        author = authors_to_string(a)
        if QApplication.instance() is None:
            QApplication([])
        f = QFile(':/library')
        f.open(QIODevice.ReadOnly)
        img_data = str(f.readAll())
        item_id, href = self.oeb.manifest.generate('calibre-logo',
                                                   'calibre-logo.png')
        self.oeb.manifest.add(item_id, href, 'image/png', data=img_data)
        # Escape everything that is interpolated into the XML template,
        # otherwise a title such as "Tom & Jerry" makes parsing fail
        html = self.TITLEPAGE%dict(title=escape(title), author=escape(author),
                                   date=escape(strftime('%d %b, %Y')),
                                   app=escape(__appname__ +' '+__version__),
                                   img=escape(href, {'"': '&quot;'}))
        item_id, href = self.oeb.manifest.generate('calibre-titlepage',
                                                   'calibre-titlepage.xhtml')
        return self.oeb.manifest.add(item_id, href, guess_type('t.xhtml')[0],
                                     data=etree.fromstring(html))

    def insert_cover(self):
        '''
        Make sure the book starts with a title page.

        An existing ``titlepage`` guide entry is reused. Otherwise a page is
        generated from the ``cover`` guide entry or, failing that, from
        default_cover(). The page is inserted at the start of the spine and
        the guide's ``cover`` reference (created if missing) is pointed at
        it, as is the ``titlepage`` reference when one exists.
        '''
        from calibre.ebooks.oeb.base import urldefrag
        from xml.sax.saxutils import escape
        g, m = self.oeb.guide, self.oeb.manifest
        if 'titlepage' not in g:
            if 'cover' in g:
                src = escape(unquote(g['cover'].href), {'"': '&quot;'})
                tp = self.TITLEPAGE_COVER%src
                item_id, href = m.generate('titlepage', 'titlepage.xhtml')
                # guess_type() returns a (type, encoding) tuple
                item = m.add(item_id, href, guess_type('t.xhtml')[0],
                             data=etree.fromstring(tp))
            else:
                item = self.default_cover()
        else:
            item = m.hrefs[urldefrag(g['titlepage'].href)[0]]
        if item is not None:
            self.oeb.spine.insert(0, item, True)
            # Either reference may be absent at this point, so do not index
            # the guide blindly
            if 'cover' not in g.refs:
                g.add('cover', 'Cover', item.href)
            g.refs['cover'].href = item.href
            if 'titlepage' in g.refs:
                g.refs['titlepage'].href = item.href

    def condense_ncx(self, ncx_path):
        '''
        Unless pretty printing was requested, strip surrounding whitespace
        from the text and tail of every element of the NCX file at
        ``ncx_path`` and rewrite the file in place.
        '''
        if self.opts.pretty_print:
            return
        tree = etree.parse(ncx_path)
        for tag in tree.getroot().iter(tag=etree.Element):
            if tag.text:
                tag.text = tag.text.strip()
            if tag.tail:
                tag.tail = tag.tail.strip()
        compressed = etree.tostring(tree.getroot(), encoding='utf-8')
        with open(ncx_path, 'wb') as stream:
            stream.write(compressed)

    def workaround_ade_quirks(self):
        '''
        Perform various markup transforms to get the output to render correctly
        in the quirky ADE.
        '''
        from calibre.ebooks.oeb.base import XPNSMAP, XHTML
        from lxml.etree import XPath as _XPath
        from functools import partial
        # All expressions use the ``h:`` prefix, so they must be compiled
        # with the namespace map; element.xpath('//h:...') without it fails
        XPath = partial(_XPath, namespaces=XPNSMAP)

        for x in self.oeb.spine:
            root = x.data
            body = XPath('//h:body')(root)
            if body:
                body = body[0]
            # Replace <br> that are children of <body> as ADE doesn't handle them
            if hasattr(body, 'xpath'):
                for br in XPath('./h:br')(body):
                    if br.getparent() is None:
                        # Already removed as the second half of a pair
                        continue
                    # Next sibling node, or None for the last child
                    sibling = br.getnext()
                    br.tag = XHTML('p')
                    br.text = u'\u00a0'
                    if (br.tail and br.tail.strip()) or sibling is None or \
                            getattr(sibling, 'tag', '') != XHTML('br'):
                        parts = [part.strip() for part in
                                 br.get('style', '').split(';')]
                        style = [part for part in parts if part]
                        style.append('margin: 0pt; border:0pt; height:0pt')
                        br.set('style', '; '.join(style))
                    else:
                        # <br><br>: merge into a single blank paragraph
                        sibling.getparent().remove(sibling)
                        if sibling.tail:
                            if not br.tail:
                                br.tail = ''
                            br.tail += sibling.tail

            if self.opts.output_profile.remove_object_tags:
                for tag in XPath('//h:embed')(root):
                    tag.getparent().remove(tag)
                for tag in XPath('//h:object')(root):
                    if tag.get('type', '').lower().strip() in ('image/svg+xml',):
                        continue
                    tag.getparent().remove(tag)

            for tag in XPath('//h:title|//h:style')(root):
                if not tag.text:
                    tag.getparent().remove(tag)
            for tag in XPath('//h:script')(root):
                if not tag.text and not tag.get('src', False):
                    tag.getparent().remove(tag)

            for tag in XPath('//h:form')(root):
                tag.getparent().remove(tag)

            for tag in XPath('//h:center')(root):
                tag.tag = XHTML('div')
                tag.set('style', 'text-align:center')

            # ADE can't handle &amp; in an img url
            for tag in XPath('//h:img[@src]')(root):
                tag.set('src', tag.get('src', '').replace('&', ''))

        stylesheet = self.oeb.manifest.hrefs['stylesheet.css']
        stylesheet.data.add('a { color: inherit; text-decoration: inherit; '
                            'cursor: default; }')
        stylesheet.data.add('a[href] { color: blue; '
                            'text-decoration: underline; cursor:pointer; }')

View File

@ -260,6 +260,9 @@ class MetaInformation(object):
x = 1.0
return '%d'%x if int(x) == x else '%.2f'%x
def authors_from_string(self, raw):
    '''
    Parse ``raw`` with string_to_authors() and store the result as
    ``self.authors``.
    '''
    parsed = string_to_authors(raw)
    self.authors = parsed
def __unicode__(self):
ans = []
def fmt(x, y):

View File

@ -514,7 +514,8 @@ class Metadata(object):
scheme = Attribute(lambda term: 'scheme' if \
term == OPF('meta') else OPF('scheme'),
[DC('identifier'), OPF('meta')])
file_as = Attribute(OPF('file-as'), [DC('creator'), DC('contributor')])
file_as = Attribute(OPF('file-as'), [DC('creator'), DC('contributor'),
DC('title')])
role = Attribute(OPF('role'), [DC('creator'), DC('contributor')])
event = Attribute(OPF('event'), [DC('date')])
id = Attribute('id')
@ -593,6 +594,19 @@ class Metadata(object):
yield key
__iter__ = iterkeys
def clear(self, key):
    '''
    Remove every item stored under ``key``. The list object held in
    ``self.items`` is emptied in place, not replaced.
    '''
    entries = self.items[key]
    pending = list(entries)
    while pending:
        entries.remove(pending.pop(0))
def filter(self, key, predicate):
    '''
    Remove, in place, every item stored under ``key`` for which
    ``predicate(item)`` is true. The predicate is evaluated against a
    snapshot of the list taken before any removal.
    '''
    entries = self.items[key]
    for entry in tuple(entries):
        if not predicate(entry):
            continue
        entries.remove(entry)
def __getitem__(self, key):
return self.items[key]
@ -1011,7 +1025,7 @@ class Manifest(object):
media_type = OEB_DOC_MIME
elif media_type in OEB_STYLES:
media_type = OEB_CSS_MIME
attrib = {'id': item.id, 'href': item.href,
attrib = {'id': item.id, 'href': urlunquote(item.href),
'media-type': media_type}
if item.fallback:
attrib['fallback'] = item.fallback
@ -1202,6 +1216,9 @@ class Guide(object):
self.refs[type] = ref
return ref
def remove(self, type):
    '''
    Remove the reference stored under ``type`` from ``self.refs``.

    :return: the removed reference, or None if there was none.
    '''
    removed = self.refs.pop(type, None)
    return removed
def iterkeys(self):
for type in self.refs:
yield type
@ -1229,7 +1246,7 @@ class Guide(object):
def to_opf1(self, parent=None):
elem = element(parent, 'guide')
for ref in self.refs.values():
attrib = {'type': ref.type, 'href': ref.href}
attrib = {'type': ref.type, 'href': urlunquote(ref.href)}
if ref.title:
attrib['title'] = ref.title
element(elem, 'reference', attrib=attrib)
@ -1345,7 +1362,7 @@ class TOC(object):
def to_opf1(self, tour):
for node in self.nodes:
element(tour, 'site', attrib={
'title': node.title, 'href': node.href})
'title': node.title, 'href': urlunquote(node.href)})
node.to_opf1(tour)
return tour
@ -1358,7 +1375,7 @@ class TOC(object):
point = element(parent, NCX('navPoint'), attrib=attrib)
label = etree.SubElement(point, NCX('navLabel'))
element(label, NCX('text')).text = node.title
element(point, NCX('content'), src=node.href)
element(point, NCX('content'), src=urlunquote(node.href))
node.to_ncx(point)
return parent

View File

@ -12,13 +12,15 @@ from cStringIO import StringIO
from PyQt4.Qt import QFontDatabase
from calibre.customize.ui import available_input_formats
from calibre.ebooks.epub.from_html import TITLEPAGE
from calibre.ebooks.metadata.opf2 import OPF
from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.chardet import xml_to_unicode
from calibre.utils.zipfile import safe_replace, ZipFile
from calibre.utils.config import DynamicConfig
from calibre.utils.logging import Log
from calibre.ebooks.epub.output import EPUBOutput
TITLEPAGE = EPUBOutput.TITLEPAGE_COVER
def character_count(html):
'''

View File

@ -9,6 +9,7 @@ from lxml import etree
from calibre.customize.conversion import OutputFormatPlugin
from calibre import CurrentDir
from urllib import unquote
class OEBOutput(OutputFormatPlugin):
@ -32,7 +33,7 @@ class OEBOutput(OutputFormatPlugin):
f.write(raw)
for item in oeb_book.manifest:
path = os.path.abspath(item.href)
path = os.path.abspath(unquote(item.href))
dir = os.path.dirname(path)
if not os.path.exists(dir):
os.makedirs(dir)

View File

@ -11,6 +11,7 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import os
import itertools
import re
import logging
import copy
from weakref import WeakKeyDictionary
from xml.dom import SyntaxErr as CSSSyntaxError
@ -106,7 +107,8 @@ class CSSSelector(etree.XPath):
class Stylizer(object):
STYLESHEETS = WeakKeyDictionary()
def __init__(self, tree, path, oeb, profile=PROFILES['PRS505'], extra_css=''):
def __init__(self, tree, path, oeb, profile=PROFILES['PRS505'],
extra_css='', user_css=''):
self.oeb = oeb
self.profile = profile
self.logger = oeb.logger
@ -115,7 +117,8 @@ class Stylizer(object):
cssname = os.path.splitext(basename)[0] + '.css'
stylesheets = [HTML_CSS_STYLESHEET]
head = xpath(tree, '/h:html/h:head')[0]
parser = cssutils.CSSParser(fetcher=self._fetch_css_file)
parser = cssutils.CSSParser(fetcher=self._fetch_css_file,
log=logging.getLogger('calibre.css'))
for elem in head:
if elem.tag == XHTML('style') and elem.text \
and elem.get('type', CSS_MIME) in OEB_STYLES:
@ -135,8 +138,9 @@ class Stylizer(object):
(path, item.href))
continue
stylesheets.append(sitem.data)
if extra_css:
text = XHTML_CSS_NAMESPACE + extra_css
for x in (extra_css, user_css):
if x:
text = XHTML_CSS_NAMESPACE + x
stylesheet = parser.parseString(text, href=cssname)
stylesheet.namespaces['h'] = XHTML_NS
stylesheets.append(stylesheet)
@ -288,6 +292,9 @@ class Style(object):
self._lineHeight = None
stylizer._styles[element] = self
def set(self, prop, val):
    '''Store ``val`` as the value of the CSS property ``prop`` in ``self._style``.'''
    self._style.update({prop: val})
def _update_cssdict(self, cssdict):
self._style.update(cssdict)

View File

@ -114,12 +114,27 @@ class CSSFlattener(object):
def stylize_spine(self):
self.stylizers = {}
profile = self.context.source
css = ''
for item in self.oeb.spine:
html = item.data
body = html.find(XHTML('body'))
bs = body.get('style', '').split(';')
bs.append('margin-top: 0pt')
bs.append('margin-bottom: 0pt')
bs.append('margin-left : %fpt'%\
float(self.context.margin_left))
bs.append('margin-right : %fpt'%\
float(self.context.margin_right))
bs.append('text-align: '+ \
('left' if self.context.dont_justify else 'justify'))
body.set('style', '; '.join(bs))
stylizer = Stylizer(html, item.href, self.oeb, profile,
extra_css=self.context.extra_css)
user_css=self.context.extra_css,
extra_css=css)
self.stylizers[item] = stylizer
def baseline_node(self, node, stylizer, sizes, csize):
csize = stylizer.style(node)['font-size']
if node.text:
@ -219,6 +234,15 @@ class CSSFlattener(object):
if self.lineh and 'line-height' not in cssdict:
lineh = self.lineh / psize
cssdict['line-height'] = "%0.5fem" % lineh
if (self.context.remove_paragraph_spacing or
self.context.insert_blank_line) and tag in ('p', 'div'):
for prop in ('margin', 'padding', 'border'):
for edge in ('top', 'bottom'):
cssdict['%s-%s'%(prop, edge)] = '0pt'
if self.context.insert_blank_line:
cssdict['margin-top'] = cssdict['margin-bottom'] = '0.5em'
if self.context.remove_paragraph_spacing:
cssdict['text-indent'] = '1.5em'
if cssdict:
items = cssdict.items()
items.sort()
@ -253,7 +277,11 @@ class CSSFlattener(object):
href = item.relhref(href)
etree.SubElement(head, XHTML('link'),
rel='stylesheet', type=CSS_MIME, href=href)
if stylizer.page_rule:
stylizer.page_rule['margin-top'] = '%fpt'%\
float(self.context.margin_top)
stylizer.page_rule['margin-bottom'] = '%fpt'%\
float(self.context.margin_bottom)
items = stylizer.page_rule.items()
items.sort()
css = '; '.join("%s: %s" % (key, val) for key, val in items)
@ -285,3 +313,4 @@ class CSSFlattener(object):
for item in self.oeb.spine:
stylizer = self.stylizers[item]
self.flatten_head(item, stylizer, href)

View File

@ -0,0 +1,52 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
class Clean(object):
    '''Clean up guide, leaving only a pointer to the cover'''

    # Guide reference types that may stand in for a missing ``cover``
    COVER_CANDIDATES = (
        'other.ms-coverimage-standard',
        'other.ms-titleimage-standard', 'other.ms-titleimage',
        'other.ms-coverimage', 'other.ms-thumbimage-standard',
        'other.ms-thumbimage',
    )

    def __call__(self, oeb, opts):
        '''
        Reduce the guide of ``oeb`` to its ``cover`` and ``titlepage``
        references.

        If the guide has no ``cover``, the largest item referenced by one of
        COVER_CANDIDATES is promoted to be the cover. Every other guide
        reference is removed, together with the manifest item it points to
        unless that item is also used by the cover or the title page.
        '''
        from calibre.ebooks.oeb.base import urldefrag
        self.oeb, self.log, self.opts = oeb, oeb.log, opts

        protected_hrefs = set([])
        if 'titlepage' in self.oeb.guide:
            protected_hrefs.add(urldefrag(
                self.oeb.guide['titlepage'].href)[0])
        if 'cover' not in self.oeb.guide:
            covers = []
            for x in self.COVER_CANDIDATES:
                if x in self.oeb.guide:
                    href = self.oeb.guide[x].href
                    item = self.oeb.manifest.hrefs[href]
                    covers.append([self.oeb.guide[x], len(item.data)])
            # Largest first; the sort is stable, so ties keep the order of
            # COVER_CANDIDATES
            covers.sort(key=lambda entry: entry[1], reverse=True)
            if covers:
                ref = covers[0][0]
                if len(covers) > 1:
                    self.log('Choosing %s:%s as the cover'%(ref.type, ref.href))
                ref.type = 'cover'
                self.oeb.guide.refs['cover'] = ref
                protected_hrefs.add(urldefrag(ref.href)[0])
        else:
            protected_hrefs.add(urldefrag(self.oeb.guide.refs['cover'].href)[0])

        for x in list(self.oeb.guide):
            href = urldefrag(self.oeb.guide[x].href)[0]
            # Keep the cover and title page references themselves
            if x.lower() not in ('cover', 'titlepage'):
                try:
                    if href not in protected_hrefs:
                        self.oeb.manifest.remove(self.oeb.manifest.hrefs[href])
                except KeyError:
                    # The reference points at something not in the manifest
                    pass
                self.oeb.guide.remove(x)

View File

@ -0,0 +1,66 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import textwrap
from lxml import etree
from calibre.ebooks.oeb.base import XPNSMAP
from calibre import guess_type
class Jacket(object):
    '''
    Book jacket manipulation. Remove first image and insert comments at start of
    book.
    '''

    # Keys: xmlns, title, jacket (heading text), comments (markup)
    JACKET_TEMPLATE = textwrap.dedent(u'''\
<html xmlns="%(xmlns)s">
<head>
<title>%(title)s</title>
</head>
<body>
<h1 style="text-align: center">%(title)s</h1>
<h2 style="text-align: center">%(jacket)s</h2>
<div>
%(comments)s
</div>
</body>
</html>
''')

    def remove_first_image(self):
        '''
        Remove the first image that is present in the manifest, looking only
        at the first three spine items. Both the ``<img>`` element and the
        manifest entry of the image are removed.
        '''
        for i, item in enumerate(self.oeb.spine):
            if i > 2:
                break
            for img in item.data.xpath('//h:img[@src]', namespaces=XPNSMAP):
                href = item.abshref(img.get('src'))
                image = self.oeb.manifest.hrefs.get(href, None)
                if image is not None:
                    self.log('Removing first image', img.get('src'))
                    self.oeb.manifest.remove(image)
                    img.getparent().remove(img)
                    return

    def insert_comments(self, comments):
        '''
        Insert a jacket page showing the title and ``comments`` as the first
        item of the spine.

        Blank lines in ``comments`` become ``<br/><br/>``; apart from that
        ``comments`` is inserted as markup, so it has to be well-formed.
        '''
        from xml.sax.saxutils import escape
        self.log('Inserting metadata comments into book...')
        comments = comments.replace('\r\n', '\n').replace('\n\n', '<br/><br/>')
        # The title is plain text: escape it so that characters such as "&"
        # do not break parsing of the generated page
        title = escape(u'%s' % (self.opts.title,))
        html = self.JACKET_TEMPLATE%dict(xmlns=XPNSMAP['h'],
                                         title=title, comments=comments,
                                         jacket=_('Book Jacket'))
        item_id, href = self.oeb.manifest.generate('jacket', 'jacket.xhtml')
        root = etree.fromstring(html)
        item = self.oeb.manifest.add(item_id, href, guess_type(href)[0],
                                     data=root)
        self.oeb.spine.insert(0, item, True)

    def __call__(self, oeb, opts):
        '''
        Apply the jacket options to ``oeb``: ``opts.remove_first_image`` and
        ``opts.insert_comments`` (which needs a non-empty ``opts.comments``).
        '''
        self.oeb, self.opts, self.log = oeb, opts, oeb.log
        if opts.remove_first_image:
            self.remove_first_image()
        if opts.insert_comments and opts.comments:
            self.insert_comments(opts.comments)

View File

@ -0,0 +1,84 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
class MergeMetadata(object):
    'Merge in user metadata, including cover'

    def __call__(self, oeb, mi, prefer_metadata_cover=False):
        '''
        Copy every field that is set in ``mi`` into the metadata of ``oeb``,
        replacing existing values, then set the cover.

        :param oeb: the OEB book to update.
        :param mi: user specified metadata.
        :param prefer_metadata_cover: when true, a cover already present in
               the book is kept instead of being replaced by the cover of
               ``mi``.
        '''
        from calibre.ebooks.oeb.base import DC
        self.oeb, self.log = oeb, oeb.log
        m = self.oeb.metadata
        self.log('Merging user specified metadata...')
        if mi.title:
            m.clear('title')
            m.add('title', mi.title)
        if mi.title_sort:
            if not m.title:
                m.add(DC('title'), mi.title_sort)
            m.title[0].file_as = mi.title_sort
        if mi.authors:
            # presumably ``role`` can be unset on an item; treat that as ''
            m.filter('creator', lambda x: (x.role or '').lower() == 'aut')
            for a in mi.authors:
                attrib = {'role':'aut'}
                if mi.author_sort:
                    attrib['file_as'] = mi.author_sort
                m.add('creator', a, attrib=attrib)
        if mi.comments:
            m.clear('description')
            m.add('description', mi.comments)
        if mi.publisher:
            m.clear('publisher')
            m.add('publisher', mi.publisher)
        if mi.series:
            m.clear('series')
            m.add('series', mi.series)
        if mi.isbn:
            has = False
            for x in m.identifier:
                if (x.scheme or '').lower() == 'isbn':
                    x.content = mi.isbn
                    has = True
            if not has:
                m.add('identifier', mi.isbn, scheme='ISBN')
        if mi.language:
            m.clear('language')
            m.add('language', mi.language)
        if mi.book_producer:
            m.filter('creator', lambda x: (x.role or '').lower() == 'bkp')
            m.add('creator', mi.book_producer, role='bkp')
        if mi.series_index is not None:
            m.clear('series_index')
            m.add('series_index', '%.2f'%mi.series_index)
        if mi.rating is not None:
            m.clear('rating')
            m.add('rating', '%.2f'%mi.rating)
        if mi.tags:
            m.clear('subject')
            for t in mi.tags:
                m.add('subject', t)

        self.set_cover(mi, prefer_metadata_cover)

    def set_cover(self, mi, prefer_metadata_cover):
        '''
        Install the cover of ``mi`` in the book.

        The image data comes from the readable file ``mi.cover`` or, failing
        that, from the last element of ``mi.cover_data``. If the guide
        already has a ``cover`` its data is replaced (unless
        ``prefer_metadata_cover`` is true); otherwise a new ``cover.jpg``
        item and guide reference are created. Nothing happens when ``mi``
        has no cover data.
        '''
        cdata = ''
        if mi.cover and os.access(mi.cover, os.R_OK):
            with open(mi.cover, 'rb') as stream:
                cdata = stream.read()
        elif mi.cover_data and mi.cover_data[-1]:
            cdata = mi.cover_data[-1]
        if not cdata:
            return
        guide, manifest = self.oeb.guide, self.oeb.manifest
        if 'cover' in guide:
            if not prefer_metadata_cover:
                href = guide['cover'].href
                manifest.hrefs[href]._data = cdata
        else:
            item_id, href = manifest.generate('cover', 'cover.jpg')
            manifest.add(item_id, href, 'image/jpeg', data=cdata)
            guide.add('cover', 'Cover', href)

View File

@ -0,0 +1,37 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre import fit_image
class RescaleImages(object):
    'Scale down manifest images so that they fit the destination screen'

    def __call__(self, oeb, opts):
        '''
        Rescale every image item in ``oeb.manifest`` that fit_image() reports
        as needing scaling for ``opts.dest.width`` x ``opts.dest.height``.

        Items whose data is empty or cannot be loaded as an image are skipped.
        '''
        from PyQt4.Qt import QApplication, QImage, Qt
        from calibre.gui2 import pixmap_to_data

        logger = oeb.log
        self.oeb = oeb
        self.opts = opts
        self.log = logger
        max_width = opts.dest.width
        max_height = opts.dest.height

        for entry in oeb.manifest:
            if not entry.media_type.startswith('image'):
                continue
            payload = entry.data
            if not payload:
                continue
            # Make sure a QApplication instance exists before using QImage.
            if QApplication.instance() is None:
                QApplication([])
            picture = QImage(10, 10, QImage.Format_ARGB32_Premultiplied)
            if not picture.loadFromData(payload):
                continue
            needs_scaling, target_width, target_height = fit_image(
                    picture.width(), picture.height(), max_width, max_height)
            if not needs_scaling:
                continue
            self.log('Rescaling image', entry.href)
            resized = picture.scaled(target_width, target_height,
                    Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
            entry.data = pixmap_to_data(resized)

View File

@ -16,8 +16,8 @@ from lxml import etree
from lxml.cssselect import CSSSelector
from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP as NAMESPACES, \
urldefrag, rewrite_links
from calibre.ebooks.epub import tostring, rules
urldefrag, rewrite_links, urlunquote
from calibre.ebooks.epub import rules
XPath = functools.partial(_XPath, namespaces=NAMESPACES)
@ -25,6 +25,9 @@ XPath = functools.partial(_XPath, namespaces=NAMESPACES)
SPLIT_ATTR = 'cs'
SPLIT_POINT_ATTR = 'csp'
def tostring(root):
    '''Serialize `root` with etree.tostring() using the utf-8 encoding.'''
    serialized = etree.tostring(root, encoding='utf-8')
    return serialized
class SplitError(ValueError):
def __init__(self, path, root):
@ -142,7 +145,7 @@ class Split(object):
nhref = anchor_map[frag if frag else None]
nhref = self.current_item.relhref(nhref)
if frag:
nhref = '#'.join((nhref, frag))
nhref = '#'.join((urlunquote(nhref), frag))
return nhref
return url

View File

@ -11,7 +11,7 @@ import re
from lxml import etree
from urlparse import urlparse
from calibre.ebooks.oeb.base import XPNSMAP, TOC
from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML
XPath = lambda x: etree.XPath(x, namespaces=XPNSMAP)
class DetectStructure(object):
@ -63,11 +63,11 @@ class DetectStructure(object):
if chapter_mark == 'none':
continue
elif chapter_mark == 'rule':
mark = etree.Element('hr')
mark = etree.Element(XHTML('hr'))
elif chapter_mark == 'pagebreak':
mark = etree.Element('div', style=page_break_after)
mark = etree.Element(XHTML('div'), style=page_break_after)
else: # chapter_mark == 'both':
mark = etree.Element('hr', style=page_break_before)
mark = etree.Element(XHTML('hr'), style=page_break_before)
elem.addprevious(mark)
def create_level_based_toc(self):
@ -114,12 +114,13 @@ class DetectStructure(object):
def add_leveled_toc_items(self, item):
level1 = XPath(self.opts.level1_toc)(item.data)
level1_order = []
document = item
counter = 1
if level1:
added = {}
for elem in level1:
text, _href = self.elem_to_link(item, elem, counter)
text, _href = self.elem_to_link(document, elem, counter)
counter += 1
if text:
node = self.oeb.toc.add(text, _href,
@ -132,11 +133,11 @@ class DetectStructure(object):
level2 = list(XPath(self.opts.level2_toc)(item.data))
for elem in level2:
level1 = None
for item in item.data.iterdescendants():
for item in document.data.iterdescendants():
if item in added.keys():
level1 = added[item]
elif item == elem and level1 is not None:
text, _href = self.elem_to_link(item, elem, counter)
text, _href = self.elem_to_link(document, elem, counter)
counter += 1
if text:
added2[elem] = level1.add(text, _href,
@ -145,12 +146,12 @@ class DetectStructure(object):
level3 = list(XPath(self.opts.level3_toc)(item.data))
for elem in level3:
level2 = None
for item in item.data.iterdescendants():
for item in document.data.iterdescendants():
if item in added2.keys():
level2 = added2[item]
elif item == elem and level2 is not None:
text, _href = \
self.elem_to_link(item, elem, counter)
self.elem_to_link(document, elem, counter)
counter += 1
if text:
level2.add(text, _href,

View File

@ -68,7 +68,7 @@ class PdbHeaderWriter(object):
def build_header(self, offsets):
'''
Sections is a list of section offsets
Offsets is a list of section offsets
'''

View File

@ -1,292 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
The GUI for conversion to EPUB.
'''
import os, uuid
from PyQt4.Qt import QDialog, QSpinBox, QDoubleSpinBox, QComboBox, QLineEdit, \
QTextEdit, QCheckBox, Qt, QPixmap, QIcon, QListWidgetItem, SIGNAL
from lxml.etree import XPath
from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
from calibre.gui2.dialogs.epub_ui import Ui_Dialog
from calibre.gui2 import error_dialog, choose_images, pixmap_to_data, ResizableDialog
from calibre.ebooks.epub.from_any import SOURCE_FORMATS, config as epubconfig
from calibre.ebooks.metadata import MetaInformation
from calibre.ptempfile import PersistentTemporaryFile
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata import authors_to_string, string_to_authors
class Config(ResizableDialog, Ui_Dialog):
OUTPUT = 'EPUB'
def __init__(self, parent, db, row=None, config=epubconfig):
ResizableDialog.__init__(self, parent)
self.hide_controls()
self.connect(self.category_list, SIGNAL('itemEntered(QListWidgetItem *)'),
self.show_category_help)
self.connect(self.cover_button, SIGNAL("clicked()"), self.select_cover)
self.cover_changed = False
self.db = db
self.id = None
self.row = row
if row is not None:
self.id = db.id(row)
base = config().as_string() + '\n\n'
defaults = self.db.conversion_options(self.id, self.OUTPUT.lower())
defaults = base + (defaults if defaults else '')
self.config = config(defaults=defaults)
else:
self.config = config()
self.initialize()
self.get_source_format()
self.category_list.setCurrentRow(0)
if self.row is None:
self.setWindowTitle(_('Bulk convert to ')+self.OUTPUT)
else:
self.setWindowTitle((_(u'Convert %s to ')%unicode(self.title.text()))+self.OUTPUT)
def hide_controls(self):
self.source_profile_label.setVisible(False)
self.opt_source_profile.setVisible(False)
self.dest_profile_label.setVisible(False)
self.opt_dest_profile.setVisible(False)
self.opt_toc_title.setVisible(False)
self.toc_title_label.setVisible(False)
self.opt_rescale_images.setVisible(False)
self.opt_ignore_tables.setVisible(False)
self.opt_prefer_author_sort.setVisible(False)
def initialize(self):
self.__w = []
self.__w.append(QIcon(':/images/dialog_information.svg'))
self.item1 = QListWidgetItem(self.__w[-1], _('Metadata'), self.category_list)
self.__w.append(QIcon(':/images/lookfeel.svg'))
self.item2 = QListWidgetItem(self.__w[-1], _('Look & Feel').replace(' ','\n'), self.category_list)
self.__w.append(QIcon(':/images/page.svg'))
self.item3 = QListWidgetItem(self.__w[-1], _('Page Setup').replace(' ','\n'), self.category_list)
self.__w.append(QIcon(':/images/chapters.svg'))
self.item4 = QListWidgetItem(self.__w[-1], _('Chapter Detection').replace(' ','\n'), self.category_list)
self.setup_tooltips()
self.initialize_options()
def set_help(self, msg):
if msg and getattr(msg, 'strip', lambda:True)():
self.help_view.setPlainText(msg)
def setup_tooltips(self):
for opt in self.config.option_set.preferences:
g = getattr(self, 'opt_'+opt.name, False)
if opt.help and g:
help = opt.help.replace('%default', str(opt.default))
g._help = help
g.setToolTip(help.replace('<', '&lt;').replace('>', '&gt;'))
g.setWhatsThis(help.replace('<', '&lt;').replace('>', '&gt;'))
g.__class__.enterEvent = lambda obj, event: self.set_help(getattr(obj, '_help', obj.toolTip()))
def show_category_help(self, item):
text = unicode(item.text())
help = {
_('Metadata') : _('Specify metadata such as title and author for the book.\n\nMetadata will be updated in the database as well as the generated %s file.')%self.OUTPUT,
_('Look & Feel') : _('Adjust the look of the generated ebook by specifying things like font sizes.'),
_('Page Setup') : _('Specify the page layout settings like margins.'),
_('Chapter Detection') : _('Fine tune the detection of chapter and section headings.'),
}
self.set_help(help[text.replace('\n', ' ')])
def select_cover(self):
files = choose_images(self, 'change cover dialog',
_('Choose cover for ') + unicode(self.title.text()))
if not files:
return
_file = files[0]
if _file:
_file = os.path.abspath(_file)
if not os.access(_file, os.R_OK):
d = error_dialog(self.window, _('Cannot read'),
_('You do not have permission to read the file: ') + _file)
d.exec_()
return
cf, cover = None, None
try:
cf = open(_file, "rb")
cover = cf.read()
except IOError, e:
d = error_dialog(self.window, _('Error reading file'),
_("<p>There was an error reading from file: <br /><b>") + _file + "</b></p><br />"+str(e))
d.exec_()
if cover:
pix = QPixmap()
pix.loadFromData(cover)
if pix.isNull():
d = error_dialog(self.window, _('Error reading file'),
_file + _(" is not a valid picture"))
d.exec_()
else:
self.cover_path.setText(_file)
self.cover.setPixmap(pix)
self.cover_changed = True
self.cpixmap = pix
def initialize_metadata_options(self):
all_series = self.db.all_series()
all_series.sort(cmp=lambda x, y : cmp(x[1], y[1]))
for series in all_series:
self.series.addItem(series[1])
self.series.setCurrentIndex(-1)
if self.row is not None:
mi = self.db.get_metadata(self.id, index_is_id=True)
self.title.setText(mi.title)
if mi.authors:
self.author.setText(authors_to_string(mi.authors))
else:
self.author.setText('')
self.publisher.setText(mi.publisher if mi.publisher else '')
self.author_sort.setText(mi.author_sort if mi.author_sort else '')
self.tags.setText(', '.join(mi.tags if mi.tags else []))
self.comment.setText(mi.comments if mi.comments else '')
if mi.series:
self.series.setCurrentIndex(self.series.findText(mi.series))
if mi.series_index is not None:
self.series_index.setValue(mi.series_index)
cover = self.db.cover(self.id, index_is_id=True)
if cover:
pm = QPixmap()
pm.loadFromData(cover)
if not pm.isNull():
self.cover.setPixmap(pm)
def get_title_and_authors(self):
title = unicode(self.title.text()).strip()
if not title:
title = _('Unknown')
authors = unicode(self.author.text()).strip()
authors = string_to_authors(authors) if authors else [_('Unknown')]
return title, authors
def get_metadata(self):
title, authors = self.get_title_and_authors()
mi = MetaInformation(title, authors)
publisher = unicode(self.publisher.text()).strip()
if publisher:
mi.publisher = publisher
author_sort = unicode(self.author_sort.text()).strip()
if author_sort:
mi.author_sort = author_sort
comments = unicode(self.comment.toPlainText()).strip()
if comments:
mi.comments = comments
mi.series_index = int(self.series_index.value())
if self.series.currentIndex() > -1:
mi.series = unicode(self.series.currentText()).strip()
tags = [t.strip() for t in unicode(self.tags.text()).strip().split(',')]
if tags:
mi.tags = tags
return mi
def read_settings(self):
for pref in self.config.option_set.preferences:
g = getattr(self, 'opt_'+pref.name, False)
if g:
if isinstance(g, (QSpinBox, QDoubleSpinBox)):
self.config.set(pref.name, g.value())
elif isinstance(g, (QLineEdit, QTextEdit)):
func = getattr(g, 'toPlainText', getattr(g, 'text', None))()
val = unicode(func)
self.config.set(pref.name, val if val else None)
elif isinstance(g, QComboBox):
self.config.set(pref.name, unicode(g.currentText()))
elif isinstance(g, QCheckBox):
self.config.set(pref.name, bool(g.isChecked()))
if self.row is not None:
self.db.set_conversion_options(self.id, self.OUTPUT.lower(), self.config.src)
def initialize_options(self):
self.initialize_metadata_options()
values = self.config.parse()
for pref in self.config.option_set.preferences:
g = getattr(self, 'opt_'+pref.name, False)
if g:
val = getattr(values, pref.name)
if val is None:
continue
if isinstance(g, (QSpinBox, QDoubleSpinBox)):
g.setValue(val)
elif isinstance(g, (QLineEdit, QTextEdit)):
getattr(g, 'setPlainText', g.setText)(val)
getattr(g, 'setCursorPosition', lambda x: x)(0)
elif isinstance(g, QComboBox):
for value in pref.choices:
g.addItem(value)
g.setCurrentIndex(g.findText(val))
elif isinstance(g, QCheckBox):
g.setCheckState(Qt.Checked if bool(val) else Qt.Unchecked)
def get_source_format(self):
self.source_format = None
if self.row is not None:
temp = self.db.formats(self.id, index_is_id=True)
if not temp:
error_dialog(self.parent(), _('Cannot convert'),
_('This book has no available formats')).exec_()
available_formats = [f.upper().strip() for f in temp.split(',')]
choices = [fmt.upper() for fmt in SOURCE_FORMATS if fmt.upper() in available_formats]
if not choices:
error_dialog(self.parent(), _('No available formats'),
_('Cannot convert %s as this book has no supported formats')%(self.title.text())).exec_()
elif len(choices) == 1:
self.source_format = choices[0]
else:
d = ChooseFormatDialog(self.parent(), _('Choose the format to convert to ')+self.OUTPUT, choices)
if d.exec_() == QDialog.Accepted:
self.source_format = d.format()
def accept(self):
for opt in ('chapter', 'level1_toc', 'level2_toc', 'level3_toc', 'page',
'page_names'):
text = unicode(getattr(self, 'opt_'+opt).text())
if text:
try:
XPath(text,namespaces={'re':'http://exslt.org/regular-expressions'})
except Exception, err:
error_dialog(self, _('Invalid XPath expression'),
_('The expression %s is invalid. Error: %s')%(text, err)
).exec_()
return
mi = self.get_metadata()
self.user_mi = mi
self.read_settings()
self.cover_file = None
if self.row is not None:
self.db.set_metadata(self.id, mi)
self.mi = self.db.get_metadata(self.id, index_is_id=True)
self.mi.application_id = uuid.uuid4()
opf = OPFCreator(os.getcwdu(), self.mi)
self.opf_file = PersistentTemporaryFile('.opf')
opf.render(self.opf_file)
self.opf_file.close()
if self.cover_changed:
self.db.set_cover(self.id, pixmap_to_data(self.cover.pixmap()))
cover = self.db.cover(self.id, index_is_id=True)
if cover:
cf = PersistentTemporaryFile('.jpeg')
cf.write(cover)
cf.close()
self.cover_file = cf
self.opts = self.config.parse()
QDialog.accept(self)

File diff suppressed because it is too large Load Diff

View File

@ -1,425 +0,0 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, codecs
from PyQt4.QtCore import QObject, SIGNAL, Qt
from PyQt4.QtGui import QAbstractSpinBox, QLineEdit, QCheckBox, QDialog, \
QPixmap, QTextEdit, QListWidgetItem, QIcon
from calibre.gui2.dialogs.lrf_single_ui import Ui_LRFSingleDialog
from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
from calibre.gui2 import qstring_to_unicode, error_dialog, \
pixmap_to_data, choose_images, config
from calibre.gui2.widgets import FontFamilyModel
from calibre.ebooks.lrf import option_parser
from calibre.ptempfile import PersistentTemporaryFile
from calibre.constants import __appname__
from calibre.ebooks.metadata import authors_to_string, string_to_authors, authors_to_sort_string
font_family_model = None
class LRFSingleDialog(QDialog, Ui_LRFSingleDialog):
PARSER = option_parser('')
PREPROCESS_OPTIONS = [ o for o in PARSER.option_groups if o.title == 'PREPROCESSING OPTIONS'][0].option_list
@classmethod
def options(cls):
options = cls.PARSER.option_list
for g in cls.PARSER.option_groups:
options.extend(g.option_list)
for opt in options:
yield opt
@classmethod
def option_to_name(cls, opt):
src = opt.get_opt_string()
return 'gui_' + src[2:].replace('-', '_')
def initialize_common(self):
self.output_format = 'LRF'
self.setup_tooltips()
self.initialize_options()
global font_family_model
if font_family_model is None:
font_family_model = FontFamilyModel()
self.font_family_model = font_family_model
self.gui_serif_family.setModel(self.font_family_model)
self.gui_sans_family.setModel(self.font_family_model)
self.gui_mono_family.setModel(self.font_family_model)
self.load_saved_global_defaults()
def populate_list(self):
self.__w = []
self.__w.append(QIcon(':/images/dialog_information.svg'))
self.item1 = QListWidgetItem(self.__w[-1], _("Metadata"), self.categoryList)
self.__w.append(QIcon(':/images/lookfeel.svg'))
self.item2 = QListWidgetItem(self.__w[-1], _('Look & Feel'), self.categoryList)
self.__w.append(QIcon(':/images/page.svg'))
self.item3 = QListWidgetItem(self.__w[-1], _('Page Setup'), self.categoryList)
self.__w.append(QIcon(':/images/chapters.svg'))
self.item4 = QListWidgetItem(self.__w[-1], _('Chapter Detection'), self.categoryList)
def __init__(self, window, db, row):
QDialog.__init__(self, window)
Ui_LRFSingleDialog.__init__(self)
self.setupUi(self)
self.populate_list()
self.categoryList.setCurrentRow(0)
QObject.connect(self.categoryList, SIGNAL('itemEntered(QListWidgetItem *)'),
self.show_category_help)
QObject.connect(self.cover_button, SIGNAL("clicked(bool)"), self.select_cover)
#self.categoryList.leaveEvent = self.reset_help
self.reset_help()
self.selected_format = None
self.initialize_common()
self.db = db
self.row = row
self.cover_changed = False
self.cpixmap = None
self.changed = False
if db:
self.id = self.db.id(self.row)
self.read_saved_options()
self.initialize_metadata()
formats = self.db.formats(self.row)
formats = [i.upper() for i in formats.split(',')] if formats else []
try:
formats.remove(self.output_format)
except ValueError:
pass
if not formats:
d = error_dialog(window, _('No available formats'),
_('Cannot convert %s as this book has no supported formats')%(self.gui_title.text()))
d.exec_()
if len(formats) > 1:
d = ChooseFormatDialog(window, _('Choose the format to convert into LRF'), formats)
d.exec_()
if d.result() == QDialog.Accepted:
self.selected_format = d.format()
elif len(formats) > 0:
self.selected_format = formats[0]
if self.selected_format:
self.setWindowTitle(_('Convert %s to LRF')%(self.selected_format,))
else:
self.setWindowTitle(_('Set conversion defaults'))
def load_saved_global_defaults(self):
cmdline = config['LRF_conversion_defaults']
if cmdline:
self.set_options_from_cmdline(cmdline)
def set_options_from_cmdline(self, cmdline):
for opt in self.options():
guiname = self.option_to_name(opt)
try:
obj = getattr(self, guiname)
except AttributeError:
continue
if isinstance(obj, QCheckBox):
if opt.get_opt_string() in cmdline:
obj.setCheckState(Qt.Checked)
else:
obj.setCheckState(Qt.Unchecked)
try:
i = cmdline.index(opt.get_opt_string())
except ValueError:
continue
if isinstance(obj, QAbstractSpinBox):
obj.setValue(cmdline[i+1])
elif isinstance(obj, QLineEdit):
obj.setText(cmdline[i+1])
elif isinstance(obj, QTextEdit):
obj.setPlainText(cmdline[i+1])
profile = cmdline[cmdline.index('--profile')+1]
pindex = self.gui_profile.findText(profile)
if pindex >= 0:
self.gui_profile.setCurrentIndex(pindex)
for prepro in self.PREPROCESS_OPTIONS:
ops = prepro.get_opt_string()
if ops in cmdline:
self.preprocess.setCurrentIndex(self.preprocess.findText(ops[2:]))
break
for opt in ('--serif-family', '--sans-family', '--mono-family'):
if opt in cmdline:
print 'in'
family = cmdline[cmdline.index(opt)+1].split(',')[-1].strip()
obj = getattr(self, 'gui_'+opt[2:].replace('-', '_'))
try:
obj.setCurrentIndex(self.font_family_model.index_of(family))
except:
continue
def read_saved_options(self):
cmdline = self.db.conversion_options(self.id, self.output_format.lower())
if cmdline:
self.set_options_from_cmdline(cmdline)
def select_cover(self, checked):
files = choose_images(self, 'change cover dialog',
_('Choose cover for ') + qstring_to_unicode(self.gui_title.text()))
if not files:
return
_file = files[0]
if _file:
_file = os.path.abspath(_file)
if not os.access(_file, os.R_OK):
d = error_dialog(self.window, _('Cannot read'),
_('You do not have permission to read the file: ') + _file)
d.exec_()
return
cf, cover = None, None
try:
cf = open(_file, "rb")
cover = cf.read()
except IOError, e:
d = error_dialog(self.window, _('Error reading file'),
_("<p>There was an error reading from file: <br /><b>") + _file + "</b></p><br />"+str(e))
d.exec_()
if cover:
pix = QPixmap()
pix.loadFromData(cover)
if pix.isNull():
d = error_dialog(self.window, _file + _(" is not a valid picture"))
d.exec_()
else:
self.cover_path.setText(_file)
self.cover.setPixmap(pix)
self.cover_changed = True
self.cpixmap = pix
def initialize_metadata(self):
db, row = self.db, self.row
self.id = self.db.id(row)
self.gui_title.setText(db.title(row))
au = self.db.authors(row)
if au:
au = [a.strip().replace('|', ',') for a in au.split(',')]
self.gui_author.setText(authors_to_string(au))
else:
self.gui_author.setText('')
aus = self.db.author_sort(row)
self.gui_author_sort.setText(aus if aus else '')
pub = self.db.publisher(row)
self.gui_publisher.setText(pub if pub else '')
tags = self.db.tags(row)
self.tags.setText(tags if tags else '')
comments = self.db.comments(row)
self.gui_comment.setPlainText(comments if comments else '')
all_series = self.db.all_series()
all_series.sort(cmp=lambda x, y : cmp(x[1], y[1]))
series_id = self.db.series_id(row)
idx, c = None, 0
for i in all_series:
id, name = i
if id == series_id:
idx = c
self.series.addItem(name)
c += 1
self.series.lineEdit().setText('')
if idx is not None:
self.series.setCurrentIndex(idx)
self.series_index.setValue(self.db.series_index(row))
cover = self.db.cover(row)
if cover:
pm = QPixmap()
pm.loadFromData(cover)
if not pm.isNull():
self.cover.setPixmap(pm)
def initialize_options(self):
'''Initialize non metadata options from the defaults.'''
for name in self.option_map.keys():
default = self.option_map[name].default
obj = getattr(self, name)
if isinstance(obj, QAbstractSpinBox):
obj.setValue(default)
elif isinstance(obj, QLineEdit) and default:
obj.setText(default)
elif isinstance(obj, QTextEdit) and default:
obj.setPlainText(default)
elif isinstance(obj, QCheckBox):
state = Qt.Checked if default else Qt.Unchecked
obj.setCheckState(state)
self.gui_headerformat.setDisabled(True)
self.gui_header_separation.setDisabled(True)
self.gui_use_metadata_cover.setCheckState(Qt.Checked)
self.preprocess.addItem(_('No preprocessing'))
for opt in self.PREPROCESS_OPTIONS:
self.preprocess.addItem(opt.get_opt_string()[2:])
ph = _('Preprocess the file before converting to LRF. This is useful if you know that the file is from a specific source. Known sources:')
ph += _('<ol><li><b>baen</b> - Books from BAEN Publishers</li>')
ph += _('<li><b>pdftohtml</b> - HTML files that are the output of the program pdftohtml</li>')
ph += _('<li><b>book-designer</b> - HTML0 files from Book Designer</li>')
self.preprocess.setToolTip(ph)
self.preprocess.setWhatsThis(ph)
for profile in self.PARSER.get_option('--profile').choices:
if self.gui_profile.findText(profile) < 0:
self.gui_profile.addItem(profile)
def setup_tooltips(self):
def show_item_help(obj, event):
self.set_help(obj.toolTip())
self.option_map = {}
for opt in self.options():
try:
help = opt.help.replace('%default', str(opt.default))
except (ValueError, TypeError):
help = opt.help
guiname = self.option_to_name(opt)
if hasattr(self, guiname):
obj = getattr(self, guiname)
obj.setToolTip(help)
obj.setWhatsThis(help)
self.option_map[guiname] = opt
obj.__class__.enterEvent = show_item_help
#obj.leaveEvent = self.reset_help
self.preprocess.__class__.enterEvent = show_item_help
#self.preprocess.leaveEvent = self.reset_help
def show_category_help(self, item):
text = qstring_to_unicode(item.text())
help = {
_('Metadata') : _('Specify metadata such as title and author for the book.<p>Metadata will be updated in the database as well as the generated LRF file.'),
_('Look & Feel') : _('Adjust the look of the generated LRF file by specifying things like font sizes and the spacing between words.'),
_('Page Setup') : _('Specify the page settings like margins and the screen size of the target device.'),
_('Chapter Detection') : _('Fine tune the detection of chapter and section headings.'),
}
self.set_help(help[text])
def set_help(self, msg):
if msg and getattr(msg, 'strip', lambda:True)():
self.help_view.setHtml('<html><body>%s</body></html>'%(msg,))
def reset_help(self, *args):
self.set_help(_('<font color="gray">No help available</font>'))
if args:
args[0].accept()
def build_commandline(self):
cmd = [__appname__]
for name in self.option_map.keys():
opt = self.option_map[name].get_opt_string()
obj = getattr(self, name)
if isinstance(obj, QAbstractSpinBox):
cmd.extend([opt, obj.value()])
elif isinstance(obj, QLineEdit):
val = qstring_to_unicode(obj.text())
if val:
if opt == '--encoding':
try:
codecs.getdecoder(val)
except:
d = error_dialog(self, 'Unknown encoding',
'<p>Unknown encoding: %s<br/>For a list of known encodings see http://docs.python.org/lib/standard-encodings.html'%val)
d.exec_()
return
cmd.extend([opt, val])
elif isinstance(obj, QTextEdit):
val = qstring_to_unicode(obj.toPlainText())
if val:
cmd.extend([opt, val])
elif isinstance(obj, QCheckBox):
if obj.checkState() == Qt.Checked:
cmd.append(opt)
text = qstring_to_unicode(self.preprocess.currentText())
if text != _('No preprocessing'):
cmd.append(u'--'+text)
cmd.extend([u'--profile', qstring_to_unicode(self.gui_profile.currentText())])
for opt in ('--serif-family', '--sans-family', '--mono-family'):
obj = getattr(self, 'gui_'+opt[2:].replace('-', '_'))
family = qstring_to_unicode(obj.itemText(obj.currentIndex())).strip()
if family != 'None':
cmd.extend([opt, family])
return cmd
def title(self):
return qstring_to_unicode(self.gui_title.text())
def write_metadata(self):
title = qstring_to_unicode(self.gui_title.text())
self.db.set_title(self.id, title)
au = unicode(self.gui_author.text())
if au:
self.db.set_authors(self.id, string_to_authors(au))
aus = qstring_to_unicode(self.gui_author_sort.text())
if not aus:
t = self.db.authors(self.id, index_is_id=True)
if not t:
t = _('Unknown')
aus = [a.strip().replace('|', ',') for a in t.split(',')]
aus = authors_to_sort_string(aus)
self.db.set_author_sort(self.id, aus)
self.db.set_publisher(self.id, qstring_to_unicode(self.gui_publisher.text()))
self.db.set_tags(self.id, qstring_to_unicode(self.tags.text()).split(','))
self.db.set_series(self.id, qstring_to_unicode(self.series.currentText()))
self.db.set_series_index(self.id, self.series_index.value())
if self.cover_changed:
self.db.set_cover(self.id, pixmap_to_data(self.cover.pixmap()))
def accept(self):
cmdline = self.build_commandline()
if cmdline is None:
return
if self.db:
self.cover_file = None
self.write_metadata()
cover = self.db.cover(self.row)
if cover:
self.cover_file = PersistentTemporaryFile(suffix='.jpeg')
self.cover_file.write(cover)
self.cover_file.close()
self.db.set_conversion_options(self.id, self.output_format.lower(), cmdline)
if self.cover_file:
cmdline.extend([u'--cover', self.cover_file.name])
self.cmdline = [unicode(i) for i in cmdline]
else:
config.set('LRF_conversion_defaults', cmdline)
QDialog.accept(self)
class LRFBulkDialog(LRFSingleDialog):
    '''
    Variant of LRFSingleDialog for converting several books at once. The
    first category entry and its page are removed.
    '''

    def __init__(self, window):
        '''
        :param window: Parent widget.
        '''
        QDialog.__init__(self, window)
        Ui_LRFSingleDialog.__init__(self)
        self.setupUi(self)
        self.populate_list()

        # Drop the first category entry and the first page of the stack.
        self.categoryList.takeItem(0)
        self.stack.removeWidget(self.stack.widget(0))
        self.categoryList.setCurrentRow(0)

        self.initialize_common()
        self.setWindowTitle(_('Bulk convert ebooks to LRF'))

    def accept(self):
        '''
        Store the command line, without the per-book metadata options, in
        ``self.cmdline`` and close the dialog. The dialog stays open when no
        command line could be built.
        '''
        cmdline = self.build_commandline()
        if cmdline is None:
            # build_commandline() returns None after reporting an unknown
            # encoding.
            return
        self.cmdline = [unicode(i) for i in cmdline]
        for meta in ('--title', '--author', '--publisher', '--comment'):
            try:
                index = self.cmdline.index(meta)
                # Remove the option together with its value.
                self.cmdline[index:index+2] = []
            except ValueError:
                continue

        self.cover_file = None
        QDialog.accept(self)

File diff suppressed because it is too large Load Diff

View File

@ -1,22 +0,0 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
from calibre.gui2.dialogs.epub import Config as _Config
from calibre.ebooks.mobi.from_any import config as mobiconfig
class Config(_Config):
    'Variant of the EPUB conversion dialog that uses the MOBI configuration.'

    OUTPUT = 'MOBI'

    def __init__(self, parent, db, row=None):
        'Initialize the base dialog with ``mobiconfig`` as configuration.'
        _Config.__init__(self, parent, db, row=row, config=mobiconfig)

    def hide_controls(self):
        'Hide the widgets that are not used by this output format.'
        hidden_widgets = (
            'profile_label',
            'opt_profile',
            'opt_dont_split_on_page_breaks',
            'opt_preserve_tag_structure',
            'opt_linearize_tables',
            'page_map_box',
        )
        for widget_name in hidden_widgets:
            getattr(self, widget_name).setVisible(False)

View File

@ -11,17 +11,8 @@ from PyQt4.Qt import QDialog
from calibre.customize.ui import available_input_formats
from calibre.utils.config import prefs
from calibre.gui2.dialogs.lrf_single import LRFSingleDialog, LRFBulkDialog
from calibre.gui2.dialogs.epub import Config as EPUBConvert
from calibre.gui2.dialogs.mobi import Config as MOBIConvert
import calibre.gui2.dialogs.comicconf as ComicConf
from calibre.gui2 import warning_dialog
from calibre.ptempfile import PersistentTemporaryFile
from calibre.ebooks.lrf import preferred_source_formats as LRF_PREFERRED_SOURCE_FORMATS
from calibre.ebooks.metadata.opf import OPFCreator
from calibre.ebooks.epub.from_any import SOURCE_FORMATS as EPUB_PREFERRED_SOURCE_FORMATS, config as epubconfig
from calibre.ebooks.mobi.from_any import config as mobiconfig
from calibre.ebooks.lrf.comic.convert_from import config as comicconfig
# Ordered list of source formats. Items closer to the beginning are
# preferred for conversion over those toward the end.