This commit is contained in:
Kovid Goyal 2009-03-29 18:26:44 -07:00
parent b9d9df5f20
commit b98ada75f7
7 changed files with 187 additions and 141 deletions

View File

@ -1,6 +1,6 @@
from __future__ import with_statement
'''
Defines the plugin sytem for conversions.
Defines the plugin system for conversions.
'''
import re, os, shutil
@ -10,24 +10,24 @@ from calibre import CurrentDir
from calibre.customize import Plugin
class ConversionOption(object):
'''
Class representing conversion options
'''
def __init__(self, name=None, help=None, long_switch=None,
def __init__(self, name=None, help=None, long_switch=None,
short_switch=None, choices=None):
self.name = name
self.help = help
self.long_switch = long_switch
self.short_switch = short_switch
self.choices = choices
if self.long_switch is None:
self.long_switch = self.name.replace('_', '-')
self.validate_parameters()
def validate_parameters(self):
'''
Validate the parameters passed to :method:`__init__`.
@ -36,21 +36,21 @@ class ConversionOption(object):
raise ValueError(self.name + ' is not a valid Python identifier')
if not self.help:
raise ValueError('You must set the help text')
def __hash__(self):
    # Options hash by name only; together with the hash-based __eq__ this
    # means two options with the same name count as one entry in a set.
    return hash(self.name)
def __eq__(self, other):
    # Equality is decided purely by comparing hash values. Because the hash
    # of an option is the hash of its name, an option also compares equal
    # to a plain string holding that name.
    return hash(self) == hash(other)
class OptionRecommendation(object):
LOW = 1
MED = 2
HIGH = 3
def __init__(self, recommended_value=None, level=LOW, **kwargs):
'''
An option recommendation. That is, an option as well as its recommended
An option recommendation. That is, an option as well as its recommended
value and the level of the recommendation.
'''
self.level = level
@ -58,9 +58,9 @@ class OptionRecommendation(object):
self.option = kwargs.pop('option', None)
if self.option is None:
self.option = ConversionOption(**kwargs)
self.validate_parameters()
def validate_parameters(self):
if self.option.choices and self.recommended_value not in \
self.option.choices:
@ -68,30 +68,30 @@ class OptionRecommendation(object):
self.option.name)
if not (isinstance(self.recommended_value, (int, float, str, unicode))\
or self.recommended_value is None):
raise ValueError('OpRec: %s:'%self.option.name +
repr(self.recommended_value) +
raise ValueError('OpRec: %s:'%self.option.name +
repr(self.recommended_value) +
' is not a string or a number')
class InputFormatPlugin(Plugin):
'''
InputFormatPlugins are responsible for converting a document into
InputFormatPlugins are responsible for converting a document into
HTML+OPF+CSS+etc.
The results of the conversion *must* be encoded in UTF-8.
The main action happens in :method:`convert`.
'''
type = _('Conversion Input')
can_be_disabled = False
supported_platforms = ['windows', 'osx', 'linux']
#: Set of file types for which this plugin should be run
#: For example: ``set(['azw', 'mobi', 'prc'])``
file_types = set([])
#: Options shared by all Input format plugins. Do not override
#: in sub-classes. Use :member:`options` instead. Every option must be an
#: instance of :class:`OptionRecommendation`.
#: instance of :class:`OptionRecommendation`.
common_options = set([
OptionRecommendation(name='debug_input',
recommended_value=None, level=OptionRecommendation.LOW,
@ -101,7 +101,7 @@ class InputFormatPlugin(Plugin):
'WARNING: This completely deletes the contents of '
'the specified directory.')
),
OptionRecommendation(name='input_encoding',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Specify the character encoding of the input document. If '
@ -110,73 +110,73 @@ class InputFormatPlugin(Plugin):
'do not declare an encoding or that have erroneous '
'encoding declarations.')
),
])
#: Options to customize the behavior of this plugin. Every option must be an
#: instance of :class:`OptionRecommendation`.
#: instance of :class:`OptionRecommendation`.
options = set([])
#: A set of 3-tuples of the form
#: A set of 3-tuples of the form
#: (option_name, recommended_value, recommendation_level)
recommendations = set([])
def convert(self, stream, options, file_ext, parse_cache, log, accelerators):
    '''
    This method must be implemented in sub-classes. It must return
    the path to the created OPF file. All output should be contained in
    the current directory. If this plugin creates files outside the current
    directory they must be deleted/marked for deletion before this method
    returns.

    :param stream: A file like object that contains the input file.

    :param options: Options to customize the conversion process.
                    Guaranteed to have attributes corresponding
                    to all the options declared by this plugin. In
                    addition, it will have a verbose attribute that
                    takes integral values from zero upwards. Higher numbers
                    mean be more verbose. Another useful attribute is
                    ``input_profile`` that is an instance of
                    :class:`calibre.customize.profiles.InputProfile`.

    :param file_ext: The extension (without the .) of the input file. It
                     is guaranteed to be one of the `file_types` supported
                     by this plugin.

    :param parse_cache: A dictionary that maps absolute file paths to
                        parsed representations of their contents. For
                        HTML the representation is an lxml element of
                        the root of the tree. For CSS it is a cssutils
                        stylesheet. If this plugin parses any of the
                        output files, it should add them to the cache
                        so that later stages of the conversion won't
                        have to re-parse them. If a parsed representation
                        is in the cache, there is no need to actually
                        write the file to disk.

    :param log: A :class:`calibre.utils.logging.Log` object. All output
                should use this object.

    :param accelerators: A dictionary of various information that the input
                         plugin can get easily that would speed up the
                         subsequent stages of the conversion.
    '''
    raise NotImplementedError
def __call__(self, stream, options, file_ext, parse_cache, log,
def __call__(self, stream, options, file_ext, parse_cache, log,
accelerators, output_dir):
log('InputFormatPlugin: %s running'%self.name, end=' ')
if hasattr(stream, 'name'):
log('on', stream.name)
with CurrentDir(output_dir):
for x in os.listdir('.'):
shutil.rmtree(x) if os.path.isdir(x) else os.remove(x)
ret = self.convert(stream, options, file_ext, parse_cache,
ret = self.convert(stream, options, file_ext, parse_cache,
log, accelerators)
for key in list(parse_cache.keys()):
if os.path.abspath(key) != key:
@ -184,7 +184,7 @@ class InputFormatPlugin(Plugin):
'relative path: %s')%(self.name, key)
)
parse_cache[os.path.abspath(key)] = parse_cache.pop(key)
if options.debug_input is not None:
options.debug_input = os.path.abspath(options.debug_input)
if not os.path.exists(options.debug_input):
@ -194,15 +194,15 @@ class InputFormatPlugin(Plugin):
if hasattr(obj, 'cssText'):
raw = obj.cssText
else:
raw = html.tostring(obj, encoding='utf-8', method='xml',
raw = html.tostring(obj, encoding='utf-8', method='xml',
include_meta_content_type=True, pretty_print=True)
if isinstance(raw, unicode):
raw = raw.encode('utf-8')
open(f, 'wb').write(raw)
shutil.copytree('.', options.debug_input)
return ret
@ -210,32 +210,32 @@ class OutputFormatPlugin(Plugin):
'''
OutputFormatPlugins are responsible for converting an OEB document
(OPF+HTML) into an output ebook.
The OEB document can be assumed to be encoded in UTF-8.
The main action happens in :method:`convert`.
'''
type = _('Conversion Output')
can_be_disabled = False
supported_platforms = ['windows', 'osx', 'linux']
#: The file type (extension without leading period) that this
#: plugin outputs
file_type = None
#: Options shared by all Output format plugins. Do not override
#: in sub-classes. Use :member:`options` instead. Every option must be an
#: instance of :class:`OptionRecommendation`.
#: instance of :class:`OptionRecommendation`.
common_options = set([])
#: Options to customize the behavior of this plugin. Every option must be an
#: instance of :class:`OptionRecommendation`.
#: instance of :class:`OptionRecommendation`.
options = set([])
#: A set of 3-tuples of the form
#: A set of 3-tuples of the form
#: (option_name, recommended_value, recommendation_level)
recommendations = set([])
def convert(self, oeb_book, input_plugin, options, parse_cache, log):
def convert(self, oeb_book, input_plugin, options, context, log):
raise NotImplementedError

View File

@ -30,7 +30,7 @@ options. the available options depend on the input and output file types. \
To get help on them specify the input and output file and then use the -h \
option.
For full documentation of the conversion system see
For full documentation of the conversion system see
''') + 'http://calibre.kovidgoyal.net/user_manual/conversion.html'
import sys, os
@ -50,22 +50,22 @@ def check_command_line_options(parser, args, log):
print_help(parser)
log.error('\n\nYou must specify the input AND output files')
raise SystemExit(1)
input = os.path.abspath(args[1])
if not os.access(input, os.R_OK):
log.error('Cannot read from', input)
raise SystemExit(1)
output = args[2]
if output.startswith('.'):
output = os.path.splitext(os.path.basename(input))[0]+output
output = os.path.abspath(output)
if '.' in output:
if os.path.exists(output):
log.warn('WARNING:', output, 'exists. Deleting.')
os.remove(output)
return input, output
def option_recommendation_to_cli_option(add_option, rec):
@ -79,18 +79,18 @@ def option_recommendation_to_cli_option(add_option, rec):
def add_input_output_options(parser, plumber):
input_options, output_options = \
plumber.input_options, plumber.output_options
def add_options(group, options):
for opt in options:
option_recommendation_to_cli_option(group, opt)
if input_options:
title = _('INPUT OPTIONS')
io = OptionGroup(parser, title, _('Options to control the processing'
' of the input %s file')%plumber.input_fmt)
add_options(io.add_option, input_options)
parser.add_option_group(io)
if output_options:
title = plumber.output_fmt.upper() + ' ' + _('OPTIONS')
oo = OptionGroup(parser, title, _('Options to control the processing'
@ -106,7 +106,7 @@ def add_pipeline_options(parser, plumber):
'output_profile',
]
),
'METADATA' : (_('Options to set metadata in the output'),
plumber.metadata_option_names,
),
@ -114,19 +114,19 @@ def add_pipeline_options(parser, plumber):
[
'verbose',
]),
}
group_order = ['', 'METADATA', 'DEBUG']
for group in group_order:
desc, options = groups[group]
if group:
group = OptionGroup(parser, group, desc)
parser.add_option_group(group)
add_option = group.add_option if group != '' else parser.add_option
for name in options:
rec = plumber.get_option_by_name(name)
if rec.level < rec.HIGH:
@ -141,27 +141,27 @@ def main(args=sys.argv):
if len(args) < 3:
print_help(parser, log)
return 1
input, output = check_command_line_options(parser, args, log)
from calibre.ebooks.conversion.plumber import Plumber
plumber = Plumber(input, output, log)
add_input_output_options(parser, plumber)
add_pipeline_options(parser, plumber)
opts = parser.parse_args(args)[0]
recommendations = [(n.dest, getattr(opts, n.dest),
recommendations = [(n.dest, getattr(opts, n.dest),
OptionRecommendation.HIGH) \
for n in parser.options_iter()
if n.dest]
plumber.merge_ui_recommendations(recommendations)
plumber.run()
log(_('Output saved to'), ' ', plumber.output)
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -5,7 +5,7 @@ __docformat__ = 'restructuredtext en'
import os
from calibre.customize.conversion import OptionRecommendation
from calibre.customize.conversion import OptionRecommendation
from calibre.customize.ui import input_profiles, output_profiles, \
plugin_for_input_format, plugin_for_output_format
@ -13,23 +13,35 @@ class OptionValues(object):
pass
class Plumber(object):
'''
The `Plumber` manages the conversion pipeline. A UI should call the methods
:method:`merge_ui_recommendations` and then :method:`run`. The plumber will
take care of the rest.
'''
metadata_option_names = [
'title', 'authors', 'title_sort', 'author_sort', 'cover', 'comments',
'publisher', 'series', 'series_index', 'rating', 'isbn',
'title', 'authors', 'title_sort', 'author_sort', 'cover', 'comments',
'publisher', 'series', 'series_index', 'rating', 'isbn',
'tags', 'book_producer', 'language'
]
def __init__(self, input, output, log):
'''
:param input: Path to input file.
:param output: Path to output file/directory
'''
self.input = input
self.output = output
self.log = log
# Initialize the conversion options that are independent of input and
# output formats. The input and output plugins can still disable these
# options via recommendations.
self.pipeline_options = [
OptionRecommendation(name='verbose',
OptionRecommendation(name='verbose',
recommended_value=0, level=OptionRecommendation.LOW,
short_switch='v',
short_switch='v',
help=_('Level of verbosity. Specify multiple times for greater '
'verbosity.')
),
@ -54,15 +66,15 @@ OptionRecommendation(name='output_profile',
'will work on a device. For example EPUB on the SONY reader.'
)
),
OptionRecommendation(name='read_metadata_from_opf',
OptionRecommendation(name='read_metadata_from_opf',
recommended_value=None, level=OptionRecommendation.LOW,
short_switch='m',
short_switch='m',
help=_('Read metadata from the specified OPF file. Metadata read '
'from this file will override any metadata in the source '
'from this file will override any metadata in the source '
'file.')
),
OptionRecommendation(name='title',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Set the title.')),
@ -120,57 +132,70 @@ OptionRecommendation(name='language',
help=_('Set the language.')),
]
input_fmt = os.path.splitext(input)[1]
if not input_fmt:
raise ValueError('Input file must have an extension')
input_fmt = input_fmt[1:].lower()
output_fmt = os.path.splitext(output)[1]
if not output_fmt:
output_fmt = '.oeb'
output_fmt = output_fmt[1:].lower()
self.input_plugin = plugin_for_input_format(input_fmt)
self.output_plugin = plugin_for_output_format(output_fmt)
if self.input_plugin is None:
raise ValueError('No plugin to handle input format: '+input_fmt)
if self.output_plugin is None:
raise ValueError('No plugin to handle output format: '+output_fmt)
self.input_fmt = input_fmt
self.output_fmt = output_fmt
# Build set of all possible options. Two options are equal iff their
# names are the same.
self.input_options = self.input_plugin.options.union(
self.input_plugin.common_options)
self.output_options = self.output_plugin.options.union(
self.output_plugin.common_options)
self.output_plugin.common_options)
# Remove the options that have been disabled by recommendations from the
# plugins.
self.merge_plugin_recommendations()
def get_option_by_name(self, name):
    '''
    Find the recommendation whose option matches `name`.

    The input, pipeline and output option groups are searched in that
    order and the first match is returned.

    :param name: Value compared (with ``==``) against each
                 recommendation's ``option`` attribute.
    :return: The matching recommendation, or None if there is no match.
    '''
    search_order = (self.input_options, self.pipeline_options,
                    self.output_options)
    for options in search_order:
        for candidate in options:
            if candidate.option == name:
                return candidate
    return None
def merge_plugin_recommendations(self):
    '''
    Merge the recommendations declared by the input and output plugins
    into the known options.

    Each plugin exposes ``recommendations`` as
    ``(option_name, recommended_value, recommendation_level)`` tuples.
    A recommendation is applied when its level is at least the level
    currently stored on the matching option. Names that match no option
    are ignored. The input plugin is processed before the output plugin.
    '''
    for source in (self.input_plugin, self.output_plugin):
        for name, val, level in source.recommendations:
            rec = self.get_option_by_name(name)
            if rec is not None and rec.level <= level:
                rec.recommended_value = val
                # Record the level together with the value. Without this a
                # HIGH plugin recommendation never reaches the checks that
                # compare against HIGH, and a weaker recommendation could
                # later overwrite a stronger one.
                rec.level = level
def merge_ui_recommendations(self, recommendations):
    '''
    Apply recommendations coming from the user interface.

    A UI value replaces the current value of an option when the UI level
    is at least the option's current level, unless the option's current
    level is already `HIGH`, in which case it is never replaced. Names
    that match no option are ignored.

    :param recommendations: Iterable of
        ``(option_name, recommended_value, recommendation_level)`` tuples.
    '''
    for option_name, ui_value, ui_level in recommendations:
        target = self.get_option_by_name(option_name)
        if target is None:
            continue
        if target.level > ui_level or target.level >= target.HIGH:
            continue
        target.recommended_value = ui_value
def read_user_metadata(self):
'''
Read all metadata specified by the user. Command line options override
metadata from a specified OPF file.
'''
from calibre.ebooks.metadata import MetaInformation, string_to_authors
from calibre.ebooks.metadata.opf2 import OPF
mi = MetaInformation(None, [])
@ -194,43 +219,55 @@ OptionRecommendation(name='language',
mi.cover_data = ('', open(mi.cover, 'rb').read())
mi.cover = None
self.user_metadata = mi
def setup_options(self):
    '''
    Build the `self.opts` object from the current recommendations.

    Every recommendation in the input, pipeline and output option groups
    is stored on `self.opts` as an attribute named after its option. The
    ``input_profile`` and ``output_profile`` values are then replaced by
    the profile object whose ``short_name`` matches, and left unchanged
    if none matches. Finally the user supplied metadata is read.
    '''
    opts = self.opts = OptionValues()
    all_groups = (self.input_options, self.pipeline_options,
                  self.output_options)
    for options in all_groups:
        for rec in options:
            setattr(opts, rec.option.name, rec.recommended_value)

    # Swap each profile's short name for the first profile object that
    # carries that short name.
    for attr, available in (('input_profile', input_profiles),
                            ('output_profile', output_profiles)):
        for profile in available():
            if profile.short_name == getattr(opts, attr):
                setattr(opts, attr, profile)
                break

    self.read_user_metadata()
def run(self):
'''
Run the conversion pipeline
'''
# Setup baseline option values
self.setup_options()
# Run any preprocess plugins
from calibre.customize.ui import run_plugins_on_preprocess
self.input = run_plugins_on_preprocess(self.input)
# Create an OEBBook from the input file. The input plugin does all the
# heavy lifting.
from calibre.ebooks.oeb.reader import OEBReader
from calibre.ebooks.oeb.base import OEBBook
parse_cache, accelerators = {}, {}
opfpath = self.input_plugin(open(self.input, 'rb'), self.opts,
opfpath = self.input_plugin(open(self.input, 'rb'), self.opts,
self.input_fmt, parse_cache, self.log,
accelerators)
self.reader = OEBReader()
self.oeb = OEBBook(self.log, parse_cache=parse_cache)
self.oeb = OEBBook(self.log, parse_cache=parse_cache)
# Read OEB Book into OEBBook
self.reader(self.oeb, opfpath)

View File

@ -1260,7 +1260,7 @@ class OEBBook(object):
"""Create empty book. Optional arguments:
:param parse_cache: A cache of parsed XHTML/CSS. Keys are absolute
paths to te cached files and values are lxml root objects and
paths to the cached files and values are lxml root objects and
cssutils stylesheets.
:param:`encoding`: Default encoding for textual content read
from an external container.

View File

@ -6,12 +6,12 @@ __docformat__ = 'restructuredtext en'
from calibre.customize.conversion import OutputFormatPlugin
class OEBOutput(OutputFormatPlugin):
name = 'OEB Output'
author = 'Kovid Goyal'
file_type = 'oeb'
def convert(self, oeb_book, input_plugin, options, parse_cache, log):
pass
def convert(self, oeb_book, input_plugin, options, context, log):
pass

View File

@ -0,0 +1,10 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

View File

@ -20,11 +20,10 @@ class ManifestTrimmer(object):
@classmethod
def generate(cls, opts):
    '''
    Create an instance of this class. `opts` is accepted but not used.
    '''
    return cls()
def __call__(self, oeb, context):
oeb.logger.info('Trimming unused files from manifest...')
used = set()
hrefs = oeb.manifest.hrefs
for term in oeb.metadata:
for item in oeb.metadata[term]:
if item.value in oeb.manifest.hrefs:
@ -42,7 +41,7 @@ class ManifestTrimmer(object):
while unchecked:
new = set()
for item in unchecked:
if (item.media_type in OEB_DOCS or
if (item.media_type in OEB_DOCS or
item.media_type[-4:] in ('/xml', '+xml')) and \
item.data is not None:
hrefs = [sel(item.data) for sel in LINK_SELECTORS]