From b98ada75f7741349614f82b073a9f8f9c7288804 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 29 Mar 2009 18:26:44 -0700 Subject: [PATCH] IGN:... --- src/calibre/customize/conversion.py | 134 +++++++++--------- src/calibre/ebooks/conversion/cli.py | 46 +++--- src/calibre/ebooks/conversion/plumber.py | 119 ++++++++++------ src/calibre/ebooks/oeb/base.py | 2 +- src/calibre/ebooks/oeb/output.py | 12 +- src/calibre/ebooks/oeb/transforms/__init__.py | 10 ++ .../ebooks/oeb/transforms/trimmanifest.py | 5 +- 7 files changed, 187 insertions(+), 141 deletions(-) diff --git a/src/calibre/customize/conversion.py b/src/calibre/customize/conversion.py index a77e32beee..5cf497d904 100644 --- a/src/calibre/customize/conversion.py +++ b/src/calibre/customize/conversion.py @@ -1,6 +1,6 @@ from __future__ import with_statement ''' -Defines the plugin sytem for conversions. +Defines the plugin system for conversions. ''' import re, os, shutil @@ -10,24 +10,24 @@ from calibre import CurrentDir from calibre.customize import Plugin class ConversionOption(object): - + ''' Class representing conversion options ''' - - def __init__(self, name=None, help=None, long_switch=None, + + def __init__(self, name=None, help=None, long_switch=None, short_switch=None, choices=None): self.name = name self.help = help self.long_switch = long_switch self.short_switch = short_switch self.choices = choices - + if self.long_switch is None: self.long_switch = self.name.replace('_', '-') - + self.validate_parameters() - + def validate_parameters(self): ''' Validate the parameters passed to :method:`__init__`. @@ -36,21 +36,21 @@ class ConversionOption(object): raise ValueError(self.name + ' is not a valid Python identifier') if not self.help: raise ValueError('You must set the help text') - + def __hash__(self): return hash(self.name) - + def __eq__(self, other): return hash(self) == hash(other) - + class OptionRecommendation(object): LOW = 1 MED = 2 HIGH = 3 - + def __init__(self, recommended_value=None, level=LOW, **kwargs): ''' - An option recommendation. That is, an option as well as its recommended + An option recommendation. That is, an option as well as its recommended value and the level of the recommendation. ''' self.level = level @@ -58,9 +58,9 @@ class OptionRecommendation(object): self.option = kwargs.pop('option', None) if self.option is None: self.option = ConversionOption(**kwargs) - + self.validate_parameters() - + def validate_parameters(self): if self.option.choices and self.recommended_value not in \ self.option.choices: @@ -68,30 +68,30 @@ class OptionRecommendation(object): self.option.name) if not (isinstance(self.recommended_value, (int, float, str, unicode))\ or self.recommended_value is None): - raise ValueError('OpRec: %s:'%self.option.name + - repr(self.recommended_value) + + raise ValueError('OpRec: %s:'%self.option.name + + repr(self.recommended_value) + ' is not a string or a number') - + class InputFormatPlugin(Plugin): ''' - InputFormatPlugins are responsible for converting a document into + InputFormatPlugins are responsible for converting a document into HTML+OPF+CSS+etc. The results of the conversion *must* be encoded in UTF-8. The main action happens in :method:`convert`. ''' - + type = _('Conversion Input') can_be_disabled = False supported_platforms = ['windows', 'osx', 'linux'] - + #: Set of file types for which this plugin should be run #: For example: ``set(['azw', 'mobi', 'prc'])`` file_types = set([]) - + #: Options shared by all Input format plugins. Do not override #: in sub-classes. Use :member:`options` instead. Every option must be an - #: instance of :class:`OptionRecommendation`. + #: instance of :class:`OptionRecommendation`. common_options = set([ OptionRecommendation(name='debug_input', recommended_value=None, level=OptionRecommendation.LOW, @@ -101,7 +101,7 @@ class InputFormatPlugin(Plugin): 'WARNING: This completely deletes the contents of ' 'the specified directory.') ), - + OptionRecommendation(name='input_encoding', recommended_value=None, level=OptionRecommendation.LOW, help=_('Specify the character encoding of the input document. If ' @@ -110,73 +110,73 @@ class InputFormatPlugin(Plugin): 'do not declare an encoding or that have erroneous ' 'encoding declarations.') ), - + ]) - + #: Options to customize the behavior of this plugin. Every option must be an - #: instance of :class:`OptionRecommendation`. + #: instance of :class:`OptionRecommendation`. options = set([]) - - #: A set of 3-tuples of the form + + #: A set of 3-tuples of the form #: (option_name, recommended_value, recommendation_level) recommendations = set([]) - + def convert(self, stream, options, file_ext, parse_cache, log, accelerators): ''' This method must be implemented in sub-classes. It must return - the path to the created OPF file. All output should be contained in + the path to the created OPF file. All output should be contained in the current directory. If this plugin creates files outside the current - directory they must be deleted/marked for deletion before this method + directory they must be deleted/marked for deletion before this method returns. - + :param stream: A file like object that contains the input file. - - :param options: Options to customize the conversion process. + + :param options: Options to customize the conversion process. Guaranteed to have attributes corresponding - to all the options declared by this plugin. In + to all the options declared by this plugin. In addition, it will have a verbose attribute that takes integral values from zero upwards. Higher numbers - mean be more verbose. Another useful attribute is - ``input_profile`` that is an instance of + mean be more verbose. Another useful attribute is + ``input_profile`` that is an instance of :class:`calibre.customize.profiles.InputProfile`. - + :param file_ext: The extension (without the .) of the input file. It is guaranteed to be one of the `file_types` supported by this plugin. - + :param parse_cache: A dictionary that maps absolute file paths to parsed representations of their contents. For - HTML the representation is an lxml element of + HTML the representation is an lxml element of the root of the tree. For CSS it is a cssutils stylesheet. If this plugin parses any of the output files, it should add them to the cache so that later stages of the conversion wont have to re-parse them. If a parsed representation - is in the cache, there is no need to actually + is in the cache, there is no need to actually write the file to disk. - - :param log: A :class:`calibre.utils.logging.Log` object. All output + + :param log: A :class:`calibre.utils.logging.Log` object. All output should use this object. - + :param accelarators: A dictionary of various information that the input plugin can get easily that would speed up the subsequent stages of the conversion. - + ''' raise NotImplementedError - - def __call__(self, stream, options, file_ext, parse_cache, log, + + def __call__(self, stream, options, file_ext, parse_cache, log, accelerators, output_dir): log('InputFormatPlugin: %s running'%self.name, end=' ') if hasattr(stream, 'name'): log('on', stream.name) - + with CurrentDir(output_dir): for x in os.listdir('.'): shutil.rmtree(x) if os.path.isdir(x) else os.remove(x) - - - ret = self.convert(stream, options, file_ext, parse_cache, + + + ret = self.convert(stream, options, file_ext, parse_cache, log, accelerators) for key in list(parse_cache.keys()): if os.path.abspath(key) != key: @@ -184,7 +184,7 @@ class InputFormatPlugin(Plugin): 'relative path: %s')%(self.name, key) ) parse_cache[os.path.abspath(key)] = parse_cache.pop(key) - + if options.debug_input is not None: options.debug_input = os.path.abspath(options.debug_input) if not os.path.exists(options.debug_input): @@ -194,15 +194,15 @@ class InputFormatPlugin(Plugin): if hasattr(obj, 'cssText'): raw = obj.cssText else: - raw = html.tostring(obj, encoding='utf-8', method='xml', + raw = html.tostring(obj, encoding='utf-8', method='xml', include_meta_content_type=True, pretty_print=True) if isinstance(raw, unicode): raw = raw.encode('utf-8') open(f, 'wb').write(raw) shutil.copytree('.', options.debug_input) - - - + + + return ret @@ -210,32 +210,32 @@ class OutputFormatPlugin(Plugin): ''' OutputFormatPlugins are responsible for converting an OEB document (OPF+HTML) into an output ebook. - + The OEB document can be assumed to be encoded in UTF-8. The main action happens in :method:`convert`. ''' - + type = _('Conversion Output') can_be_disabled = False supported_platforms = ['windows', 'osx', 'linux'] - + #: The file type (extension without leading period) that this #: plugin outputs file_type = None - + #: Options shared by all Input format plugins. Do not override #: in sub-classes. Use :member:`options` instead. Every option must be an - #: instance of :class:`OptionRecommendation`. + #: instance of :class:`OptionRecommendation`. common_options = set([]) - + #: Options to customize the behavior of this plugin. Every option must be an - #: instance of :class:`OptionRecommendation`. + #: instance of :class:`OptionRecommendation`. options = set([]) - - #: A set of 3-tuples of the form + + #: A set of 3-tuples of the form #: (option_name, recommended_value, recommendation_level) recommendations = set([]) - def convert(self, oeb_book, input_plugin, options, parse_cache, log): + def convert(self, oeb_book, input_plugin, options, context, log): raise NotImplementedError - + diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index f52264f8d0..211761e415 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -30,7 +30,7 @@ options. the available options depend on the input and output file types. \ To get help on them specify the input and output file and then use the -h \ option. -For full documentation of the conversion system see +For full documentation of the conversion system see ''') + 'http://calibre.kovidgoyal.net/user_manual/conversion.html' import sys, os @@ -50,22 +50,22 @@ def check_command_line_options(parser, args, log): print_help(parser) log.error('\n\nYou must specify the input AND output files') raise SystemExit(1) - + input = os.path.abspath(args[1]) if not os.access(input, os.R_OK): log.error('Cannot read from', input) raise SystemExit(1) - + output = args[2] if output.startswith('.'): output = os.path.splitext(os.path.basename(input))[0]+output output = os.path.abspath(output) - + if '.' in output: if os.path.exists(output): log.warn('WARNING:', output, 'exists. Deleting.') os.remove(output) - + return input, output def option_recommendation_to_cli_option(add_option, rec): @@ -79,18 +79,18 @@ def option_recommendation_to_cli_option(add_option, rec): def add_input_output_options(parser, plumber): input_options, output_options = \ plumber.input_options, plumber.output_options - + def add_options(group, options): for opt in options: option_recommendation_to_cli_option(group, opt) - + if input_options: title = _('INPUT OPTIONS') io = OptionGroup(parser, title, _('Options to control the processing' ' of the input %s file')%plumber.input_fmt) add_options(io.add_option, input_options) parser.add_option_group(io) - + if output_options: title = plumber.output_fmt.upper() + ' ' + _('OPTIONS') oo = OptionGroup(parser, title, _('Options to control the processing' @@ -106,7 +106,7 @@ def add_pipeline_options(parser, plumber): 'output_profile', ] ), - + 'METADATA' : (_('Options to set metadata in the output'), plumber.metadata_option_names, ), @@ -114,19 +114,19 @@ def add_pipeline_options(parser, plumber): [ 'verbose', ]), - - + + } - + group_order = ['', 'METADATA', 'DEBUG'] - + for group in group_order: desc, options = groups[group] if group: group = OptionGroup(parser, group, desc) parser.add_option_group(group) add_option = group.add_option if group != '' else parser.add_option - + for name in options: rec = plumber.get_option_by_name(name) if rec.level < rec.HIGH: @@ -141,27 +141,27 @@ def main(args=sys.argv): if len(args) < 3: print_help(parser, log) return 1 - + input, output = check_command_line_options(parser, args, log) - + from calibre.ebooks.conversion.plumber import Plumber - + plumber = Plumber(input, output, log) add_input_output_options(parser, plumber) add_pipeline_options(parser, plumber) - + opts = parser.parse_args(args)[0] - recommendations = [(n.dest, getattr(opts, n.dest), + recommendations = [(n.dest, getattr(opts, n.dest), OptionRecommendation.HIGH) \ for n in parser.options_iter() if n.dest] plumber.merge_ui_recommendations(recommendations) - + plumber.run() - + log(_('Output saved to'), ' ', plumber.output) - + return 0 - + if __name__ == '__main__': sys.exit(main()) diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 75a6687c4e..44e2fda0c3 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -5,7 +5,7 @@ __docformat__ = 'restructuredtext en' import os -from calibre.customize.conversion import OptionRecommendation +from calibre.customize.conversion import OptionRecommendation from calibre.customize.ui import input_profiles, output_profiles, \ plugin_for_input_format, plugin_for_output_format @@ -13,23 +13,35 @@ class OptionValues(object): pass class Plumber(object): - + ''' + The `Plumber` manages the conversion pipeline. An UI should call the methods + :method:`merge_ui_recommendations` and then :method:`run`. The plumber will + take care of the rest. + ''' + metadata_option_names = [ - 'title', 'authors', 'title_sort', 'author_sort', 'cover', 'comments', - 'publisher', 'series', 'series_index', 'rating', 'isbn', + 'title', 'authors', 'title_sort', 'author_sort', 'cover', 'comments', + 'publisher', 'series', 'series_index', 'rating', 'isbn', 'tags', 'book_producer', 'language' ] - + def __init__(self, input, output, log): + ''' + :param input: Path to input file. + :param output: Path to output file/directory + ''' self.input = input self.output = output self.log = log - + + # Initialize the conversion options that are independent of input and + # output formats. The input and output plugins can still disable these + # options via recommendations. self.pipeline_options = [ -OptionRecommendation(name='verbose', +OptionRecommendation(name='verbose', recommended_value=0, level=OptionRecommendation.LOW, - short_switch='v', + short_switch='v', help=_('Level of verbosity. Specify multiple times for greater ' 'verbosity.') ), @@ -54,15 +66,15 @@ OptionRecommendation(name='output_profile', 'will work on a device. For example EPUB on the SONY reader.' ) ), - -OptionRecommendation(name='read_metadata_from_opf', + +OptionRecommendation(name='read_metadata_from_opf', recommended_value=None, level=OptionRecommendation.LOW, - short_switch='m', + short_switch='m', help=_('Read metadata from the specified OPF file. Metadata read ' - 'from this file will override any metadata in the source ' + 'from this file will override any metadata in the source ' 'file.') ), - + OptionRecommendation(name='title', recommended_value=None, level=OptionRecommendation.LOW, help=_('Set the title.')), @@ -120,57 +132,70 @@ OptionRecommendation(name='language', help=_('Set the language.')), ] - + input_fmt = os.path.splitext(input)[1] if not input_fmt: raise ValueError('Input file must have an extension') input_fmt = input_fmt[1:].lower() - + output_fmt = os.path.splitext(output)[1] if not output_fmt: output_fmt = '.oeb' output_fmt = output_fmt[1:].lower() - + self.input_plugin = plugin_for_input_format(input_fmt) self.output_plugin = plugin_for_output_format(output_fmt) - + if self.input_plugin is None: raise ValueError('No plugin to handle input format: '+input_fmt) - + if self.output_plugin is None: raise ValueError('No plugin to handle output format: '+output_fmt) - + self.input_fmt = input_fmt self.output_fmt = output_fmt - + + # Build set of all possible options. Two options are equal iff their + # names are the same. self.input_options = self.input_plugin.options.union( self.input_plugin.common_options) self.output_options = self.output_plugin.options.union( - self.output_plugin.common_options) - + self.output_plugin.common_options) + + # Remove the options that have been disabled by recommendations from the + # plugins. self.merge_plugin_recommendations() def get_option_by_name(self, name): - for group in (self.input_options, self.pipeline_options, + for group in (self.input_options, self.pipeline_options, self.output_options): for rec in group: if rec.option == name: return rec - + def merge_plugin_recommendations(self): for source in (self.input_plugin, self.output_plugin): for name, val, level in source.recommendations: rec = self.get_option_by_name(name) if rec is not None and rec.level <= level: rec.recommended_value = val - + def merge_ui_recommendations(self, recommendations): + ''' + Merge recommendations from the UI. As long as the UI recommendation + level is >= the baseline recommended level, the UI value is used, + *except* if the baseline has a recommendation level of `HIGH`. + ''' for name, val, level in recommendations: rec = self.get_option_by_name(name) if rec is not None and rec.level <= level and rec.level < rec.HIGH: rec.recommended_value = val - + def read_user_metadata(self): + ''' + Read all metadata specified by the user. Command line options override + metadata from a specified OPF file. + ''' from calibre.ebooks.metadata import MetaInformation, string_to_authors from calibre.ebooks.metadata.opf2 import OPF mi = MetaInformation(None, []) @@ -194,43 +219,55 @@ OptionRecommendation(name='language', mi.cover_data = ('', open(mi.cover, 'rb').read()) mi.cover = None self.user_metadata = mi - - + + def setup_options(self): + ''' + Setup the `self.opts` object. + ''' self.opts = OptionValues() - for group in (self.input_options, self.pipeline_options, + for group in (self.input_options, self.pipeline_options, self.output_options): for rec in group: setattr(self.opts, rec.option.name, rec.recommended_value) - + for x in input_profiles(): if x.short_name == self.opts.input_profile: self.opts.input_profile = x break - + for x in output_profiles(): if x.short_name == self.opts.output_profile: self.opts.output_profile = x break - + self.read_user_metadata() - + def run(self): + ''' + Run the conversion pipeline + ''' + # Setup baseline option values self.setup_options() + + # Run any preprocess plugins from calibre.customize.ui import run_plugins_on_preprocess self.input = run_plugins_on_preprocess(self.input) - + + # Create an OEBBook from the input file. The input plugin does all the + # heavy lifting. from calibre.ebooks.oeb.reader import OEBReader from calibre.ebooks.oeb.base import OEBBook parse_cache, accelerators = {}, {} - - opfpath = self.input_plugin(open(self.input, 'rb'), self.opts, + + opfpath = self.input_plugin(open(self.input, 'rb'), self.opts, self.input_fmt, parse_cache, self.log, accelerators) - + self.reader = OEBReader() - self.oeb = OEBBook(self.log, parse_cache=parse_cache) + self.oeb = OEBBook(self.log, parse_cache=parse_cache) + # Read OEB Book into OEBBook self.reader(self.oeb, opfpath) - - - \ No newline at end of file + + + diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 60328b6c81..c1e3549b10 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -1260,7 +1260,7 @@ class OEBBook(object): """Create empty book. Optional arguments: :param parse_cache: A cache of parsed XHTML/CSS. Keys are absolute - paths to te cached files and values are lxml root objects and + paths to the cached files and values are lxml root objects and cssutils stylesheets. :param:`encoding`: Default encoding for textual content read from an external container. diff --git a/src/calibre/ebooks/oeb/output.py b/src/calibre/ebooks/oeb/output.py index 0a74f488cf..d8d52859eb 100644 --- a/src/calibre/ebooks/oeb/output.py +++ b/src/calibre/ebooks/oeb/output.py @@ -6,12 +6,12 @@ __docformat__ = 'restructuredtext en' from calibre.customize.conversion import OutputFormatPlugin class OEBOutput(OutputFormatPlugin): - + name = 'OEB Output' author = 'Kovid Goyal' file_type = 'oeb' - - - def convert(self, oeb_book, input_plugin, options, parse_cache, log): - pass - + + + def convert(self, oeb_book, input_plugin, options, context, log): + pass + diff --git a/src/calibre/ebooks/oeb/transforms/__init__.py b/src/calibre/ebooks/oeb/transforms/__init__.py index e69de29bb2..3d1a86922e 100644 --- a/src/calibre/ebooks/oeb/transforms/__init__.py +++ b/src/calibre/ebooks/oeb/transforms/__init__.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + + + diff --git a/src/calibre/ebooks/oeb/transforms/trimmanifest.py b/src/calibre/ebooks/oeb/transforms/trimmanifest.py index 119ebcc73d..cae56315e5 100644 --- a/src/calibre/ebooks/oeb/transforms/trimmanifest.py +++ b/src/calibre/ebooks/oeb/transforms/trimmanifest.py @@ -20,11 +20,10 @@ class ManifestTrimmer(object): @classmethod def generate(cls, opts): return cls() - + def __call__(self, oeb, context): oeb.logger.info('Trimming unused files from manifest...') used = set() - hrefs = oeb.manifest.hrefs for term in oeb.metadata: for item in oeb.metadata[term]: if item.value in oeb.manifest.hrefs: @@ -42,7 +41,7 @@ class ManifestTrimmer(object): while unchecked: new = set() for item in unchecked: - if (item.media_type in OEB_DOCS or + if (item.media_type in OEB_DOCS or item.media_type[-4:] in ('/xml', '+xml')) and \ item.data is not None: hrefs = [sel(item.data) for sel in LINK_SELECTORS]