From 4e128c10736b2695976812b6d4ca893f152a995e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 8 Mar 2009 13:45:54 -0700 Subject: [PATCH] Commit so I can pull from trunk --- src/calibre/__init__.py | 19 --- src/calibre/customize/builtins.py | 3 +- src/calibre/customize/conversion.py | 47 +++++++- src/calibre/customize/profiles.py | 36 +++++- src/calibre/customize/ui.py | 24 +++- src/calibre/ebooks/conversion/cli.py | 146 +++++++++++++++++++++++ src/calibre/ebooks/conversion/plumber.py | 73 +++++++++++- src/calibre/ebooks/html.py | 7 +- src/calibre/ebooks/mobi/input.py | 7 +- src/calibre/ebooks/oeb/output.py | 17 +++ src/calibre/linux.py | 1 + src/calibre/utils/logging.py | 28 +++-- 12 files changed, 356 insertions(+), 52 deletions(-) create mode 100644 src/calibre/ebooks/conversion/cli.py create mode 100644 src/calibre/ebooks/oeb/output.py diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index de133ddb57..030aab8317 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -7,7 +7,6 @@ import sys, os, re, logging, time, subprocess, atexit, mimetypes, \ __builtin__.__dict__['dynamic_property'] = lambda(func): func(None) from htmlentitydefs import name2codepoint from math import floor -from logging import Formatter from PyQt4.QtCore import QUrl from PyQt4.QtGui import QDesktopServices @@ -318,24 +317,6 @@ def english_sort(x, y): ''' return cmp(_spat.sub('', x), _spat.sub('', y)) -class ColoredFormatter(Formatter): - - def format(self, record): - ln = record.__dict__['levelname'] - col = '' - if ln == 'CRITICAL': - col = terminal_controller.YELLOW - elif ln == 'ERROR': - col = terminal_controller.RED - elif ln in ['WARN', 'WARNING']: - col = terminal_controller.BLUE - elif ln == 'INFO': - col = terminal_controller.GREEN - elif ln == 'DEBUG': - col = terminal_controller.CYAN - record.__dict__['levelname'] = col + record.__dict__['levelname'] + terminal_controller.NORMAL - return Formatter.format(self, record) - def walk(dir): ''' A nice 
interface to os.walk ''' for record in os.walk(dir): diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index fafe8e5afa..ca21bbb215 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -244,9 +244,10 @@ class MOBIMetadataWriter(MetadataWriterPlugin): from calibre.ebooks.epub.input import EPUBInput from calibre.ebooks.mobi.input import MOBIInput +from calibre.ebooks.oeb.output import OEBOutput from calibre.customize.profiles import input_profiles -plugins = [HTML2ZIP, EPUBInput, MOBIInput] +plugins = [HTML2ZIP, EPUBInput, MOBIInput, OEBOutput] plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ x.__name__.endswith('MetadataReader')] plugins += [x for x in list(locals().values()) if isinstance(x, type) and \ diff --git a/src/calibre/customize/conversion.py b/src/calibre/customize/conversion.py index aa7b0c1dea..f20cc4ae85 100644 --- a/src/calibre/customize/conversion.py +++ b/src/calibre/customize/conversion.py @@ -37,19 +37,24 @@ class ConversionOption(object): if not self.help: raise ValueError('You must set the help text') + def __hash__(self): + return hash(self.name) + + def __eq__(self, other): + return hash(self) == hash(other) class OptionRecommendation(object): LOW = 1 MED = 2 HIGH = 3 - def __init__(self, recommeded_value, level=LOW, **kwargs): + def __init__(self, recommended_value=None, level=LOW, **kwargs): ''' An option recommendation. That is, an option as well as its recommended value and the level of the recommendation. 
''' self.level = level - self.recommended_value = recommeded_value + self.recommended_value = recommended_value self.option = kwargs.pop('option', None) if self.option is None: self.option = ConversionOption(**kwargs) @@ -59,10 +64,12 @@ class OptionRecommendation(object): def validate_parameters(self): if self.option.choices and self.recommended_value not in \ self.option.choices: - raise ValueError('Recommended value not in choices') + raise ValueError('OpRec: %s: Recommended value not in choices'% + self.option.name) if not (isinstance(self.recommended_value, (int, float, str, unicode))\ - or self.default is None): - raise ValueError(unicode(self.default) + + or self.recommended_value is None): + raise ValueError('OpRec: %s:'%self.option.name + + repr(self.recommended_value) + ' is not a string or a number') @@ -186,4 +193,34 @@ class InputFormatPlugin(Plugin): return ret + + +class OutputFormatPlugin(Plugin): + ''' + OutputFormatPlugins are responsible for converting an OEB document + (OPF+HTML) into an output ebook. + + The OEB document can be assumed to be encoded in UTF-8. + The main action happens in :method:`convert`. + ''' + + type = _('Conversion Output') + can_be_disabled = False + supported_platforms = ['windows', 'osx', 'linux'] + + #: The file type (extension without leading period) that this + #: plugin outputs + file_type = None + + #: Options shared by all Output format plugins. Do not override + #: in sub-classes. Use :member:`options` instead. Every option must be an + #: instance of :class:`OptionRecommendation`. + common_options = set([]) + + #: Options to customize the behavior of this plugin. Every option must be an + #: instance of :class:`OptionRecommendation`. 
+ options = set([]) + + def convert(self, oeb_book, input_plugin, options, parse_cache, log): + raise NotImplementedError diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py index 002f56879f..a3a7e22298 100644 --- a/src/calibre/customize/profiles.py +++ b/src/calibre/customize/profiles.py @@ -3,6 +3,7 @@ __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' +import sys, re from calibre.customize import Plugin class InputProfile(Plugin): @@ -16,12 +17,43 @@ class InputProfile(Plugin): # inherit from this profile and override as needed name = 'Default Input Profile' - short_name = 'default' # Used in the CLI so dont spaces etc. in it + short_name = 'default' # Used in the CLI so don't use spaces etc. in it description = _('This profile tries to provide sane defaults and is useful ' 'if you know nothing about the input document.') input_profiles = [InputProfile] - +class OutputProfile(Plugin): + author = 'Kovid Goyal' + supported_platforms = set(['windows', 'osx', 'linux']) + can_be_disabled = False + type = _('Output profile') + + name = 'Default Output Profile' + short_name = 'default' # Used in the CLI so don't use spaces etc. in it + description = _('This profile tries to provide sane defaults and is useful ' + 'if you want to produce a document intended to be read at a ' + 'computer or on a range of devices.') + + epub_flow_size = sys.maxint + screen_size = None + remove_special_chars = False + remove_object_tags = False + +class SonyReader(OutputProfile): + + name = 'Sony Reader' + short_name = 'sony' + description = _('This profile is intended for the SONY PRS line. 
' + 'The 500/505/700 etc.') + + epub_flow_size = 270000 + screen_size = (590, 765) + remove_special_chars = re.compile(u'[\u200b\u00ad]') + remove_object_tags = True + + + +output_profiles = [OutputProfile, SonyReader] \ No newline at end of file diff --git a/src/calibre/customize/ui.py b/src/calibre/customize/ui.py index 1cdafae4f0..d8b7ebf6d8 100644 --- a/src/calibre/customize/ui.py +++ b/src/calibre/customize/ui.py @@ -6,8 +6,8 @@ import os, shutil, traceback, functools, sys from calibre.customize import Plugin, FileTypePlugin, MetadataReaderPlugin, \ MetadataWriterPlugin -from calibre.customize.conversion import InputFormatPlugin -from calibre.customize.profiles import InputProfile +from calibre.customize.conversion import InputFormatPlugin, OutputFormatPlugin +from calibre.customize.profiles import InputProfile, OutputProfile from calibre.customize.builtins import plugins as builtin_plugins from calibre.constants import __version__, iswindows, isosx from calibre.ebooks.metadata import MetaInformation @@ -76,6 +76,12 @@ def input_profiles(): if isinstance(plugin, InputProfile): yield plugin +def output_profiles(): + for plugin in _initialized_plugins: + if isinstance(plugin, OutputProfile): + yield plugin + + def reread_filetype_plugins(): global _on_import global _on_preprocess @@ -245,9 +251,19 @@ def input_format_plugins(): def plugin_for_input_format(fmt): for plugin in input_format_plugins(): - if fmt in plugin.file_types: + if fmt.lower() in plugin.file_types: return plugin - + +def output_format_plugins(): + for plugin in _initialized_plugins: + if isinstance(plugin, OutputFormatPlugin): + yield plugin + +def plugin_for_output_format(fmt): + for plugin in output_format_plugins(): + if fmt.lower() == plugin.file_type: + return plugin + def disable_plugin(plugin_or_name): x = getattr(plugin_or_name, 'name', plugin_or_name) diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py new file mode 100644 index 
0000000000..174fa87a5d --- /dev/null +++ b/src/calibre/ebooks/conversion/cli.py @@ -0,0 +1,146 @@ +from __future__ import with_statement +__license__ = 'GPL 3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +''' +Command line interface to conversion sub-system +''' + +USAGE = '%prog ' + _('''\ +input_file output_file [options] + +Convert an ebook from one format to another. + +input_file is the input and output_file is the output. Both must be +specified as the first two arguments to the command. + +The output ebook format is guessed from the file extension of +output_file. output_file can also be of the special format .EXT where +EXT is the output file extension. In this case, the name of the output +file is derived the name of the input file. Note that the filenames must +not start with a hyphen. Finally, if output_file has no extension, then +it is treated as a directory and an "open ebook" (OEB) consisting of HTML files +is written to that directory. These files are the files that would normally +have been passed to the output plugin. + + +After specifying the input +and output file you can customize the conversion by specifying various +options, listed below. 
+ +For full documentation of the conversion system see + +''') + 'http://calibre.kovidgoyal.net/user_manual/conversion.html' + +import sys, os + +from calibre.utils.config import OptionParser +from calibre.utils.logging import Log +from calibre.constants import preferred_encoding + +def print_help(parser, log): + help = parser.format_help().encode(preferred_encoding, 'replace') + log(help) + +def check_command_line_options(parser, args, log): + if len(args) < 3 or args[1].startswith('-') or args[2].startswith('-'): + print_help(parser) + log.error('\n\nYou must specify the input AND output files') + raise SystemExit(1) + + input = os.path.abspath(args[1]) + if not os.access(input, os.R_OK): + log.error('Cannot read from', input) + raise SystemExit(1) + + output = args[2] + if output.startswith('.'): + output = os.path.splitext(os.path.basename(input))[0]+output + output = os.path.abspath(output) + + if '.' in output: + if os.path.exists(output): + log.warn('WARNING:', output, 'exists. Deleting.') + os.remove(output) + + return input, output + +def option_recommendation_to_cli_option(add_option, rec): + opt = rec.option + switches = [opt.short_switch] if opt.short_switch else [] + switches.append(opt.long_switch) + add_option(opt.name, switches=switches, help=opt.help, + choices=opt.choices, default=rec.recommended_value) + +def add_input_output_options(parser, plumber): + input_options, output_options = \ + plumber.input_options, plumber.output_options + + def add_options(group, options): + for opt in options: + option_recommendation_to_cli_option(group, opt) + + if input_options: + io = parser.add_group(plumber.input_fmt.upper() + ' ' + _('OPTIONS')) + add_options(io, input_options) + + if output_options: + oo = parser.add_group(plumber.output_fmt.upper() + ' ' + _('OPTIONS')) + add_options(oo, output_options) + +def add_pipeline_options(parser, plumber): + groups = { + '' : ('', + [ + 'input_profile', + 'output_profile', + ] + ), + + 'DEBUG': (_('Options to help 
with debugging the conversion'), + [ + 'verbose', + ]), + + + } + + + for group, spec in groups.items(): + desc, options = spec + if group: + group = parser.add_option_group(group, desc) + add_option = group if group != '' else parser.add_option + + for name in options: + rec = plumber.get_option_by_name(name) + if rec.level < rec.HIGH: + option_recommendation_to_cli_option(add_option, rec) + + + + +def main(args=sys.argv): + log = Log() + parser = OptionParser(usage=USAGE) + fargs = parser.parse_args(args)[1] + + input, output = check_command_line_options(parser, fargs, log) + + from calibre.ebooks.conversion.plumber import Plumber + + plumber = Plumber(input, output, log) + add_input_output_options(parser, plumber) + add_pipeline_options(parser, plumber) + + opts = parser.parse_args(args)[0] + recommendations = [(n.dest, getattr(opts, n.dest)) \ + for n in parser.options_iter()] + + plumber.merge_ui_recommendations(recommendations) + + return 0 + +if __name__ == '__main__': + sys.exit(main()) \ No newline at end of file diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index ac7490bd39..742653251d 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -3,11 +3,15 @@ __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' +import os from calibre.customize.conversion import OptionRecommendation -from calibre.customize.ui import input_profiles +from calibre.customize.ui import input_profiles, output_profiles, \ + plugin_for_input_format, plugin_for_output_format -pipeline_options = [ +class Plumber(object): + + pipeline_options = [ OptionRecommendation(name='verbose', recommended_value=0, level=OptionRecommendation.LOW, @@ -16,7 +20,6 @@ OptionRecommendation(name='verbose', 'verbosity.') ), - OptionRecommendation(name='input_profile', recommended_value='default', level=OptionRecommendation.LOW, choices=[x.short_name for x in 
input_profiles()], @@ -27,4 +30,66 @@ OptionRecommendation(name='input_profile', 'pixels).') ), -] \ No newline at end of file +OptionRecommendation(name='output_profile', + recommended_value='default', level=OptionRecommendation.LOW, + choices=[x.short_name for x in output_profiles()], + help=_('Specify the output profile. The output profile ' + 'tells the conversion system how to optimize the ' + 'created document for the specified device. In some cases, ' + 'an output profile is required to produce documents that ' + 'will work on a device. For example EPUB on the SONY reader.' + ) + ), + +] + + def __init__(self, input, output, log): + self.input = input + self.output = output + self.log = log + + input_fmt = os.path.splitext(input)[1] + if not input_fmt: + raise ValueError('Input file must have and extension') + input_fmt = input_fmt[1:].lower() + + output_fmt = os.path.splitext(input)[1] + if not output_fmt: + output_fmt = '.oeb' + output_fmt = output_fmt[1:].lower() + + self.input_plugin = plugin_for_input_format(input_fmt) + self.output_plugin = plugin_for_output_format(output_fmt) + + if self.input_plugin is None: + raise ValueError('No plugin to handle input format: '+input_fmt) + + if self.output_plugin is None: + raise ValueError('No plugin to handle output format: '+output_fmt) + + self.input_fmt = input_fmt + self.output_fmt = output_fmt + + self.input_options = self.input_plugin.options.union( + self.input_plugin.common_options) + self.output_options = self.output_plugin.options.union( + self.output_plugin.common_options) + + self.merge_plugin_recommendations() + + def get_option_by_name(self, name): + for group in (self.input_options, self.pipeline_options, + self.output_options): + for rec in group: + if rec.option == name: + return rec + + def merge_plugin_recommendations(self): + pass + + def merge_ui_recommendations(self, recommendations): + pass + + + + \ No newline at end of file diff --git a/src/calibre/ebooks/html.py 
b/src/calibre/ebooks/html.py index 710b544007..191d552709 100644 --- a/src/calibre/ebooks/html.py +++ b/src/calibre/ebooks/html.py @@ -19,11 +19,10 @@ from lxml.html import HtmlElementClassLookup, HTMLParser as _HTMLParser, \ from lxml.etree import XPath get_text = XPath("//text()") -from calibre import LoggingInterface, unicode_path, entity_to_unicode +from calibre import unicode_path, entity_to_unicode from calibre.ebooks.chardet import xml_to_unicode, ENCODING_PATS from calibre.utils.config import Config, StringConfig from calibre.ebooks.metadata import MetaInformation -from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.opf2 import OPF, OPFCreator from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile from calibre.utils.zipfile import ZipFile @@ -401,7 +400,7 @@ class PreProcessor(object): html = rule[0].sub(rule[1], html) return html -class Parser(PreProcessor, LoggingInterface): +class Parser(PreProcessor): # SELF_CLOSING_TAGS = 'hr|br|link|img|meta|input|area|base|basefont' # SELF_CLOSING_RULES = [re.compile(p[0]%SELF_CLOSING_TAGS, re.IGNORECASE) for p in # [ @@ -412,7 +411,6 @@ class Parser(PreProcessor, LoggingInterface): # ] def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, name='htmlparser'): - LoggingInterface.__init__(self, logging.getLogger(name)) self.setup_cli_handler(opts.verbose) self.htmlfile = htmlfile self.opts = opts @@ -1038,6 +1036,7 @@ def merge_metadata(htmlfile, opf, opts): if opf: mi = MetaInformation(opf) elif htmlfile: + from calibre.ebooks.metadata.meta import get_metadata try: mi = get_metadata(open(htmlfile, 'rb'), 'html') except: diff --git a/src/calibre/ebooks/mobi/input.py b/src/calibre/ebooks/mobi/input.py index 1ce9950677..fa56b5c6b4 100644 --- a/src/calibre/ebooks/mobi/input.py +++ b/src/calibre/ebooks/mobi/input.py @@ -3,8 +3,6 @@ __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os - from 
calibre.customize.conversion import InputFormatPlugin class MOBIInput(InputFormatPlugin): @@ -18,12 +16,11 @@ class MOBIInput(InputFormatPlugin): from calibre.ebooks.mobi.reader import MobiReader mr = MobiReader(stream, log, options.input_encoding, options.debug_input) - mr.extract_content(output_dir=os.getcwdu(), parse_cache) + mr.extract_content('.', parse_cache) raw = parse_cache.get('calibre_raw_mobi_markup', False) if raw: if isinstance(raw, unicode): raw = raw.encode('utf-8') open('debug-raw.html', 'wb').write(raw) - return mr.created_opf_path - + return mr.created_opf_path \ No newline at end of file diff --git a/src/calibre/ebooks/oeb/output.py b/src/calibre/ebooks/oeb/output.py new file mode 100644 index 0000000000..0a74f488cf --- /dev/null +++ b/src/calibre/ebooks/oeb/output.py @@ -0,0 +1,17 @@ +from __future__ import with_statement +__license__ = 'GPL 3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +from calibre.customize.conversion import OutputFormatPlugin + +class OEBOutput(OutputFormatPlugin): + + name = 'OEB Output' + author = 'Kovid Goyal' + file_type = 'oeb' + + + def convert(self, oeb_book, input_plugin, options, parse_cache, log): + pass + diff --git a/src/calibre/linux.py b/src/calibre/linux.py index 427b41ca5f..ae6cb10818 100644 --- a/src/calibre/linux.py +++ b/src/calibre/linux.py @@ -18,6 +18,7 @@ entry_points = { 'console_scripts': [ \ 'ebook-device = calibre.devices.prs500.cli.main:main', 'ebook-meta = calibre.ebooks.metadata.cli:main', + 'ebook-convert = calibre.ebooks.convert.cli:main', 'txt2lrf = calibre.ebooks.lrf.txt.convert_from:main', 'html2lrf = calibre.ebooks.lrf.html.convert_from:main', 'html2oeb = calibre.ebooks.html:main', diff --git a/src/calibre/utils/logging.py b/src/calibre/utils/logging.py index ae2e1a792b..d5a55ac48b 100644 --- a/src/calibre/utils/logging.py +++ b/src/calibre/utils/logging.py @@ -13,13 +13,25 @@ ERROR = 3 import sys, traceback from functools import partial -from calibre 
import prints -from calibre.utils.terminfo import TerminalController -class ANSIStream: + + +class Stream(object): + + def __init__(self, stream): + from calibre import prints + self._prints = prints + self.stream = stream + + def flush(self): + self.stream.flush() + + +class ANSIStream(Stream): def __init__(self, stream=sys.stdout): - self.stream = stream + Stream.__init__(self, stream) + from calibre.utils.terminfo import TerminalController tc = TerminalController(stream) self.color = { DEBUG: tc.GREEN, @@ -32,16 +44,16 @@ class ANSIStream: def prints(self, level, *args, **kwargs): self.stream.write(self.color[level]) kwargs['file'] = self.stream - prints(*args, **kwargs) + self._prints(*args, **kwargs) self.stream.write(self.normal) def flush(self): self.stream.flush() -class HTMLStream: +class HTMLStream(Stream): def __init__(self, stream=sys.stdout): - self.stream = stream + Stream.__init__(self, stream) self.color = { DEBUG: '', INFO:'', @@ -53,7 +65,7 @@ class HTMLStream: def prints(self, level, *args, **kwargs): self.stream.write(self.color[level]) kwargs['file'] = self.stream - prints(*args, **kwargs) + self._prints(*args, **kwargs) self.stream.write(self.normal) def flush(self):