diff --git a/src/calibre/customize/conversion.py b/src/calibre/customize/conversion.py index 10e5a44ddd..a77e32beee 100644 --- a/src/calibre/customize/conversion.py +++ b/src/calibre/customize/conversion.py @@ -117,7 +117,11 @@ class InputFormatPlugin(Plugin): #: instance of :class:`OptionRecommendation`. options = set([]) - def convert(self, stream, options, file_ext, parse_cache, log): + #: A set of 3-tuples of the form + #: (option_name, recommended_value, recommendation_level) + recommendations = set([]) + + def convert(self, stream, options, file_ext, parse_cache, log, accelerators): ''' This method must be implemented in sub-classes. It must return the path to the created OPF file. All output should be contained in @@ -153,10 +157,16 @@ class InputFormatPlugin(Plugin): :param log: A :class:`calibre.utils.logging.Log` object. All output should use this object. + + :param accelerators: A dictionary of various information that the input + plugin can get easily that would speed up the + subsequent stages of the conversion. + ''' raise NotImplementedError - def __call__(self, stream, options, file_ext, parse_cache, log, output_dir): + def __call__(self, stream, options, file_ext, parse_cache, log, + accelerators, output_dir): log('InputFormatPlugin: %s running'%self.name, end=' ') if hasattr(stream, 'name'): log('on', stream.name) @@ -166,7 +176,8 @@ class InputFormatPlugin(Plugin): shutil.rmtree(x) if os.path.isdir(x) else os.remove(x) - ret = self.convert(stream, options, file_ext, parse_cache, log) + ret = self.convert(stream, options, file_ext, parse_cache, + log, accelerators) for key in list(parse_cache.keys()): if os.path.abspath(key) != key: log.warn(('InputFormatPlugin: %s returned a ' @@ -221,6 +232,10 @@ class OutputFormatPlugin(Plugin): #: instance of :class:`OptionRecommendation`. 
options = set([]) + #: A set of 3-tuples of the form + #: (option_name, recommended_value, recommendation_level) + recommendations = set([]) + def convert(self, oeb_book, input_plugin, options, parse_cache, log): raise NotImplementedError diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index 9a320bc40f..f52264f8d0 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -39,6 +39,7 @@ from optparse import OptionGroup, Option from calibre.utils.config import OptionParser from calibre.utils.logging import Log from calibre.constants import preferred_encoding +from calibre.customize.conversion import OptionRecommendation def print_help(parser, log): help = parser.format_help().encode(preferred_encoding, 'replace') @@ -84,16 +85,16 @@ def add_input_output_options(parser, plumber): option_recommendation_to_cli_option(group, opt) if input_options: - title = plumber.input_fmt.upper() + ' ' + _('OPTIONS') + title = _('INPUT OPTIONS') io = OptionGroup(parser, title, _('Options to control the processing' - ' of the input file')) + ' of the input %s file')%plumber.input_fmt) add_options(io.add_option, input_options) parser.add_option_group(io) if output_options: title = plumber.output_fmt.upper() + ' ' + _('OPTIONS') oo = OptionGroup(parser, title, _('Options to control the processing' - ' of the output file')) + ' of the output %s file')%plumber.output_fmt) add_options(oo.add_option, output_options) parser.add_option_group(oo) @@ -106,6 +107,9 @@ def add_pipeline_options(parser, plumber): ] ), + 'METADATA' : (_('Options to set metadata in the output'), + plumber.metadata_option_names, + ), 'DEBUG': (_('Options to help with debugging the conversion'), [ 'verbose', @@ -114,7 +118,7 @@ def add_pipeline_options(parser, plumber): } - group_order = ['', 'DEBUG'] + group_order = ['', 'METADATA', 'DEBUG'] for group in group_order: desc, options = groups[group] @@ -147,11 +151,16 @@ def main(args=sys.argv): 
add_pipeline_options(parser, plumber) opts = parser.parse_args(args)[0] - recommendations = [(n.dest, getattr(opts, n.dest)) \ - for n in parser.options_iter()] - + recommendations = [(n.dest, getattr(opts, n.dest), + OptionRecommendation.HIGH) \ + for n in parser.options_iter() + if n.dest] plumber.merge_ui_recommendations(recommendations) + plumber.run() + + log(_('Output saved to'), ' ', plumber.output) + return 0 if __name__ == '__main__': diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index bd4d365af8..75a6687c4e 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -9,9 +9,23 @@ from calibre.customize.conversion import OptionRecommendation from calibre.customize.ui import input_profiles, output_profiles, \ plugin_for_input_format, plugin_for_output_format +class OptionValues(object): + pass + class Plumber(object): - pipeline_options = [ + metadata_option_names = [ + 'title', 'authors', 'title_sort', 'author_sort', 'cover', 'comments', + 'publisher', 'series', 'series_index', 'rating', 'isbn', + 'tags', 'book_producer', 'language' + ] + + def __init__(self, input, output, log): + self.input = input + self.output = output + self.log = log + + self.pipeline_options = [ OptionRecommendation(name='verbose', recommended_value=0, level=OptionRecommendation.LOW, @@ -40,13 +54,72 @@ OptionRecommendation(name='output_profile', 'will work on a device. For example EPUB on the SONY reader.' ) ), + +OptionRecommendation(name='read_metadata_from_opf', + recommended_value=None, level=OptionRecommendation.LOW, + short_switch='m', + help=_('Read metadata from the specified OPF file. 
Metadata read ' + 'from this file will override any metadata in the source ' + 'file.') + ), + +OptionRecommendation(name='title', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the title.')), +OptionRecommendation(name='authors', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the authors. Multiple authors should be separated ')), + +OptionRecommendation(name='title_sort', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('The version of the title to be used for sorting. ')), + +OptionRecommendation(name='author_sort', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('String to be used when sorting by author. ')), + +OptionRecommendation(name='cover', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the cover to the specified file.')), + +OptionRecommendation(name='comments', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the ebook description.')), + +OptionRecommendation(name='publisher', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the ebook publisher.')), + +OptionRecommendation(name='series', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the series this ebook belongs to.')), + +OptionRecommendation(name='series_index', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the index of the book in this series.')), + +OptionRecommendation(name='rating', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the rating. Should be a number between 1 and 5.')), + +OptionRecommendation(name='isbn', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the ISBN of the book.')), + +OptionRecommendation(name='tags', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the tags for the book. 
Should be a comma separated list.')), + +OptionRecommendation(name='book_producer', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the book producer.')), + +OptionRecommendation(name='language', + recommended_value=None, level=OptionRecommendation.LOW, + help=_('Set the language.')), ] - def __init__(self, input, output, log): - self.input = input - self.output = output - self.log = log input_fmt = os.path.splitext(input)[1] if not input_fmt: @@ -85,11 +158,79 @@ OptionRecommendation(name='output_profile', return rec def merge_plugin_recommendations(self): - pass + for source in (self.input_plugin, self.output_plugin): + for name, val, level in source.recommendations: + rec = self.get_option_by_name(name) + if rec is not None and rec.level <= level: + rec.recommended_value = val def merge_ui_recommendations(self, recommendations): - pass + for name, val, level in recommendations: + rec = self.get_option_by_name(name) + if rec is not None and rec.level <= level and rec.level < rec.HIGH: + rec.recommended_value = val + def read_user_metadata(self): + from calibre.ebooks.metadata import MetaInformation, string_to_authors + from calibre.ebooks.metadata.opf2 import OPF + mi = MetaInformation(None, []) + if self.opts.read_metadata_from_opf is not None: + self.opts.read_metadata_from_opf = os.path.abspath( + self.opts.read_metadata_from_opf) + opf = OPF(open(self.opts.read_metadata_from_opf, 'rb'), + os.path.dirname(self.opts.read_metadata_from_opf)) + mi = MetaInformation(opf) + for x in self.metadata_option_names: + val = getattr(self.opts, x, None) + if val is not None: + if x == 'authors': + val = string_to_authors(val) + elif x == 'tags': + val = [i.strip() for i in val.split(',')] + elif x in ('rating', 'series_index'): + val = float(val) + setattr(mi, x, val) + if mi.cover: + mi.cover_data = ('', open(mi.cover, 'rb').read()) + mi.cover = None + self.user_metadata = mi + + def setup_options(self): + self.opts = OptionValues() + for group in 
(self.input_options, self.pipeline_options, + self.output_options): + for rec in group: + setattr(self.opts, rec.option.name, rec.recommended_value) + + for x in input_profiles(): + if x.short_name == self.opts.input_profile: + self.opts.input_profile = x + break + + for x in output_profiles(): + if x.short_name == self.opts.output_profile: + self.opts.output_profile = x + break + + self.read_user_metadata() + + def run(self): + self.setup_options() + from calibre.customize.ui import run_plugins_on_preprocess + self.input = run_plugins_on_preprocess(self.input) + + from calibre.ebooks.oeb.reader import OEBReader + from calibre.ebooks.oeb.base import OEBBook + parse_cache, accelerators = {}, {} + + opfpath = self.input_plugin(open(self.input, 'rb'), self.opts, + self.input_fmt, parse_cache, self.log, + accelerators) + + self.reader = OEBReader() + self.oeb = OEBBook(self.log, parse_cache=parse_cache) + self.reader(self.oeb, opfpath) + \ No newline at end of file diff --git a/src/calibre/ebooks/epub/input.py b/src/calibre/ebooks/epub/input.py index 1b69424a9e..4c1cdbfcf5 100644 --- a/src/calibre/ebooks/epub/input.py +++ b/src/calibre/ebooks/epub/input.py @@ -51,7 +51,8 @@ class EPUBInput(InputFormatPlugin): traceback.print_exc() return False - def convert(self, stream, options, file_ext, parse_cache, log): + def convert(self, stream, options, file_ext, parse_cache, log, + accelerators): from calibre.utils.zipfile import ZipFile from calibre import walk from calibre.ebooks import DRMError diff --git a/src/calibre/ebooks/mobi/input.py b/src/calibre/ebooks/mobi/input.py index fa56b5c6b4..b3400c54e1 100644 --- a/src/calibre/ebooks/mobi/input.py +++ b/src/calibre/ebooks/mobi/input.py @@ -12,7 +12,8 @@ class MOBIInput(InputFormatPlugin): description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML' file_types = set(['mobi', 'prc', 'azw']) - def convert(self, stream, options, file_ext, parse_cache, log): + def convert(self, stream, options, file_ext, parse_cache, log, + 
accelerators): from calibre.ebooks.mobi.reader import MobiReader mr = MobiReader(stream, log, options.input_encoding, options.debug_input) @@ -22,5 +23,8 @@ class MOBIInput(InputFormatPlugin): if isinstance(raw, unicode): raw = raw.encode('utf-8') open('debug-raw.html', 'wb').write(raw) - + for f, root in parse_cache.items(): + if '.' in f: + accelerators[f] = {'pagebreaks':root.xpath( + '//div[@class="mbp_pagebreak"]')} return mr.created_opf_path \ No newline at end of file diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index 86224488c0..6ebeba3739 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -9,7 +9,6 @@ __copyright__ = '2008, Marshall T. Vandegrift ' import sys import os from struct import pack -import functools import time import random from cStringIO import StringIO @@ -18,11 +17,10 @@ from itertools import izip, count from collections import defaultdict from urlparse import urldefrag import logging -from lxml import etree from PIL import Image from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \ OEB_RASTER_IMAGES -from calibre.ebooks.oeb.base import xpath, barename, namespace, prefixname +from calibre.ebooks.oeb.base import namespace, prefixname from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks.oeb.base import OEBBook from calibre.ebooks.oeb.profile import Context diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index f7c472320e..59ce1f7b95 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -7,7 +7,7 @@ __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. 
Vandegrift ' __docformat__ = 'restructuredtext en' -import os, sys, re, uuid +import os, re, uuid from mimetypes import types_map from collections import defaultdict from itertools import count @@ -203,14 +203,6 @@ class OEBError(Exception): """Generic OEB-processing error.""" pass - -class FauxLogger(object): - """Fake logging interface.""" - def __getattr__(self, name): - return self - def __call__(self, message): - print message - class NullContainer(object): """An empty container. @@ -1224,16 +1216,20 @@ class PageList(object): class OEBBook(object): """Representation of a book in the IDPF OEB data model.""" - def __init__(self, encoding=None, pretty_print=False, logger=FauxLogger()): + def __init__(self, logger, parse_cache={}, encoding='utf-8', + pretty_print=False): """Create empty book. Optional arguments: + :param parse_cache: A cache of parsed XHTML/CSS. Keys are absolute + paths to the cached files and values are lxml root objects and + cssutils stylesheets. :param:`encoding`: Default encoding for textual content read from an external container. :param:`pretty_print`: Whether or not the canonical string form of XML markup is pretty-printed. - :prama:`logger`: A Logger object to use for logging all messages + :param:`logger`: A Log object to use for logging all messages related to the processing of this book. It is accessible - via the instance data member :attr:`logger`. + via the instance data members :attr:`logger,log`. 
It provides the following public instance data members for accessing various parts of the OEB data model: @@ -1251,7 +1247,7 @@ class OEBBook(object): """ self.encoding = encoding self.pretty_print = pretty_print - self.logger = logger + self.logger = self.log = logger self.version = '2.0' self.container = NullContainer() self.metadata = Metadata(self) diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py index 0fce1c2b0d..dbafa5afac 100644 --- a/src/calibre/ebooks/oeb/reader.py +++ b/src/calibre/ebooks/oeb/reader.py @@ -19,9 +19,9 @@ from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, OEB_IMAGES, \ PAGE_MAP_MIME, JPEG_MIME, NCX_MIME, SVG_MIME from calibre.ebooks.oeb.base import XMLDECL_RE, COLLAPSE_RE, CSSURL_RE, \ ENTITY_RE, LINK_SELECTORS, MS_COVER_TYPE -from calibre.ebooks.oeb.base import namespace, barename, qname, XPath, xpath -from calibre.ebooks.oeb.base import urlnormalize, xml2str -from calibre.ebooks.oeb.base import OEBError, OEBBook, DirContainer +from calibre.ebooks.oeb.base import namespace, barename, qname, XPath, xpath, \ + urlnormalize, BINARY_MIME, \ + OEBError, OEBBook, DirContainer from calibre.ebooks.oeb.writer import OEBWriter from calibre.ebooks.oeb.entitydefs import ENTITYDEFS from calibre.ebooks.metadata.epub import CoverRenderer @@ -45,9 +45,6 @@ class OEBReader(object): TRANSFORMS = [] """List of transforms to apply to content read with this Reader.""" - def __init__(self): - return - @classmethod def config(cls, cfg): """Add any book-reading options to the :class:`Config` object @@ -65,7 +62,7 @@ class OEBReader(object): :param:`oeb`. 
""" self.oeb = oeb - self.logger = oeb.logger + self.logger = self.log = oeb.logger oeb.container = self.Container(path) opf = self._read_opf() self._all_from_opf(opf) diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index ede2a027ed..9833b3b4d0 100644 --- a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -6,18 +6,14 @@ from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. Vandegrift ' -import sys -import os import re import operator import math -from itertools import chain from collections import defaultdict from lxml import etree from calibre.ebooks.oeb.base import XHTML, XHTML_NS from calibre.ebooks.oeb.base import CSS_MIME, OEB_STYLES from calibre.ebooks.oeb.base import namespace, barename -from calibre.ebooks.oeb.base import OEBBook from calibre.ebooks.oeb.stylizer import Stylizer COLLAPSE = re.compile(r'[ \t\r\n\v]+') diff --git a/src/calibre/ebooks/oeb/transforms/htmltoc.py b/src/calibre/ebooks/oeb/transforms/htmltoc.py index 0040f39c14..4504059531 100644 --- a/src/calibre/ebooks/oeb/transforms/htmltoc.py +++ b/src/calibre/ebooks/oeb/transforms/htmltoc.py @@ -6,9 +6,6 @@ from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. Vandegrift ' -import sys -import os -from lxml import etree from calibre.ebooks.oeb.base import XML, XHTML, XHTML_NS from calibre.ebooks.oeb.base import XHTML_MIME, CSS_MIME from calibre.ebooks.oeb.base import element diff --git a/src/calibre/ebooks/oeb/transforms/manglecase.py b/src/calibre/ebooks/oeb/transforms/manglecase.py index c819475a4d..4b852db6c4 100644 --- a/src/calibre/ebooks/oeb/transforms/manglecase.py +++ b/src/calibre/ebooks/oeb/transforms/manglecase.py @@ -6,13 +6,6 @@ from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. 
Vandegrift ' -import sys -import os -import re -import operator -import math -from itertools import chain -from collections import defaultdict from lxml import etree from calibre.ebooks.oeb.base import XHTML, XHTML_NS from calibre.ebooks.oeb.base import CSS_MIME diff --git a/src/calibre/ebooks/oeb/transforms/rasterize.py b/src/calibre/ebooks/oeb/transforms/rasterize.py index aef5c2c98b..2d86fe63b5 100644 --- a/src/calibre/ebooks/oeb/transforms/rasterize.py +++ b/src/calibre/ebooks/oeb/transforms/rasterize.py @@ -6,7 +6,6 @@ from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. Vandegrift ' -import sys import os from urlparse import urldefrag import base64 @@ -20,9 +19,9 @@ from PyQt4.QtGui import QImage from PyQt4.QtGui import QPainter from PyQt4.QtSvg import QSvgRenderer from PyQt4.QtGui import QApplication -from calibre.ebooks.oeb.base import XHTML_NS, XHTML, SVG_NS, SVG, XLINK -from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME, JPEG_MIME -from calibre.ebooks.oeb.base import xml2str, xpath, namespace, barename +from calibre.ebooks.oeb.base import XHTML, XLINK +from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME +from calibre.ebooks.oeb.base import xml2str, xpath from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks.oeb.stylizer import Stylizer @@ -88,7 +87,7 @@ class SVGRasterizer(object): hrefs = self.oeb.manifest.hrefs for elem in xpath(svg, '//svg:*[@xl:href]'): href = urlnormalize(elem.attrib[XLINK('href')]) - path, frag = urldefrag(href) + path = urldefrag(href)[0] if not path: continue abshref = item.abshref(path)