mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Conversion pipeline framework is finally taking shape
This commit is contained in:
parent
9445f488c2
commit
741d638409
@ -117,7 +117,11 @@ class InputFormatPlugin(Plugin):
|
||||
#: instance of :class:`OptionRecommendation`.
|
||||
options = set([])
|
||||
|
||||
def convert(self, stream, options, file_ext, parse_cache, log):
|
||||
#: A set of 3-tuples of the form
|
||||
#: (option_name, recommended_value, recommendation_level)
|
||||
recommendations = set([])
|
||||
|
||||
def convert(self, stream, options, file_ext, parse_cache, log, accelerators):
|
||||
'''
|
||||
This method must be implemented in sub-classes. It must return
|
||||
the path to the created OPF file. All output should be contained in
|
||||
@ -153,10 +157,16 @@ class InputFormatPlugin(Plugin):
|
||||
|
||||
:param log: A :class:`calibre.utils.logging.Log` object. All output
|
||||
should use this object.
|
||||
|
||||
:param accelerators: A dictionary of various information that the input
|
||||
plugin can get easily that would speed up the
|
||||
subsequent stages of the conversion.
|
||||
|
||||
'''
|
||||
raise NotImplementedError
|
||||
|
||||
def __call__(self, stream, options, file_ext, parse_cache, log, output_dir):
|
||||
def __call__(self, stream, options, file_ext, parse_cache, log,
|
||||
accelerators, output_dir):
|
||||
log('InputFormatPlugin: %s running'%self.name, end=' ')
|
||||
if hasattr(stream, 'name'):
|
||||
log('on', stream.name)
|
||||
@ -166,7 +176,8 @@ class InputFormatPlugin(Plugin):
|
||||
shutil.rmtree(x) if os.path.isdir(x) else os.remove(x)
|
||||
|
||||
|
||||
ret = self.convert(stream, options, file_ext, parse_cache, log)
|
||||
ret = self.convert(stream, options, file_ext, parse_cache,
|
||||
log, accelerators)
|
||||
for key in list(parse_cache.keys()):
|
||||
if os.path.abspath(key) != key:
|
||||
log.warn(('InputFormatPlugin: %s returned a '
|
||||
@ -221,6 +232,10 @@ class OutputFormatPlugin(Plugin):
|
||||
#: instance of :class:`OptionRecommendation`.
|
||||
options = set([])
|
||||
|
||||
#: A set of 3-tuples of the form
|
||||
#: (option_name, recommended_value, recommendation_level)
|
||||
recommendations = set([])
|
||||
|
||||
def convert(self, oeb_book, input_plugin, options, parse_cache, log):
    '''
    Abstract conversion hook. The base implementation performs no work and
    always raises :class:`NotImplementedError`.
    '''
    raise NotImplementedError()
|
||||
|
||||
|
@ -39,6 +39,7 @@ from optparse import OptionGroup, Option
|
||||
from calibre.utils.config import OptionParser
|
||||
from calibre.utils.logging import Log
|
||||
from calibre.constants import preferred_encoding
|
||||
from calibre.customize.conversion import OptionRecommendation
|
||||
|
||||
def print_help(parser, log):
|
||||
help = parser.format_help().encode(preferred_encoding, 'replace')
|
||||
@ -84,16 +85,16 @@ def add_input_output_options(parser, plumber):
|
||||
option_recommendation_to_cli_option(group, opt)
|
||||
|
||||
if input_options:
|
||||
title = plumber.input_fmt.upper() + ' ' + _('OPTIONS')
|
||||
title = _('INPUT OPTIONS')
|
||||
io = OptionGroup(parser, title, _('Options to control the processing'
|
||||
' of the input file'))
|
||||
' of the input %s file')%plumber.input_fmt)
|
||||
add_options(io.add_option, input_options)
|
||||
parser.add_option_group(io)
|
||||
|
||||
if output_options:
|
||||
title = plumber.output_fmt.upper() + ' ' + _('OPTIONS')
|
||||
oo = OptionGroup(parser, title, _('Options to control the processing'
|
||||
' of the output file'))
|
||||
' of the output %s file')%plumber.input_fmt)
|
||||
add_options(oo.add_option, output_options)
|
||||
parser.add_option_group(oo)
|
||||
|
||||
@ -106,6 +107,9 @@ def add_pipeline_options(parser, plumber):
|
||||
]
|
||||
),
|
||||
|
||||
'METADATA' : (_('Options to set metadata in the output'),
|
||||
plumber.metadata_option_names,
|
||||
),
|
||||
'DEBUG': (_('Options to help with debugging the conversion'),
|
||||
[
|
||||
'verbose',
|
||||
@ -114,7 +118,7 @@ def add_pipeline_options(parser, plumber):
|
||||
|
||||
}
|
||||
|
||||
group_order = ['', 'DEBUG']
|
||||
group_order = ['', 'METADATA', 'DEBUG']
|
||||
|
||||
for group in group_order:
|
||||
desc, options = groups[group]
|
||||
@ -147,11 +151,16 @@ def main(args=sys.argv):
|
||||
add_pipeline_options(parser, plumber)
|
||||
|
||||
opts = parser.parse_args(args)[0]
|
||||
recommendations = [(n.dest, getattr(opts, n.dest)) \
|
||||
for n in parser.options_iter()]
|
||||
|
||||
recommendations = [(n.dest, getattr(opts, n.dest),
|
||||
OptionRecommendation.HIGH) \
|
||||
for n in parser.options_iter()
|
||||
if n.dest]
|
||||
plumber.merge_ui_recommendations(recommendations)
|
||||
|
||||
plumber.run()
|
||||
|
||||
log(_('Output saved to'), ' ', plumber.output)
|
||||
|
||||
return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -9,9 +9,23 @@ from calibre.customize.conversion import OptionRecommendation
|
||||
from calibre.customize.ui import input_profiles, output_profiles, \
|
||||
plugin_for_input_format, plugin_for_output_format
|
||||
|
||||
class OptionValues(object):
    '''
    Plain attribute container. It defines nothing itself; option values are
    attached to instances with :func:`setattr`.
    '''
|
||||
|
||||
class Plumber(object):
|
||||
|
||||
pipeline_options = [
|
||||
metadata_option_names = [
|
||||
'title', 'authors', 'title_sort', 'author_sort', 'cover', 'comments',
|
||||
'publisher', 'series', 'series_index', 'rating', 'isbn',
|
||||
'tags', 'book_producer', 'language'
|
||||
]
|
||||
|
||||
def __init__(self, input, output, log):
|
||||
self.input = input
|
||||
self.output = output
|
||||
self.log = log
|
||||
|
||||
self.pipeline_options = [
|
||||
|
||||
OptionRecommendation(name='verbose',
|
||||
recommended_value=0, level=OptionRecommendation.LOW,
|
||||
@ -40,13 +54,72 @@ OptionRecommendation(name='output_profile',
|
||||
'will work on a device. For example EPUB on the SONY reader.'
|
||||
)
|
||||
),
|
||||
|
||||
OptionRecommendation(name='read_metadata_from_opf',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
short_switch='m',
|
||||
help=_('Read metadata from the specified OPF file. Metadata read '
|
||||
'from this file will override any metadata in the source '
|
||||
'file.')
|
||||
),
|
||||
|
||||
OptionRecommendation(name='title',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
help=_('Set the title.')),
|
||||
|
||||
OptionRecommendation(name='authors',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
help=_('Set the authors. Multiple authors should be separated ')),
|
||||
|
||||
OptionRecommendation(name='title_sort',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
help=_('The version of the title to be used for sorting. ')),
|
||||
|
||||
OptionRecommendation(name='author_sort',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
help=_('String to be used when sorting by author. ')),
|
||||
|
||||
OptionRecommendation(name='cover',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
help=_('Set the cover to the specified file.')),
|
||||
|
||||
OptionRecommendation(name='comments',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
help=_('Set the ebook description.')),
|
||||
|
||||
OptionRecommendation(name='publisher',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
help=_('Set the ebook publisher.')),
|
||||
|
||||
OptionRecommendation(name='series',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
help=_('Set the series this ebook belongs to.')),
|
||||
|
||||
OptionRecommendation(name='series_index',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
help=_('Set the index of the book in this series.')),
|
||||
|
||||
OptionRecommendation(name='rating',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
help=_('Set the rating. Should be a number between 1 and 5.')),
|
||||
|
||||
OptionRecommendation(name='isbn',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
help=_('Set the ISBN of the book.')),
|
||||
|
||||
OptionRecommendation(name='tags',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
help=_('Set the tags for the book. Should be a comma separated list.')),
|
||||
|
||||
OptionRecommendation(name='book_producer',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
help=_('Set the book producer.')),
|
||||
|
||||
OptionRecommendation(name='language',
|
||||
recommended_value=None, level=OptionRecommendation.LOW,
|
||||
help=_('Set the language.')),
|
||||
]
|
||||
|
||||
def __init__(self, input, output, log):
|
||||
self.input = input
|
||||
self.output = output
|
||||
self.log = log
|
||||
|
||||
input_fmt = os.path.splitext(input)[1]
|
||||
if not input_fmt:
|
||||
@ -85,11 +158,79 @@ OptionRecommendation(name='output_profile',
|
||||
return rec
|
||||
|
||||
def merge_plugin_recommendations(self):
    '''
    Merge the recommendations of the input and the output plugin into the
    known options.

    Every plugin exposes ``recommendations``, a collection of 3-tuples of
    the form ``(option_name, recommended_value, recommendation_level)``.
    The input plugin is processed before the output plugin. A
    recommendation is applied when the option exists and the level the
    option currently holds does not exceed the recommended level.
    Recommendations for unknown option names are ignored.
    '''
    for source in (self.input_plugin, self.output_plugin):
        for name, val, level in source.recommendations:
            rec = self.get_option_by_name(name)
            if rec is not None and rec.level <= level:
                rec.recommended_value = val
                # Record the level that won. Without this every later
                # comparison is made against the stale default level, so a
                # weaker recommendation silently replaces a stronger one.
                rec.level = level
|
||||
|
||||
def merge_ui_recommendations(self, recommendations):
    '''
    Merge recommendations coming from the user interface into the known
    options.

    :param recommendations: An iterable of 3-tuples of the form
                            ``(option_name, value, recommendation_level)``.

    A value is applied when the option exists, the level the option
    currently holds does not exceed the supplied level, and the option is
    not already held at the ``HIGH`` level. Unknown option names are
    ignored.
    '''
    for name, val, level in recommendations:
        rec = self.get_option_by_name(name)
        if rec is None:
            continue
        if rec.level <= level and rec.level < rec.HIGH:
            rec.recommended_value = val
            # Record the level that won so that a later, weaker merge
            # cannot override the value chosen here.
            rec.level = level
|
||||
|
||||
def read_user_metadata(self):
    '''
    Collect the metadata supplied through the options and store it in
    ``self.user_metadata``.

    If ``self.opts.read_metadata_from_opf`` is set, that path is made
    absolute and the metadata object is built from the OPF file. Every
    option named in ``self.metadata_option_names`` whose value is not
    ``None`` is then set on the metadata object, so explicitly supplied
    values replace values read from the OPF. ``authors`` is passed through
    ``string_to_authors``, ``tags`` is split on commas, and ``rating`` and
    ``series_index`` are converted to ``float``.

    If a cover path ends up set, the file is read into ``cover_data`` and
    the ``cover`` attribute is reset to ``None``.

    Conversion and I/O errors (``ValueError`` from ``float``, errors from
    ``open``) are not handled here and propagate to the caller.
    '''
    from calibre.ebooks.metadata import MetaInformation, string_to_authors
    from calibre.ebooks.metadata.opf2 import OPF
    mi = MetaInformation(None, [])
    if self.opts.read_metadata_from_opf is not None:
        self.opts.read_metadata_from_opf = os.path.abspath(
                                        self.opts.read_metadata_from_opf)
        # try/finally rather than ``with`` so the handle is closed without
        # depending on a ``with_statement`` future import in this module.
        opf_stream = open(self.opts.read_metadata_from_opf, 'rb')
        try:
            opf = OPF(opf_stream,
                      os.path.dirname(self.opts.read_metadata_from_opf))
            # Built while the stream is still open.
            # NOTE(review): assumes OPF/MetaInformation do not read from
            # the stream after construction -- confirm.
            mi = MetaInformation(opf)
        finally:
            opf_stream.close()
    for x in self.metadata_option_names:
        val = getattr(self.opts, x, None)
        if val is not None:
            if x == 'authors':
                val = string_to_authors(val)
            elif x == 'tags':
                val = [i.strip() for i in val.split(',')]
            elif x in ('rating', 'series_index'):
                val = float(val)
            setattr(mi, x, val)
    if mi.cover:
        cover_file = open(mi.cover, 'rb')
        try:
            mi.cover_data = ('', cover_file.read())
        finally:
            cover_file.close()
        mi.cover = None
    self.user_metadata = mi
|
||||
|
||||
|
||||
def setup_options(self):
    '''
    Populate ``self.opts`` from the current recommendations.

    A fresh :class:`OptionValues` is filled with the recommended value of
    every option in the input, pipeline and output option groups, in that
    order. The ``input_profile`` and ``output_profile`` values are then
    replaced by the first profile object whose ``short_name`` matches; when
    nothing matches the value is left as it was. Finally the user supplied
    metadata is read.
    '''
    self.opts = OptionValues()
    option_groups = (self.input_options, self.pipeline_options,
                     self.output_options)
    for options in option_groups:
        for recommendation in options:
            setattr(self.opts, recommendation.option.name,
                    recommendation.recommended_value)

    profile_sources = (('input_profile', input_profiles),
                       ('output_profile', output_profiles))
    for attr, available in profile_sources:
        for profile in available():
            if profile.short_name == getattr(self.opts, attr):
                setattr(self.opts, attr, profile)
                break

    self.read_user_metadata()
|
||||
|
||||
def run(self):
|
||||
self.setup_options()
|
||||
from calibre.customize.ui import run_plugins_on_preprocess
|
||||
self.input = run_plugins_on_preprocess(self.input)
|
||||
|
||||
from calibre.ebooks.oeb.reader import OEBReader
|
||||
from calibre.ebooks.oeb.base import OEBBook
|
||||
parse_cache, accelerators = {}, {}
|
||||
|
||||
opfpath = self.input_plugin(open(self.input, 'rb'), self.opts,
|
||||
self.input_fmt, parse_cache, self.log,
|
||||
accelerators)
|
||||
|
||||
self.reader = OEBReader()
|
||||
self.oeb = OEBBook(self.log, parse_cache=parse_cache)
|
||||
self.reader(self.oeb, opfpath)
|
||||
|
||||
|
||||
|
@ -51,7 +51,8 @@ class EPUBInput(InputFormatPlugin):
|
||||
traceback.print_exc()
|
||||
return False
|
||||
|
||||
def convert(self, stream, options, file_ext, parse_cache, log):
|
||||
def convert(self, stream, options, file_ext, parse_cache, log,
|
||||
accelerators):
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
from calibre import walk
|
||||
from calibre.ebooks import DRMError
|
||||
|
@ -12,7 +12,8 @@ class MOBIInput(InputFormatPlugin):
|
||||
description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML'
|
||||
file_types = set(['mobi', 'prc', 'azw'])
|
||||
|
||||
def convert(self, stream, options, file_ext, parse_cache, log):
|
||||
def convert(self, stream, options, file_ext, parse_cache, log,
|
||||
accelerators):
|
||||
from calibre.ebooks.mobi.reader import MobiReader
|
||||
mr = MobiReader(stream, log, options.input_encoding,
|
||||
options.debug_input)
|
||||
@ -22,5 +23,8 @@ class MOBIInput(InputFormatPlugin):
|
||||
if isinstance(raw, unicode):
|
||||
raw = raw.encode('utf-8')
|
||||
open('debug-raw.html', 'wb').write(raw)
|
||||
|
||||
for f, root in parse_cache.items():
|
||||
if '.' in f:
|
||||
accelerators[f] = {'pagebreaks':root.xpath(
|
||||
'//div[@class="mbp_pagebreak"]')}
|
||||
return mr.created_opf_path
|
@ -9,7 +9,6 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam>'
|
||||
import sys
|
||||
import os
|
||||
from struct import pack
|
||||
import functools
|
||||
import time
|
||||
import random
|
||||
from cStringIO import StringIO
|
||||
@ -18,11 +17,10 @@ from itertools import izip, count
|
||||
from collections import defaultdict
|
||||
from urlparse import urldefrag
|
||||
import logging
|
||||
from lxml import etree
|
||||
from PIL import Image
|
||||
from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \
|
||||
OEB_RASTER_IMAGES
|
||||
from calibre.ebooks.oeb.base import xpath, barename, namespace, prefixname
|
||||
from calibre.ebooks.oeb.base import namespace, prefixname
|
||||
from calibre.ebooks.oeb.base import urlnormalize
|
||||
from calibre.ebooks.oeb.base import OEBBook
|
||||
from calibre.ebooks.oeb.profile import Context
|
||||
|
@ -7,7 +7,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, sys, re, uuid
|
||||
import os, re, uuid
|
||||
from mimetypes import types_map
|
||||
from collections import defaultdict
|
||||
from itertools import count
|
||||
@ -203,14 +203,6 @@ class OEBError(Exception):
|
||||
"""Generic OEB-processing error."""
|
||||
pass
|
||||
|
||||
|
||||
class FauxLogger(object):
|
||||
"""Fake logging interface."""
|
||||
def __getattr__(self, name):
|
||||
return self
|
||||
def __call__(self, message):
|
||||
print message
|
||||
|
||||
class NullContainer(object):
|
||||
"""An empty container.
|
||||
|
||||
@ -1224,16 +1216,20 @@ class PageList(object):
|
||||
class OEBBook(object):
|
||||
"""Representation of a book in the IDPF OEB data model."""
|
||||
|
||||
def __init__(self, encoding=None, pretty_print=False, logger=FauxLogger()):
|
||||
def __init__(self, logger, parse_cache={}, encoding='utf-8',
|
||||
pretty_print=False):
|
||||
"""Create empty book. Optional arguments:
|
||||
|
||||
:param parse_cache: A cache of parsed XHTML/CSS. Keys are absolute
|
||||
paths to the cached files and values are lxml root objects and
|
||||
cssutils stylesheets.
|
||||
:param:`encoding`: Default encoding for textual content read
|
||||
from an external container.
|
||||
:param:`pretty_print`: Whether or not the canonical string form
|
||||
of XML markup is pretty-printed.
|
||||
:prama:`logger`: A Logger object to use for logging all messages
|
||||
:param:`logger`: A Log object to use for logging all messages
|
||||
related to the processing of this book. It is accessible
|
||||
via the instance data member :attr:`logger`.
|
||||
via the instance data members :attr:`logger,log`.
|
||||
|
||||
It provides the following public instance data members for
|
||||
accessing various parts of the OEB data model:
|
||||
@ -1251,7 +1247,7 @@ class OEBBook(object):
|
||||
"""
|
||||
self.encoding = encoding
|
||||
self.pretty_print = pretty_print
|
||||
self.logger = logger
|
||||
self.logger = self.log = logger
|
||||
self.version = '2.0'
|
||||
self.container = NullContainer()
|
||||
self.metadata = Metadata(self)
|
||||
|
@ -19,9 +19,9 @@ from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, OEB_IMAGES, \
|
||||
PAGE_MAP_MIME, JPEG_MIME, NCX_MIME, SVG_MIME
|
||||
from calibre.ebooks.oeb.base import XMLDECL_RE, COLLAPSE_RE, CSSURL_RE, \
|
||||
ENTITY_RE, LINK_SELECTORS, MS_COVER_TYPE
|
||||
from calibre.ebooks.oeb.base import namespace, barename, qname, XPath, xpath
|
||||
from calibre.ebooks.oeb.base import urlnormalize, xml2str
|
||||
from calibre.ebooks.oeb.base import OEBError, OEBBook, DirContainer
|
||||
from calibre.ebooks.oeb.base import namespace, barename, qname, XPath, xpath, \
|
||||
urlnormalize, BINARY_MIME, \
|
||||
OEBError, OEBBook, DirContainer
|
||||
from calibre.ebooks.oeb.writer import OEBWriter
|
||||
from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
|
||||
from calibre.ebooks.metadata.epub import CoverRenderer
|
||||
@ -45,9 +45,6 @@ class OEBReader(object):
|
||||
TRANSFORMS = []
|
||||
"""List of transforms to apply to content read with this Reader."""
|
||||
|
||||
def __init__(self):
|
||||
return
|
||||
|
||||
@classmethod
|
||||
def config(cls, cfg):
|
||||
"""Add any book-reading options to the :class:`Config` object
|
||||
@ -65,7 +62,7 @@ class OEBReader(object):
|
||||
:param:`oeb`.
|
||||
"""
|
||||
self.oeb = oeb
|
||||
self.logger = oeb.logger
|
||||
self.logger = self.log = oeb.logger
|
||||
oeb.container = self.Container(path)
|
||||
opf = self._read_opf()
|
||||
self._all_from_opf(opf)
|
||||
|
@ -6,18 +6,14 @@ from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
import operator
|
||||
import math
|
||||
from itertools import chain
|
||||
from collections import defaultdict
|
||||
from lxml import etree
|
||||
from calibre.ebooks.oeb.base import XHTML, XHTML_NS
|
||||
from calibre.ebooks.oeb.base import CSS_MIME, OEB_STYLES
|
||||
from calibre.ebooks.oeb.base import namespace, barename
|
||||
from calibre.ebooks.oeb.base import OEBBook
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||
|
||||
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
|
||||
|
@ -6,9 +6,6 @@ from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import sys
|
||||
import os
|
||||
from lxml import etree
|
||||
from calibre.ebooks.oeb.base import XML, XHTML, XHTML_NS
|
||||
from calibre.ebooks.oeb.base import XHTML_MIME, CSS_MIME
|
||||
from calibre.ebooks.oeb.base import element
|
||||
|
@ -6,13 +6,6 @@ from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
import operator
|
||||
import math
|
||||
from itertools import chain
|
||||
from collections import defaultdict
|
||||
from lxml import etree
|
||||
from calibre.ebooks.oeb.base import XHTML, XHTML_NS
|
||||
from calibre.ebooks.oeb.base import CSS_MIME
|
||||
|
@ -6,7 +6,6 @@ from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import sys
|
||||
import os
|
||||
from urlparse import urldefrag
|
||||
import base64
|
||||
@ -20,9 +19,9 @@ from PyQt4.QtGui import QImage
|
||||
from PyQt4.QtGui import QPainter
|
||||
from PyQt4.QtSvg import QSvgRenderer
|
||||
from PyQt4.QtGui import QApplication
|
||||
from calibre.ebooks.oeb.base import XHTML_NS, XHTML, SVG_NS, SVG, XLINK
|
||||
from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME, JPEG_MIME
|
||||
from calibre.ebooks.oeb.base import xml2str, xpath, namespace, barename
|
||||
from calibre.ebooks.oeb.base import XHTML, XLINK
|
||||
from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME
|
||||
from calibre.ebooks.oeb.base import xml2str, xpath
|
||||
from calibre.ebooks.oeb.base import urlnormalize
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||
|
||||
@ -88,7 +87,7 @@ class SVGRasterizer(object):
|
||||
hrefs = self.oeb.manifest.hrefs
|
||||
for elem in xpath(svg, '//svg:*[@xl:href]'):
|
||||
href = urlnormalize(elem.attrib[XLINK('href')])
|
||||
path, frag = urldefrag(href)
|
||||
path = urldefrag(href)[0]
|
||||
if not path:
|
||||
continue
|
||||
abshref = item.abshref(path)
|
||||
|
Loading…
x
Reference in New Issue
Block a user