mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add support for progress reporting in the conversion pipeline and fix the size-based splitting code
This commit is contained in:
parent
dbc2d315ed
commit
13e7d6334b
@ -79,6 +79,10 @@ class OptionRecommendation(object):
|
|||||||
repr(self.recommended_value) +
|
repr(self.recommended_value) +
|
||||||
' is not a string or a number')
|
' is not a string or a number')
|
||||||
|
|
||||||
|
class DummyReporter(object):
    '''
    Progress reporter that silently discards every update.

    Instances are callable with a progress value and an optional message,
    so they can be used wherever a progress callback is expected but no
    feedback is wanted.
    '''

    def __call__(self, percent, msg=''):
        # Intentionally a no-op: both arguments are ignored.
        return None
|
||||||
|
|
||||||
class InputFormatPlugin(Plugin):
|
class InputFormatPlugin(Plugin):
|
||||||
'''
|
'''
|
||||||
@ -133,6 +137,10 @@ class InputFormatPlugin(Plugin):
|
|||||||
#: (option_name, recommended_value, recommendation_level)
|
#: (option_name, recommended_value, recommendation_level)
|
||||||
recommendations = set([])
|
recommendations = set([])
|
||||||
|
|
||||||
|
def __init__(self, *args):
|
||||||
|
Plugin.__init__(self, *args)
|
||||||
|
self.report_progress = DummyReporter()
|
||||||
|
|
||||||
def get_images(self):
|
def get_images(self):
|
||||||
'''
|
'''
|
||||||
Return a list of absolute paths to the images, if this input plugin
|
Return a list of absolute paths to the images, if this input plugin
|
||||||
@ -242,6 +250,11 @@ class OutputFormatPlugin(Plugin):
|
|||||||
#: (option_name, recommended_value, recommendation_level)
|
#: (option_name, recommended_value, recommendation_level)
|
||||||
recommendations = set([])
|
recommendations = set([])
|
||||||
|
|
||||||
|
def __init__(self, *args):
|
||||||
|
Plugin.__init__(self, *args)
|
||||||
|
self.report_progress = DummyReporter()
|
||||||
|
|
||||||
|
|
||||||
def convert(self, oeb_book, output, input_plugin, opts, log):
|
def convert(self, oeb_book, output, input_plugin, opts, log):
|
||||||
'''
|
'''
|
||||||
Render the contents of `oeb_book` (which is an instance of
|
Render the contents of `oeb_book` (which is an instance of
|
||||||
|
@ -343,7 +343,7 @@ class ComicInput(InputFormatPlugin):
|
|||||||
new_pages = n2
|
new_pages = n2
|
||||||
else:
|
else:
|
||||||
new_pages, failures = process_pages(new_pages, self.opts,
|
new_pages, failures = process_pages(new_pages, self.opts,
|
||||||
self.progress, tdir2)
|
self.report_progress, tdir2)
|
||||||
if not new_pages:
|
if not new_pages:
|
||||||
raise ValueError('Could not find any valid pages in comic: %s'
|
raise ValueError('Could not find any valid pages in comic: %s'
|
||||||
% comic)
|
% comic)
|
||||||
@ -360,13 +360,12 @@ class ComicInput(InputFormatPlugin):
|
|||||||
def get_images(self):
|
def get_images(self):
|
||||||
return self._images
|
return self._images
|
||||||
|
|
||||||
def convert(self, stream, opts, file_ext, log, accelerators,
|
def convert(self, stream, opts, file_ext, log, accelerators):
|
||||||
progress=lambda p, m : m):
|
|
||||||
from calibre.ebooks.metadata import MetaInformation
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
from calibre.ebooks.metadata.toc import TOC
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
|
|
||||||
self.opts, self.log, self.progress = opts, log, progress
|
self.opts, self.log= opts, log
|
||||||
if file_ext == 'cbc':
|
if file_ext == 'cbc':
|
||||||
comics_ = self.get_comics_from_collection(stream)
|
comics_ = self.get_comics_from_collection(stream)
|
||||||
else:
|
else:
|
||||||
|
@ -175,8 +175,20 @@ def add_pipeline_options(parser, plumber):
|
|||||||
def option_parser():
|
def option_parser():
|
||||||
return OptionParser(usage=USAGE)
|
return OptionParser(usage=USAGE)
|
||||||
|
|
||||||
|
|
||||||
|
class ProgressBar(object):
    '''
    Progress reporter that writes each update through a log callable.

    :param log: Callable invoked with a single formatted string for every
                update that carries a message.
    '''

    def __init__(self, log):
        self.log = log

    def __call__(self, frac, msg=''):
        '''
        Report progress.

        :param frac: Completed fraction; it is multiplied by 100 and
                     truncated to an integer percentage.
        :param msg:  Text shown after the percentage. Updates without a
                     message are ignored.
        '''
        if not msg:
            return
        self.log('%d%% %s'%(int(frac*100), msg))
|
||||||
|
|
||||||
def main(args=sys.argv):
|
def main(args=sys.argv):
|
||||||
log = Log()
|
log = Log()
|
||||||
|
reporter = ProgressBar(log)
|
||||||
parser = option_parser()
|
parser = option_parser()
|
||||||
if len(args) < 3:
|
if len(args) < 3:
|
||||||
print_help(parser, log)
|
print_help(parser, log)
|
||||||
@ -186,7 +198,7 @@ def main(args=sys.argv):
|
|||||||
|
|
||||||
from calibre.ebooks.conversion.plumber import Plumber
|
from calibre.ebooks.conversion.plumber import Plumber
|
||||||
|
|
||||||
plumber = Plumber(input, output, log)
|
plumber = Plumber(input, output, log, reporter)
|
||||||
add_input_output_options(parser, plumber)
|
add_input_output_options(parser, plumber)
|
||||||
add_pipeline_options(parser, plumber)
|
add_pipeline_options(parser, plumber)
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
import os, re
|
import os, re
|
||||||
|
|
||||||
from calibre.customize.conversion import OptionRecommendation
|
from calibre.customize.conversion import OptionRecommendation, DummyReporter
|
||||||
from calibre.customize.ui import input_profiles, output_profiles, \
|
from calibre.customize.ui import input_profiles, output_profiles, \
|
||||||
plugin_for_input_format, plugin_for_output_format
|
plugin_for_input_format, plugin_for_output_format
|
||||||
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
|
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
|
||||||
@ -22,6 +22,17 @@ def supported_input_formats():
|
|||||||
class OptionValues(object):
|
class OptionValues(object):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
class CompositeProgressReporter(object):
    '''
    Map the progress of one stage onto a slice of an overall progress range.

    A stage reports its own progress as a fraction between 0 and 1. That
    fraction is linearly rescaled into ``[global_min, global_max]`` and
    forwarded, together with the message, to `global_reporter`.

    :param global_min: Overall progress value corresponding to a stage
                       fraction of 0.
    :param global_max: Overall progress value corresponding to a stage
                       fraction of 1.
    :param global_reporter: Callable accepting ``(fraction, msg)`` that
                            receives the rescaled progress.
    '''

    def __init__(self, global_min, global_max, global_reporter):
        self.global_min, self.global_max = global_min, global_max
        self.global_reporter = global_reporter

    def __call__(self, fraction, msg=''):
        '''
        Report stage progress.

        :param fraction: Stage progress; values outside ``[0, 1]`` are
                         clamped to that interval.
        :param msg: Optional message passed through unchanged.
        '''
        # Clamp so that a stage reporting an out-of-range value cannot move
        # the overall progress outside the slice allotted to it.
        fraction = min(1., max(0., fraction))
        span = self.global_max - self.global_min
        self.global_reporter(self.global_min + fraction * span, msg)
|
||||||
|
|
||||||
class Plumber(object):
|
class Plumber(object):
|
||||||
'''
|
'''
|
||||||
The `Plumber` manages the conversion pipeline. A UI should call the methods
|
The `Plumber` manages the conversion pipeline. A UI should call the methods
|
||||||
@ -35,7 +46,7 @@ class Plumber(object):
|
|||||||
'tags', 'book_producer', 'language'
|
'tags', 'book_producer', 'language'
|
||||||
]
|
]
|
||||||
|
|
||||||
def __init__(self, input, output, log):
|
def __init__(self, input, output, log, report_progress=DummyReporter()):
|
||||||
'''
|
'''
|
||||||
:param input: Path to input file.
|
:param input: Path to input file.
|
||||||
:param output: Path to output file/directory
|
:param output: Path to output file/directory
|
||||||
@ -43,6 +54,7 @@ class Plumber(object):
|
|||||||
self.input = os.path.abspath(input)
|
self.input = os.path.abspath(input)
|
||||||
self.output = os.path.abspath(output)
|
self.output = os.path.abspath(output)
|
||||||
self.log = log
|
self.log = log
|
||||||
|
self.ui_reporter = report_progress
|
||||||
|
|
||||||
# Initialize the conversion options that are independent of input and
|
# Initialize the conversion options that are independent of input and
|
||||||
# output formats. The input and output plugins can still disable these
|
# output formats. The input and output plugins can still disable these
|
||||||
@ -63,7 +75,8 @@ OptionRecommendation(name='input_profile',
|
|||||||
'conversion system information on how to interpret '
|
'conversion system information on how to interpret '
|
||||||
'various information in the input document. For '
|
'various information in the input document. For '
|
||||||
'example resolution dependent lengths (i.e. lengths in '
|
'example resolution dependent lengths (i.e. lengths in '
|
||||||
'pixels).')
|
'pixels). Choices are:')+\
|
||||||
|
', '.join([x.short_name for x in input_profiles()])
|
||||||
),
|
),
|
||||||
|
|
||||||
OptionRecommendation(name='output_profile',
|
OptionRecommendation(name='output_profile',
|
||||||
@ -73,8 +86,9 @@ OptionRecommendation(name='output_profile',
|
|||||||
'tells the conversion system how to optimize the '
|
'tells the conversion system how to optimize the '
|
||||||
'created document for the specified device. In some cases, '
|
'created document for the specified device. In some cases, '
|
||||||
'an output profile is required to produce documents that '
|
'an output profile is required to produce documents that '
|
||||||
'will work on a device. For example EPUB on the SONY reader.'
|
'will work on a device. For example EPUB on the SONY reader. '
|
||||||
)
|
'Choices are:') + \
|
||||||
|
', '.join([x.short_name for x in output_profiles()])
|
||||||
),
|
),
|
||||||
|
|
||||||
OptionRecommendation(name='base_font_size',
|
OptionRecommendation(name='base_font_size',
|
||||||
@ -552,6 +566,9 @@ OptionRecommendation(name='list_recipes',
|
|||||||
if hasattr(self.opts, 'lrf') and self.output_plugin.file_type == 'lrf':
|
if hasattr(self.opts, 'lrf') and self.output_plugin.file_type == 'lrf':
|
||||||
self.opts.lrf = True
|
self.opts.lrf = True
|
||||||
|
|
||||||
|
self.ui_reporter(0.01, _('Converting input to HTML...'))
|
||||||
|
ir = CompositeProgressReporter(0.01, 0.34, self.ui_reporter)
|
||||||
|
self.input_plugin.report_progress = ir
|
||||||
self.oeb = self.input_plugin(stream, self.opts,
|
self.oeb = self.input_plugin(stream, self.opts,
|
||||||
self.input_fmt, self.log,
|
self.input_fmt, self.log,
|
||||||
accelerators, tdir)
|
accelerators, tdir)
|
||||||
@ -560,9 +577,12 @@ OptionRecommendation(name='list_recipes',
|
|||||||
return
|
return
|
||||||
if not hasattr(self.oeb, 'manifest'):
|
if not hasattr(self.oeb, 'manifest'):
|
||||||
self.oeb = create_oebbook(self.log, self.oeb, self.opts)
|
self.oeb = create_oebbook(self.log, self.oeb, self.opts)
|
||||||
|
pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
|
||||||
|
pr(0., _('Running transforms on ebook...'))
|
||||||
|
|
||||||
from calibre.ebooks.oeb.transforms.guide import Clean
|
from calibre.ebooks.oeb.transforms.guide import Clean
|
||||||
Clean()(self.oeb, self.opts)
|
Clean()(self.oeb, self.opts)
|
||||||
|
pr(0.1)
|
||||||
|
|
||||||
self.opts.source = self.opts.input_profile
|
self.opts.source = self.opts.input_profile
|
||||||
self.opts.dest = self.opts.output_profile
|
self.opts.dest = self.opts.output_profile
|
||||||
@ -570,9 +590,11 @@ OptionRecommendation(name='list_recipes',
|
|||||||
from calibre.ebooks.oeb.transforms.metadata import MergeMetadata
|
from calibre.ebooks.oeb.transforms.metadata import MergeMetadata
|
||||||
MergeMetadata()(self.oeb, self.user_metadata,
|
MergeMetadata()(self.oeb, self.user_metadata,
|
||||||
self.opts.prefer_metadata_cover)
|
self.opts.prefer_metadata_cover)
|
||||||
|
pr(0.2)
|
||||||
|
|
||||||
from calibre.ebooks.oeb.transforms.structure import DetectStructure
|
from calibre.ebooks.oeb.transforms.structure import DetectStructure
|
||||||
DetectStructure()(self.oeb, self.opts)
|
DetectStructure()(self.oeb, self.opts)
|
||||||
|
pr(0.35)
|
||||||
|
|
||||||
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
|
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
|
||||||
fbase = self.opts.base_font_size
|
fbase = self.opts.base_font_size
|
||||||
@ -586,6 +608,7 @@ OptionRecommendation(name='list_recipes',
|
|||||||
|
|
||||||
from calibre.ebooks.oeb.transforms.jacket import Jacket
|
from calibre.ebooks.oeb.transforms.jacket import Jacket
|
||||||
Jacket()(self.oeb, self.opts)
|
Jacket()(self.oeb, self.opts)
|
||||||
|
pr(0.4)
|
||||||
|
|
||||||
if self.opts.extra_css and os.path.exists(self.opts.extra_css):
|
if self.opts.extra_css and os.path.exists(self.opts.extra_css):
|
||||||
self.opts.extra_css = open(self.opts.extra_css, 'rb').read()
|
self.opts.extra_css = open(self.opts.extra_css, 'rb').read()
|
||||||
@ -598,6 +621,7 @@ OptionRecommendation(name='list_recipes',
|
|||||||
if self.opts.linearize_tables:
|
if self.opts.linearize_tables:
|
||||||
from calibre.ebooks.oeb.transforms.linearize_tables import LinearizeTables
|
from calibre.ebooks.oeb.transforms.linearize_tables import LinearizeTables
|
||||||
LinearizeTables()(self.oeb, self.opts)
|
LinearizeTables()(self.oeb, self.opts)
|
||||||
|
pr(0.7)
|
||||||
|
|
||||||
from calibre.ebooks.oeb.transforms.split import Split
|
from calibre.ebooks.oeb.transforms.split import Split
|
||||||
pbx = accelerators.get('pagebreaks', None)
|
pbx = accelerators.get('pagebreaks', None)
|
||||||
@ -605,6 +629,7 @@ OptionRecommendation(name='list_recipes',
|
|||||||
max_flow_size=self.opts.output_profile.flow_size,
|
max_flow_size=self.opts.output_profile.flow_size,
|
||||||
page_breaks_xpath=pbx)
|
page_breaks_xpath=pbx)
|
||||||
split(self.oeb, self.opts)
|
split(self.oeb, self.opts)
|
||||||
|
pr(0.9)
|
||||||
|
|
||||||
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
|
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
|
||||||
|
|
||||||
@ -613,10 +638,15 @@ OptionRecommendation(name='list_recipes',
|
|||||||
trimmer(self.oeb, self.opts)
|
trimmer(self.oeb, self.opts)
|
||||||
|
|
||||||
self.oeb.toc.rationalize_play_orders()
|
self.oeb.toc.rationalize_play_orders()
|
||||||
|
pr(1.)
|
||||||
|
|
||||||
self.log.info('Creating %s...'%self.output_plugin.name)
|
self.log.info('Creating %s...'%self.output_plugin.name)
|
||||||
|
our = CompositeProgressReporter(0.67, 1., self.ui_reporter)
|
||||||
|
self.output_plugin.report_progress = our
|
||||||
|
our(0., _('Creating')+' %s'%self.output_plugin.name)
|
||||||
self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
|
self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
|
||||||
self.opts, self.log)
|
self.opts, self.log)
|
||||||
|
self.ui_reporter(1.)
|
||||||
|
|
||||||
def create_oebbook(log, path_or_stream, opts, reader=None):
|
def create_oebbook(log, path_or_stream, opts, reader=None):
|
||||||
'''
|
'''
|
||||||
|
@ -1571,26 +1571,26 @@ class OEBBook(object):
|
|||||||
|
|
||||||
def decode(self, data):
|
def decode(self, data):
|
||||||
"""Automatically decode :param:`data` into a `unicode` object."""
|
"""Automatically decode :param:`data` into a `unicode` object."""
|
||||||
|
def fix_data(d):
|
||||||
|
return d.replace('\r\n', '\n').replace('\r', '\n')
|
||||||
if isinstance(data, unicode):
|
if isinstance(data, unicode):
|
||||||
return data
|
return fix_data(data)
|
||||||
if data[:2] in ('\xff\xfe', '\xfe\xff'):
|
if data[:2] in ('\xff\xfe', '\xfe\xff'):
|
||||||
try:
|
try:
|
||||||
return data.decode('utf-16')
|
return fix_data(data.decode('utf-16'))
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
pass
|
pass
|
||||||
if self.encoding is not None:
|
if self.encoding is not None:
|
||||||
try:
|
try:
|
||||||
return data.decode(self.encoding)
|
return fix_data(data.decode(self.encoding))
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
pass
|
pass
|
||||||
try:
|
try:
|
||||||
return data.decode('utf-8')
|
return fix_data(data.decode('utf-8'))
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
pass
|
pass
|
||||||
data, _ = xml_to_unicode(data)
|
data, _ = xml_to_unicode(data)
|
||||||
data = data.replace('\r\n', '\n')
|
return fix_data(data)
|
||||||
data = data.replace('\r', '\n')
|
|
||||||
return data
|
|
||||||
|
|
||||||
def to_opf1(self):
|
def to_opf1(self):
|
||||||
"""Produce OPF 1.2 representing the book's metadata and structure.
|
"""Produce OPF 1.2 representing the book's metadata and structure.
|
||||||
|
@ -173,18 +173,22 @@ class FlowSplitter(object):
|
|||||||
|
|
||||||
if self.max_flow_size > 0:
|
if self.max_flow_size > 0:
|
||||||
lt_found = False
|
lt_found = False
|
||||||
self.log('\tLooking for large trees...')
|
self.log('\tLooking for large trees in %s...'%item.href)
|
||||||
trees = list(self.trees)
|
trees = list(self.trees)
|
||||||
for i, tree in enumerate(list(self.trees)):
|
self.tree_map = {}
|
||||||
self.trees = []
|
for i, tree in enumerate(trees):
|
||||||
size = len(tostring(tree.getroot()))
|
size = len(tostring(tree.getroot()))
|
||||||
if size > self.opts.profile.flow_size:
|
if size > self.max_flow_size:
|
||||||
|
self.log('\tFound large tree #%d'%i)
|
||||||
lt_found = True
|
lt_found = True
|
||||||
|
self.split_trees = []
|
||||||
self.split_to_size(tree)
|
self.split_to_size(tree)
|
||||||
trees[i:i+1] = list(self.trees)
|
self.tree_map[tree] = self.split_trees
|
||||||
if not lt_found:
|
if not lt_found:
|
||||||
self.log_info('\tNo large trees found')
|
self.log('\tNo large trees found')
|
||||||
self.trees = trees
|
self.trees = []
|
||||||
|
for x in trees:
|
||||||
|
self.trees.extend(self.tree_map.get(x, [x]))
|
||||||
|
|
||||||
self.was_split = len(self.trees) > 1
|
self.was_split = len(self.trees) > 1
|
||||||
self.commit()
|
self.commit()
|
||||||
@ -347,11 +351,10 @@ class FlowSplitter(object):
|
|||||||
continue
|
continue
|
||||||
size = len(tostring(r))
|
size = len(tostring(r))
|
||||||
if size <= self.max_flow_size:
|
if size <= self.max_flow_size:
|
||||||
self.trees.append(t)
|
self.split_trees.append(t)
|
||||||
#print tostring(t.getroot(), pretty_print=True)
|
self.log.debug(
|
||||||
self.log.debug('\t\t\tCommitted sub-tree #%d (%d KB)',
|
'\t\t\tCommitted sub-tree #%d (%d KB)'%(
|
||||||
len(self.trees), size/1024.)
|
len(self.split_trees), size/1024.))
|
||||||
self.split_size += size
|
|
||||||
else:
|
else:
|
||||||
self.split_to_size(t)
|
self.split_to_size(t)
|
||||||
|
|
||||||
|
@ -38,7 +38,7 @@ class RecipeInput(InputFormatPlugin):
|
|||||||
])
|
])
|
||||||
|
|
||||||
def convert(self, recipe_or_file, opts, file_ext, log,
|
def convert(self, recipe_or_file, opts, file_ext, log,
|
||||||
accelerators, progress=lambda x, y: x):
|
accelerators):
|
||||||
from calibre.web.feeds.recipes import \
|
from calibre.web.feeds.recipes import \
|
||||||
get_builtin_recipe, compile_recipe
|
get_builtin_recipe, compile_recipe
|
||||||
if os.access(recipe_or_file, os.R_OK):
|
if os.access(recipe_or_file, os.R_OK):
|
||||||
@ -51,7 +51,7 @@ class RecipeInput(InputFormatPlugin):
|
|||||||
raise ValueError('%s is not a valid recipe file or builtin recipe' %
|
raise ValueError('%s is not a valid recipe file or builtin recipe' %
|
||||||
recipe_or_file)
|
recipe_or_file)
|
||||||
|
|
||||||
ro = recipe(opts, log, progress)
|
ro = recipe(opts, log, self.report_progress)
|
||||||
ro.download()
|
ro.download()
|
||||||
|
|
||||||
opts.output_profile.flow_size = 0
|
opts.output_profile.flow_size = 0
|
||||||
|
Loading…
x
Reference in New Issue
Block a user