Add support for progress reporting in the conversion pipeline and fix the size-based splitting code

This commit is contained in:
Kovid Goyal 2009-04-29 20:15:31 -07:00
parent dbc2d315ed
commit 13e7d6334b
7 changed files with 88 additions and 31 deletions

View File

@ -79,6 +79,10 @@ class OptionRecommendation(object):
repr(self.recommended_value) + repr(self.recommended_value) +
' is not a string or a number') ' is not a string or a number')
class DummyReporter(object):
    """Progress reporter that silently discards every update.

    Instances are callable with the same signature as the other progress
    reporters in this change (a numeric progress value plus an optional
    message), so one can stand in wherever no real reporter was supplied.
    """

    def __call__(self, percent, msg=''):
        """Accept a progress update and do nothing with it.

        :param percent: Progress value; ignored.
        :param msg: Optional status message; ignored.
        """
        return None
class InputFormatPlugin(Plugin): class InputFormatPlugin(Plugin):
''' '''
@ -133,6 +137,10 @@ class InputFormatPlugin(Plugin):
#: (option_name, recommended_value, recommendation_level) #: (option_name, recommended_value, recommendation_level)
recommendations = set([]) recommendations = set([])
def __init__(self, *args):
    """Initialise the base ``Plugin`` and install a no-op progress reporter.

    :param args: Positional arguments forwarded unchanged to
        ``Plugin.__init__``.
    """
    Plugin.__init__(self, *args)
    # Default to a reporter that ignores updates, so convert() code can
    # always call self.report_progress; the conversion pipeline assigns a
    # real reporter to this attribute before running the plugin.
    silent_reporter = DummyReporter()
    self.report_progress = silent_reporter
def get_images(self): def get_images(self):
''' '''
Return a list of absolute paths to the images, if this input plugin Return a list of absolute paths to the images, if this input plugin
@ -242,6 +250,11 @@ class OutputFormatPlugin(Plugin):
#: (option_name, recommended_value, recommendation_level) #: (option_name, recommended_value, recommendation_level)
recommendations = set([]) recommendations = set([])
def __init__(self, *args):
    """Initialise the base ``Plugin`` and install a no-op progress reporter.

    :param args: Positional arguments forwarded unchanged to
        ``Plugin.__init__``.
    """
    Plugin.__init__(self, *args)
    # Default to a reporter that ignores updates, so convert() code can
    # always call self.report_progress; the conversion pipeline assigns a
    # real reporter to this attribute before calling convert().
    silent_reporter = DummyReporter()
    self.report_progress = silent_reporter
def convert(self, oeb_book, output, input_plugin, opts, log): def convert(self, oeb_book, output, input_plugin, opts, log):
''' '''
Render the contents of `oeb_book` (which is an instance of Render the contents of `oeb_book` (which is an instance of

View File

@ -343,7 +343,7 @@ class ComicInput(InputFormatPlugin):
new_pages = n2 new_pages = n2
else: else:
new_pages, failures = process_pages(new_pages, self.opts, new_pages, failures = process_pages(new_pages, self.opts,
self.progress, tdir2) self.report_progress, tdir2)
if not new_pages: if not new_pages:
raise ValueError('Could not find any valid pages in comic: %s' raise ValueError('Could not find any valid pages in comic: %s'
% comic) % comic)
@ -360,13 +360,12 @@ class ComicInput(InputFormatPlugin):
def get_images(self): def get_images(self):
return self._images return self._images
def convert(self, stream, opts, file_ext, log, accelerators, def convert(self, stream, opts, file_ext, log, accelerators):
progress=lambda p, m : m):
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata.toc import TOC
self.opts, self.log, self.progress = opts, log, progress self.opts, self.log= opts, log
if file_ext == 'cbc': if file_ext == 'cbc':
comics_ = self.get_comics_from_collection(stream) comics_ = self.get_comics_from_collection(stream)
else: else:

View File

@ -175,8 +175,20 @@ def add_pipeline_options(parser, plumber):
def option_parser(): def option_parser():
return OptionParser(usage=USAGE) return OptionParser(usage=USAGE)
class ProgressBar(object):
    """Progress reporter that writes labelled milestones to a log callable.

    Only updates that carry a message are emitted; message-less updates
    are dropped so the log is not flooded with bare percentages.
    """

    def __init__(self, log):
        """
        :param log: Callable invoked with one formatted string per
            reported milestone.
        """
        self.log = log

    def __call__(self, frac, msg=''):
        """Log ``msg`` prefixed with the completion percentage.

        :param frac: Fraction of the work completed, nominally in the
            range 0.0 to 1.0.
        :param msg: Description of the current stage. When empty, nothing
            is logged.
        """
        if not msg:
            return
        # Round instead of truncating: binary floats make 0.29 * 100 equal
        # 28.999999999999996, which int() alone would report as 28%.
        percent = int(round(frac * 100))
        # Keep the displayed value inside 0..100 even if a caller reports
        # a fraction slightly outside the nominal range.
        percent = max(0, min(100, percent))
        self.log('%d%% %s' % (percent, msg))
def main(args=sys.argv): def main(args=sys.argv):
log = Log() log = Log()
reporter = ProgressBar(log)
parser = option_parser() parser = option_parser()
if len(args) < 3: if len(args) < 3:
print_help(parser, log) print_help(parser, log)
@ -186,7 +198,7 @@ def main(args=sys.argv):
from calibre.ebooks.conversion.plumber import Plumber from calibre.ebooks.conversion.plumber import Plumber
plumber = Plumber(input, output, log) plumber = Plumber(input, output, log, reporter)
add_input_output_options(parser, plumber) add_input_output_options(parser, plumber)
add_pipeline_options(parser, plumber) add_pipeline_options(parser, plumber)

View File

@ -5,7 +5,7 @@ __docformat__ = 'restructuredtext en'
import os, re import os, re
from calibre.customize.conversion import OptionRecommendation from calibre.customize.conversion import OptionRecommendation, DummyReporter
from calibre.customize.ui import input_profiles, output_profiles, \ from calibre.customize.ui import input_profiles, output_profiles, \
plugin_for_input_format, plugin_for_output_format plugin_for_input_format, plugin_for_output_format
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
@ -22,6 +22,17 @@ def supported_input_formats():
class OptionValues(object): class OptionValues(object):
pass pass
class CompositeProgressReporter(object):
    """Map one stage's local progress onto a slice of the overall progress.

    A local fraction is linearly rescaled into the interval
    ``[global_min, global_max]`` and forwarded, with its message, to the
    wrapped reporter.
    """

    def __init__(self, global_min, global_max, global_reporter):
        """
        :param global_min: Overall progress value that local fraction 0
            maps to.
        :param global_max: Overall progress value that local fraction 1
            maps to.
        :param global_reporter: Callable receiving ``(fraction, msg)``
            with the rescaled fraction.
        """
        self.global_reporter = global_reporter
        self.global_min = global_min
        self.global_max = global_max

    def __call__(self, fraction, msg=''):
        """Rescale ``fraction`` and pass it, with ``msg``, to the wrapped reporter.

        :param fraction: Progress within this stage.
        :param msg: Optional status message, forwarded unchanged.
        """
        span = self.global_max - self.global_min
        self.global_reporter(self.global_min + fraction * span, msg)
class Plumber(object): class Plumber(object):
''' '''
The `Plumber` manages the conversion pipeline. An UI should call the methods The `Plumber` manages the conversion pipeline. An UI should call the methods
@ -35,7 +46,7 @@ class Plumber(object):
'tags', 'book_producer', 'language' 'tags', 'book_producer', 'language'
] ]
def __init__(self, input, output, log): def __init__(self, input, output, log, report_progress=DummyReporter()):
''' '''
:param input: Path to input file. :param input: Path to input file.
:param output: Path to output file/directory :param output: Path to output file/directory
@ -43,6 +54,7 @@ class Plumber(object):
self.input = os.path.abspath(input) self.input = os.path.abspath(input)
self.output = os.path.abspath(output) self.output = os.path.abspath(output)
self.log = log self.log = log
self.ui_reporter = report_progress
# Initialize the conversion options that are independent of input and # Initialize the conversion options that are independent of input and
# output formats. The input and output plugins can still disable these # output formats. The input and output plugins can still disable these
@ -63,7 +75,8 @@ OptionRecommendation(name='input_profile',
'conversion system information on how to interpret ' 'conversion system information on how to interpret '
'various information in the input document. For ' 'various information in the input document. For '
'example resolution dependent lengths (i.e. lengths in ' 'example resolution dependent lengths (i.e. lengths in '
'pixels).') 'pixels). Choices are:')+\
', '.join([x.short_name for x in input_profiles()])
), ),
OptionRecommendation(name='output_profile', OptionRecommendation(name='output_profile',
@ -73,8 +86,9 @@ OptionRecommendation(name='output_profile',
'tells the conversion system how to optimize the ' 'tells the conversion system how to optimize the '
'created document for the specified device. In some cases, ' 'created document for the specified device. In some cases, '
'an output profile is required to produce documents that ' 'an output profile is required to produce documents that '
'will work on a device. For example EPUB on the SONY reader.' 'will work on a device. For example EPUB on the SONY reader. '
) 'Choices are:') + \
', '.join([x.short_name for x in output_profiles()])
), ),
OptionRecommendation(name='base_font_size', OptionRecommendation(name='base_font_size',
@ -552,6 +566,9 @@ OptionRecommendation(name='list_recipes',
if hasattr(self.opts, 'lrf') and self.output_plugin.file_type == 'lrf': if hasattr(self.opts, 'lrf') and self.output_plugin.file_type == 'lrf':
self.opts.lrf = True self.opts.lrf = True
self.ui_reporter(0.01, _('Converting input to HTML...'))
ir = CompositeProgressReporter(0.01, 0.34, self.ui_reporter)
self.input_plugin.report_progress = ir
self.oeb = self.input_plugin(stream, self.opts, self.oeb = self.input_plugin(stream, self.opts,
self.input_fmt, self.log, self.input_fmt, self.log,
accelerators, tdir) accelerators, tdir)
@ -560,9 +577,12 @@ OptionRecommendation(name='list_recipes',
return return
if not hasattr(self.oeb, 'manifest'): if not hasattr(self.oeb, 'manifest'):
self.oeb = create_oebbook(self.log, self.oeb, self.opts) self.oeb = create_oebbook(self.log, self.oeb, self.opts)
pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
pr(0., _('Running transforms on ebook...'))
from calibre.ebooks.oeb.transforms.guide import Clean from calibre.ebooks.oeb.transforms.guide import Clean
Clean()(self.oeb, self.opts) Clean()(self.oeb, self.opts)
pr(0.1)
self.opts.source = self.opts.input_profile self.opts.source = self.opts.input_profile
self.opts.dest = self.opts.output_profile self.opts.dest = self.opts.output_profile
@ -570,9 +590,11 @@ OptionRecommendation(name='list_recipes',
from calibre.ebooks.oeb.transforms.metadata import MergeMetadata from calibre.ebooks.oeb.transforms.metadata import MergeMetadata
MergeMetadata()(self.oeb, self.user_metadata, MergeMetadata()(self.oeb, self.user_metadata,
self.opts.prefer_metadata_cover) self.opts.prefer_metadata_cover)
pr(0.2)
from calibre.ebooks.oeb.transforms.structure import DetectStructure from calibre.ebooks.oeb.transforms.structure import DetectStructure
DetectStructure()(self.oeb, self.opts) DetectStructure()(self.oeb, self.opts)
pr(0.35)
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
fbase = self.opts.base_font_size fbase = self.opts.base_font_size
@ -586,6 +608,7 @@ OptionRecommendation(name='list_recipes',
from calibre.ebooks.oeb.transforms.jacket import Jacket from calibre.ebooks.oeb.transforms.jacket import Jacket
Jacket()(self.oeb, self.opts) Jacket()(self.oeb, self.opts)
pr(0.4)
if self.opts.extra_css and os.path.exists(self.opts.extra_css): if self.opts.extra_css and os.path.exists(self.opts.extra_css):
self.opts.extra_css = open(self.opts.extra_css, 'rb').read() self.opts.extra_css = open(self.opts.extra_css, 'rb').read()
@ -598,6 +621,7 @@ OptionRecommendation(name='list_recipes',
if self.opts.linearize_tables: if self.opts.linearize_tables:
from calibre.ebooks.oeb.transforms.linearize_tables import LinearizeTables from calibre.ebooks.oeb.transforms.linearize_tables import LinearizeTables
LinearizeTables()(self.oeb, self.opts) LinearizeTables()(self.oeb, self.opts)
pr(0.7)
from calibre.ebooks.oeb.transforms.split import Split from calibre.ebooks.oeb.transforms.split import Split
pbx = accelerators.get('pagebreaks', None) pbx = accelerators.get('pagebreaks', None)
@ -605,6 +629,7 @@ OptionRecommendation(name='list_recipes',
max_flow_size=self.opts.output_profile.flow_size, max_flow_size=self.opts.output_profile.flow_size,
page_breaks_xpath=pbx) page_breaks_xpath=pbx)
split(self.oeb, self.opts) split(self.oeb, self.opts)
pr(0.9)
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
@ -613,10 +638,15 @@ OptionRecommendation(name='list_recipes',
trimmer(self.oeb, self.opts) trimmer(self.oeb, self.opts)
self.oeb.toc.rationalize_play_orders() self.oeb.toc.rationalize_play_orders()
pr(1.)
self.log.info('Creating %s...'%self.output_plugin.name) self.log.info('Creating %s...'%self.output_plugin.name)
our = CompositeProgressReporter(0.67, 1., self.ui_reporter)
self.output_plugin.report_progress = our
our(0., _('Creating')+' %s'%self.output_plugin.name)
self.output_plugin.convert(self.oeb, self.output, self.input_plugin, self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
self.opts, self.log) self.opts, self.log)
self.ui_reporter(1.)
def create_oebbook(log, path_or_stream, opts, reader=None): def create_oebbook(log, path_or_stream, opts, reader=None):
''' '''

View File

@ -1571,26 +1571,26 @@ class OEBBook(object):
def decode(self, data): def decode(self, data):
"""Automatically decode :param:`data` into a `unicode` object.""" """Automatically decode :param:`data` into a `unicode` object."""
def fix_data(d):
return d.replace('\r\n', '\n').replace('\r', '\n')
if isinstance(data, unicode): if isinstance(data, unicode):
return data return fix_data(data)
if data[:2] in ('\xff\xfe', '\xfe\xff'): if data[:2] in ('\xff\xfe', '\xfe\xff'):
try: try:
return data.decode('utf-16') return fix_data(data.decode('utf-16'))
except UnicodeDecodeError: except UnicodeDecodeError:
pass pass
if self.encoding is not None: if self.encoding is not None:
try: try:
return data.decode(self.encoding) return fix_data(data.decode(self.encoding))
except UnicodeDecodeError: except UnicodeDecodeError:
pass pass
try: try:
return data.decode('utf-8') return fix_data(data.decode('utf-8'))
except UnicodeDecodeError: except UnicodeDecodeError:
pass pass
data, _ = xml_to_unicode(data) data, _ = xml_to_unicode(data)
data = data.replace('\r\n', '\n') return fix_data(data)
data = data.replace('\r', '\n')
return data
def to_opf1(self): def to_opf1(self):
"""Produce OPF 1.2 representing the book's metadata and structure. """Produce OPF 1.2 representing the book's metadata and structure.

View File

@ -173,18 +173,22 @@ class FlowSplitter(object):
if self.max_flow_size > 0: if self.max_flow_size > 0:
lt_found = False lt_found = False
self.log('\tLooking for large trees...') self.log('\tLooking for large trees in %s...'%item.href)
trees = list(self.trees) trees = list(self.trees)
for i, tree in enumerate(list(self.trees)): self.tree_map = {}
self.trees = [] for i, tree in enumerate(trees):
size = len(tostring(tree.getroot())) size = len(tostring(tree.getroot()))
if size > self.opts.profile.flow_size: if size > self.max_flow_size:
self.log('\tFound large tree #%d'%i)
lt_found = True lt_found = True
self.split_trees = []
self.split_to_size(tree) self.split_to_size(tree)
trees[i:i+1] = list(self.trees) self.tree_map[tree] = self.split_trees
if not lt_found: if not lt_found:
self.log_info('\tNo large trees found') self.log('\tNo large trees found')
self.trees = trees self.trees = []
for x in trees:
self.trees.extend(self.tree_map.get(x, [x]))
self.was_split = len(self.trees) > 1 self.was_split = len(self.trees) > 1
self.commit() self.commit()
@ -347,11 +351,10 @@ class FlowSplitter(object):
continue continue
size = len(tostring(r)) size = len(tostring(r))
if size <= self.max_flow_size: if size <= self.max_flow_size:
self.trees.append(t) self.split_trees.append(t)
#print tostring(t.getroot(), pretty_print=True) self.log.debug(
self.log.debug('\t\t\tCommitted sub-tree #%d (%d KB)', '\t\t\tCommitted sub-tree #%d (%d KB)'%(
len(self.trees), size/1024.) len(self.split_trees), size/1024.))
self.split_size += size
else: else:
self.split_to_size(t) self.split_to_size(t)

View File

@ -38,7 +38,7 @@ class RecipeInput(InputFormatPlugin):
]) ])
def convert(self, recipe_or_file, opts, file_ext, log, def convert(self, recipe_or_file, opts, file_ext, log,
accelerators, progress=lambda x, y: x): accelerators):
from calibre.web.feeds.recipes import \ from calibre.web.feeds.recipes import \
get_builtin_recipe, compile_recipe get_builtin_recipe, compile_recipe
if os.access(recipe_or_file, os.R_OK): if os.access(recipe_or_file, os.R_OK):
@ -51,7 +51,7 @@ class RecipeInput(InputFormatPlugin):
raise ValueError('%s is not a valid recipe file or builtin recipe' % raise ValueError('%s is not a valid recipe file or builtin recipe' %
recipe_or_file) recipe_or_file)
ro = recipe(opts, log, progress) ro = recipe(opts, log, self.report_progress)
ro.download() ro.download()
opts.output_profile.flow_size = 0 opts.output_profile.flow_size = 0