# Patch summary: routes conversion progress reporting through a `report_progress`
# callable. Seven files under src/calibre are touched.
#
# - customize/conversion.py: adds DummyReporter, a callable taking (percent, msg='')
#   whose body is `pass`; InputFormatPlugin and OutputFormatPlugin gain an __init__
#   that calls Plugin.__init__ and sets self.report_progress = DummyReporter().
# - ebooks/comic/input.py, web/feeds/input.py: convert() drops its `progress`
#   keyword parameter; the code now passes self.report_progress instead.
# - ebooks/conversion/cli.py: adds ProgressBar, which logs '%d%% %s' only when msg
#   is non-empty, and hands an instance to Plumber as the fourth argument.
# - ebooks/conversion/plumber.py: adds CompositeProgressReporter, which forwards
#   global_min + fraction * (global_max - global_min) to a wrapped reporter.
#   Plumber.__init__ accepts report_progress and stores it as self.ui_reporter.
#   The run uses three sub-ranges: input plugin 0.01 to 0.34, transforms 0.34 to
#   0.67, output plugin 0.67 to 1.0. The input_profile / output_profile help
#   strings get the list of profile short_names appended.
# - ebooks/oeb/base.py: OEBBook.decode now passes every returned value through
#   fix_data ('\r\n' and '\r' replaced by '\n'); before, only the final
#   xml_to_unicode fallback did this replacement.
# - ebooks/oeb/transforms/split.py: FlowSplitter compares tree size against
#   self.max_flow_size (was self.opts.profile.flow_size), collects pieces in
#   self.split_trees, and rebuilds self.trees through a self.tree_map lookup
#   instead of splicing into the list by index; self.log_info becomes self.log.
#
# NOTE(review): 'Choices are:' is concatenated directly with ', '.join(...), so the
#   rendered help text has no space after the colon (both profile options).
# NOTE(review): report_progress=DummyReporter() is evaluated once at definition
#   time and shared by all Plumber instances; harmless while DummyReporter keeps
#   no state, as in this patch.
# NOTE(review): ProgressBar ignores calls with an empty msg, so the bare pr(0.1) ...
#   pr(1.) and self.ui_reporter(1.) updates produce no CLI output.
# NOTE(review): in this copy the hunks' line breaks and indentation appear to have
#   been collapsed into single spaces; it presumably needs re-wrapping against
#   the original commit before it can be applied - TODO confirm.
diff --git a/src/calibre/customize/conversion.py b/src/calibre/customize/conversion.py index 7573dddeac..7920b823de 100644 --- a/src/calibre/customize/conversion.py +++ b/src/calibre/customize/conversion.py @@ -79,6 +79,10 @@ class OptionRecommendation(object): repr(self.recommended_value) + ' is not a string or a number') +class DummyReporter(object): + + def __call__(self, percent, msg=''): + pass class InputFormatPlugin(Plugin): ''' @@ -133,6 +137,10 @@ class InputFormatPlugin(Plugin): #: (option_name, recommended_value, recommendation_level) recommendations = set([]) + def __init__(self, *args): + Plugin.__init__(self, *args) + self.report_progress = DummyReporter() + def get_images(self): ''' Return a list of absolute paths to the images, if this input plugin @@ -242,6 +250,11 @@ class OutputFormatPlugin(Plugin): #: (option_name, recommended_value, recommendation_level) recommendations = set([]) + def __init__(self, *args): + Plugin.__init__(self, *args) + self.report_progress = DummyReporter() + + def convert(self, oeb_book, output, input_plugin, opts, log): ''' Render the contents of `oeb_book` (which is an instance of diff --git a/src/calibre/ebooks/comic/input.py b/src/calibre/ebooks/comic/input.py index 82070bbc72..046acb4232 100755 --- a/src/calibre/ebooks/comic/input.py +++ b/src/calibre/ebooks/comic/input.py @@ -343,7 +343,7 @@ class ComicInput(InputFormatPlugin): new_pages = n2 else: new_pages, failures = process_pages(new_pages, self.opts, - self.progress, tdir2) + self.report_progress, tdir2) if not new_pages: raise ValueError('Could not find any valid pages in comic: %s' % comic) @@ -360,13 +360,12 @@ class ComicInput(InputFormatPlugin): def get_images(self): return self._images - def convert(self, stream, opts, file_ext, log, accelerators, - progress=lambda p, m : m): + def convert(self, stream, opts, file_ext, log, accelerators): from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata.opf2 import OPFCreator from 
calibre.ebooks.metadata.toc import TOC - self.opts, self.log, self.progress = opts, log, progress + self.opts, self.log= opts, log if file_ext == 'cbc': comics_ = self.get_comics_from_collection(stream) else: diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index d8de702915..b165fbf8f4 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -175,8 +175,20 @@ def add_pipeline_options(parser, plumber): def option_parser(): return OptionParser(usage=USAGE) + +class ProgressBar(object): + + def __init__(self, log): + self.log = log + + def __call__(self, frac, msg=''): + if msg: + percent = int(frac*100) + self.log('%d%% %s'%(percent, msg)) + def main(args=sys.argv): log = Log() + reporter = ProgressBar(log) parser = option_parser() if len(args) < 3: print_help(parser, log) @@ -186,7 +198,7 @@ def main(args=sys.argv): from calibre.ebooks.conversion.plumber import Plumber - plumber = Plumber(input, output, log) + plumber = Plumber(input, output, log, reporter) add_input_output_options(parser, plumber) add_pipeline_options(parser, plumber) diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 1ef58e1d95..9987ec0243 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -5,7 +5,7 @@ __docformat__ = 'restructuredtext en' import os, re -from calibre.customize.conversion import OptionRecommendation +from calibre.customize.conversion import OptionRecommendation, DummyReporter from calibre.customize.ui import input_profiles, output_profiles, \ plugin_for_input_format, plugin_for_output_format from calibre.ebooks.conversion.preprocess import HTMLPreProcessor @@ -22,6 +22,17 @@ def supported_input_formats(): class OptionValues(object): pass +class CompositeProgressReporter(object): + + def __init__(self, global_min, global_max, global_reporter): + self.global_min, self.global_max = global_min, global_max 
+ self.global_reporter = global_reporter + + def __call__(self, fraction, msg=''): + global_frac = self.global_min + fraction * \ + (self.global_max - self.global_min) + self.global_reporter(global_frac, msg) + class Plumber(object): ''' The `Plumber` manages the conversion pipeline. An UI should call the methods @@ -35,7 +46,7 @@ class Plumber(object): 'tags', 'book_producer', 'language' ] - def __init__(self, input, output, log): + def __init__(self, input, output, log, report_progress=DummyReporter()): ''' :param input: Path to input file. :param output: Path to output file/directory @@ -43,6 +54,7 @@ class Plumber(object): self.input = os.path.abspath(input) self.output = os.path.abspath(output) self.log = log + self.ui_reporter = report_progress # Initialize the conversion options that are independent of input and # output formats. The input and output plugins can still disable these @@ -63,7 +75,8 @@ OptionRecommendation(name='input_profile', 'conversion system information on how to interpret ' 'various information in the input document. For ' 'example resolution dependent lengths (i.e. lengths in ' - 'pixels).') + 'pixels). Choices are:')+\ + ', '.join([x.short_name for x in input_profiles()]) ), OptionRecommendation(name='output_profile', @@ -73,8 +86,9 @@ OptionRecommendation(name='output_profile', 'tells the conversion system how to optimize the ' 'created document for the specified device. In some cases, ' 'an output profile is required to produce documents that ' - 'will work on a device. For example EPUB on the SONY reader.' - ) + 'will work on a device. For example EPUB on the SONY reader. 
' + 'Choices are:') + \ + ', '.join([x.short_name for x in output_profiles()]) ), OptionRecommendation(name='base_font_size', @@ -552,6 +566,9 @@ OptionRecommendation(name='list_recipes', if hasattr(self.opts, 'lrf') and self.output_plugin.file_type == 'lrf': self.opts.lrf = True + self.ui_reporter(0.01, _('Converting input to HTML...')) + ir = CompositeProgressReporter(0.01, 0.34, self.ui_reporter) + self.input_plugin.report_progress = ir self.oeb = self.input_plugin(stream, self.opts, self.input_fmt, self.log, accelerators, tdir) @@ -560,9 +577,12 @@ OptionRecommendation(name='list_recipes', return if not hasattr(self.oeb, 'manifest'): self.oeb = create_oebbook(self.log, self.oeb, self.opts) + pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter) + pr(0., _('Running transforms on ebook...')) from calibre.ebooks.oeb.transforms.guide import Clean Clean()(self.oeb, self.opts) + pr(0.1) self.opts.source = self.opts.input_profile self.opts.dest = self.opts.output_profile @@ -570,9 +590,11 @@ OptionRecommendation(name='list_recipes', from calibre.ebooks.oeb.transforms.metadata import MergeMetadata MergeMetadata()(self.oeb, self.user_metadata, self.opts.prefer_metadata_cover) + pr(0.2) from calibre.ebooks.oeb.transforms.structure import DetectStructure DetectStructure()(self.oeb, self.opts) + pr(0.35) from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener fbase = self.opts.base_font_size @@ -586,6 +608,7 @@ OptionRecommendation(name='list_recipes', from calibre.ebooks.oeb.transforms.jacket import Jacket Jacket()(self.oeb, self.opts) + pr(0.4) if self.opts.extra_css and os.path.exists(self.opts.extra_css): self.opts.extra_css = open(self.opts.extra_css, 'rb').read() @@ -598,6 +621,7 @@ OptionRecommendation(name='list_recipes', if self.opts.linearize_tables: from calibre.ebooks.oeb.transforms.linearize_tables import LinearizeTables LinearizeTables()(self.oeb, self.opts) + pr(0.7) from calibre.ebooks.oeb.transforms.split import Split pbx = 
accelerators.get('pagebreaks', None) @@ -605,6 +629,7 @@ OptionRecommendation(name='list_recipes', max_flow_size=self.opts.output_profile.flow_size, page_breaks_xpath=pbx) split(self.oeb, self.opts) + pr(0.9) from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer @@ -613,10 +638,15 @@ OptionRecommendation(name='list_recipes', trimmer(self.oeb, self.opts) self.oeb.toc.rationalize_play_orders() + pr(1.) self.log.info('Creating %s...'%self.output_plugin.name) + our = CompositeProgressReporter(0.67, 1., self.ui_reporter) + self.output_plugin.report_progress = our + our(0., _('Creating')+' %s'%self.output_plugin.name) self.output_plugin.convert(self.oeb, self.output, self.input_plugin, self.opts, self.log) + self.ui_reporter(1.) def create_oebbook(log, path_or_stream, opts, reader=None): ''' diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index f5395e04fe..faf2d02dc4 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -1571,26 +1571,26 @@ class OEBBook(object): def decode(self, data): """Automatically decode :param:`data` into a `unicode` object.""" + def fix_data(d): + return d.replace('\r\n', '\n').replace('\r', '\n') if isinstance(data, unicode): - return data + return fix_data(data) if data[:2] in ('\xff\xfe', '\xfe\xff'): try: - return data.decode('utf-16') + return fix_data(data.decode('utf-16')) except UnicodeDecodeError: pass if self.encoding is not None: try: - return data.decode(self.encoding) + return fix_data(data.decode(self.encoding)) except UnicodeDecodeError: pass try: - return data.decode('utf-8') + return fix_data(data.decode('utf-8')) except UnicodeDecodeError: pass data, _ = xml_to_unicode(data) - data = data.replace('\r\n', '\n') - data = data.replace('\r', '\n') - return data + return fix_data(data) def to_opf1(self): """Produce OPF 1.2 representing the book's metadata and structure. 
diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py index ec3d63192d..e83f211fb0 100644 --- a/src/calibre/ebooks/oeb/transforms/split.py +++ b/src/calibre/ebooks/oeb/transforms/split.py @@ -173,18 +173,22 @@ class FlowSplitter(object): if self.max_flow_size > 0: lt_found = False - self.log('\tLooking for large trees...') + self.log('\tLooking for large trees in %s...'%item.href) trees = list(self.trees) - for i, tree in enumerate(list(self.trees)): - self.trees = [] + self.tree_map = {} + for i, tree in enumerate(trees): size = len(tostring(tree.getroot())) - if size > self.opts.profile.flow_size: + if size > self.max_flow_size: + self.log('\tFound large tree #%d'%i) lt_found = True + self.split_trees = [] self.split_to_size(tree) - trees[i:i+1] = list(self.trees) + self.tree_map[tree] = self.split_trees if not lt_found: - self.log_info('\tNo large trees found') - self.trees = trees + self.log('\tNo large trees found') + self.trees = [] + for x in trees: + self.trees.extend(self.tree_map.get(x, [x])) self.was_split = len(self.trees) > 1 self.commit() @@ -347,11 +351,10 @@ class FlowSplitter(object): continue size = len(tostring(r)) if size <= self.max_flow_size: - self.trees.append(t) - #print tostring(t.getroot(), pretty_print=True) - self.log.debug('\t\t\tCommitted sub-tree #%d (%d KB)', - len(self.trees), size/1024.) 
- self.split_size += size + self.split_trees.append(t) + self.log.debug( + '\t\t\tCommitted sub-tree #%d (%d KB)'%( + len(self.split_trees), size/1024.)) else: self.split_to_size(t) diff --git a/src/calibre/web/feeds/input.py b/src/calibre/web/feeds/input.py index 21324293d3..e0a8b807c8 100644 --- a/src/calibre/web/feeds/input.py +++ b/src/calibre/web/feeds/input.py @@ -38,7 +38,7 @@ class RecipeInput(InputFormatPlugin): ]) def convert(self, recipe_or_file, opts, file_ext, log, - accelerators, progress=lambda x, y: x): + accelerators): from calibre.web.feeds.recipes import \ get_builtin_recipe, compile_recipe if os.access(recipe_or_file, os.R_OK): @@ -51,7 +51,7 @@ class RecipeInput(InputFormatPlugin): raise ValueError('%s is not a valid recipe file or builtin recipe' % recipe_or_file) - ro = recipe(opts, log, progress) + ro = recipe(opts, log, self.report_progress) ro.download() opts.output_profile.flow_size = 0