Add support for progress reporting in the conversion pipeline and fix the size-based splitting code

2025-07-09 03:04:10 -04:00 · 2009-04-29 20:15:31 -07:00 · 2009-04-29 20:15:31 -07:00 · 13e7d6334b
commit 13e7d6334b
parent dbc2d315ed
7 changed files with 88 additions and 31 deletions
--- a/src/calibre/customize/conversion.py
+++ b/src/calibre/customize/conversion.py
@ -79,6 +79,10 @@ class OptionRecommendation(object):
                             repr(self.recommended_value) +
                             ' is not a string or a number')
 class DummyReporter(object):
    '''
    Progress reporter that does nothing: it accepts the same call
    signature as a real reporter and discards the update.
    '''
    def __call__(self, percent, msg=''):
        # Intentionally a no-op; both arguments are ignored.
        return None
 class InputFormatPlugin(Plugin):
    '''
@ -133,6 +137,10 @@ class InputFormatPlugin(Plugin):
    #: (option_name, recommended_value, recommendation_level)
    recommendations = set([])
    def __init__(self, *args):
        '''
        Initialize the plugin, forwarding all positional arguments to
        `Plugin.__init__`, and install a default progress reporter.
        '''
        Plugin.__init__(self, *args)
        # Default to a DummyReporter so that report_progress can always be
        # called, even when nothing has replaced this attribute.
        self.report_progress = DummyReporter()
    def get_images(self):
        '''
        Return a list of absolute paths to the images, if this input plugin
@ -242,6 +250,11 @@ class OutputFormatPlugin(Plugin):
    #: (option_name, recommended_value, recommendation_level)
    recommendations = set([])
    def __init__(self, *args):
        '''
        Initialize the plugin, forwarding all positional arguments to
        `Plugin.__init__`, and install a default progress reporter.
        '''
        Plugin.__init__(self, *args)
        # Default to a DummyReporter so that report_progress can always be
        # called, even when nothing has replaced this attribute.
        self.report_progress = DummyReporter()
    def convert(self, oeb_book, output, input_plugin, opts, log):
        '''
        Render the contents of `oeb_book` (which is an instance of
--- a/src/calibre/ebooks/comic/input.py
+++ b/src/calibre/ebooks/comic/input.py
@ -343,7 +343,7 @@ class ComicInput(InputFormatPlugin):
            new_pages = n2
        else:
            new_pages, failures = process_pages(new_pages, self.opts,
-                    self.progress, tdir2)
+                    self.report_progress, tdir2)
            if not new_pages:
                raise ValueError('Could not find any valid pages in comic: %s'
                        % comic)
@ -360,13 +360,12 @@ class ComicInput(InputFormatPlugin):
    def get_images(self):
        # Returns whatever is currently stored in self._images; that
        # attribute is assigned outside this method.
        return self._images
-    def convert(self, stream, opts, file_ext, log, accelerators,
+    def convert(self, stream, opts, file_ext, log, accelerators):
            progress=lambda p, m : m):
        from calibre.ebooks.metadata import MetaInformation
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.toc import TOC
-        self.opts, self.log, self.progress = opts, log, progress
+        self.opts, self.log= opts, log
        if file_ext == 'cbc':
            comics_ = self.get_comics_from_collection(stream)
        else:
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@ -175,8 +175,20 @@ def add_pipeline_options(parser, plumber):
 def option_parser():
    # Build and return a new OptionParser, passing USAGE as its usage text.
    return OptionParser(usage=USAGE)
 class ProgressBar(object):
    '''
    Callable progress reporter that writes updates to a log.

    An update is emitted as ``'<percent>% <msg>'`` where the percentage is
    the fraction multiplied by 100 and truncated to an integer. Updates
    that carry no message are dropped.
    '''
    def __init__(self, log):
        # Callable invoked with one formatted string per reported update.
        self.log = log
    def __call__(self, frac, msg=''):
        # Nothing is logged unless a message accompanies the update.
        if not msg:
            return
        self.log('%d%% %s'%(int(frac*100), msg))
 def main(args=sys.argv):
    log = Log()
    reporter = ProgressBar(log)
    parser = option_parser()
    if len(args) < 3:
        print_help(parser, log)
@ -186,7 +198,7 @@ def main(args=sys.argv):
    from calibre.ebooks.conversion.plumber import Plumber
-    plumber = Plumber(input, output, log)
+    plumber = Plumber(input, output, log, reporter)
    add_input_output_options(parser, plumber)
    add_pipeline_options(parser, plumber)
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -5,7 +5,7 @@ __docformat__ = 'restructuredtext en'
 import os, re
-from calibre.customize.conversion import OptionRecommendation
+from calibre.customize.conversion import OptionRecommendation, DummyReporter
 from calibre.customize.ui import input_profiles, output_profiles, \
        plugin_for_input_format, plugin_for_output_format
 from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
@ -22,6 +22,17 @@ def supported_input_formats():
 class OptionValues(object):
    # Empty class: defines no attributes or methods of its own.
    # NOTE(review): presumably used as a plain attribute container for
    # option values - confirm against the code that instantiates it.
    pass
 class CompositeProgressReporter(object):
    '''
    Reporter for one stage of a larger job.

    A local fraction is mapped linearly onto the interval
    ``[global_min, global_max]`` and forwarded, together with the message,
    to `global_reporter`.
    '''
    def __init__(self, global_min, global_max, global_reporter):
        self.global_min = global_min
        self.global_max = global_max
        self.global_reporter = global_reporter
    def __call__(self, fraction, msg=''):
        # Width of the slice of overall progress owned by this stage.
        span = self.global_max - self.global_min
        self.global_reporter(self.global_min + fraction * span, msg)
 class Plumber(object):
    '''
    The `Plumber` manages the conversion pipeline. An UI should call the methods
@ -35,7 +46,7 @@ class Plumber(object):
        'tags', 'book_producer', 'language'
        ]
-    def __init__(self, input, output, log):
+    def __init__(self, input, output, log, report_progress=DummyReporter()):
        '''
        :param input: Path to input file.
        :param output: Path to output file/directory
@ -43,6 +54,7 @@ class Plumber(object):
        self.input = os.path.abspath(input)
        self.output = os.path.abspath(output)
        self.log = log
        self.ui_reporter = report_progress
        # Initialize the conversion options that are independent of input and
        # output formats. The input and output plugins can still disable these
@ -63,7 +75,8 @@ OptionRecommendation(name='input_profile',
                   'conversion system information on how to interpret '
                   'various information in the input document. For '
                   'example resolution dependent lengths (i.e. lengths in '
-                   'pixels).')
+                   'pixels). Choices are:')+\
                        ', '.join([x.short_name for x in input_profiles()])
        ),
 OptionRecommendation(name='output_profile',
@ -73,8 +86,9 @@ OptionRecommendation(name='output_profile',
                   'tells the conversion system how to optimize the '
                   'created document for the specified device. In some cases, '
                   'an output profile is required to produce documents that '
-                   'will work on a device. For example EPUB on the SONY reader.'
+                   'will work on a device. For example EPUB on the SONY reader. '
-                   )
+                   'Choices are:') + \
                           ', '.join([x.short_name for x in output_profiles()])
        ),
 OptionRecommendation(name='base_font_size',
@ -552,6 +566,9 @@ OptionRecommendation(name='list_recipes',
        if hasattr(self.opts, 'lrf') and self.output_plugin.file_type == 'lrf':
            self.opts.lrf = True
        self.ui_reporter(0.01, _('Converting input to HTML...'))
        ir = CompositeProgressReporter(0.01, 0.34, self.ui_reporter)
        self.input_plugin.report_progress = ir
        self.oeb = self.input_plugin(stream, self.opts,
                                    self.input_fmt, self.log,
                                    accelerators, tdir)
@ -560,9 +577,12 @@ OptionRecommendation(name='list_recipes',
            return
        if not hasattr(self.oeb, 'manifest'):
            self.oeb = create_oebbook(self.log, self.oeb, self.opts)
        pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
        pr(0., _('Running transforms on ebook...'))
        from calibre.ebooks.oeb.transforms.guide import Clean
        Clean()(self.oeb, self.opts)
        pr(0.1)
        self.opts.source = self.opts.input_profile
        self.opts.dest = self.opts.output_profile
@ -570,9 +590,11 @@ OptionRecommendation(name='list_recipes',
        from calibre.ebooks.oeb.transforms.metadata import MergeMetadata
        MergeMetadata()(self.oeb, self.user_metadata,
                self.opts.prefer_metadata_cover)
        pr(0.2)
        from calibre.ebooks.oeb.transforms.structure import DetectStructure
        DetectStructure()(self.oeb, self.opts)
        pr(0.35)
        from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
        fbase = self.opts.base_font_size
@ -586,6 +608,7 @@ OptionRecommendation(name='list_recipes',
        from calibre.ebooks.oeb.transforms.jacket import Jacket
        Jacket()(self.oeb, self.opts)
        pr(0.4)
        if self.opts.extra_css and os.path.exists(self.opts.extra_css):
            self.opts.extra_css = open(self.opts.extra_css, 'rb').read()
@ -598,6 +621,7 @@ OptionRecommendation(name='list_recipes',
        if self.opts.linearize_tables:
            from calibre.ebooks.oeb.transforms.linearize_tables import LinearizeTables
            LinearizeTables()(self.oeb, self.opts)
        pr(0.7)
        from calibre.ebooks.oeb.transforms.split import Split
        pbx = accelerators.get('pagebreaks', None)
@ -605,6 +629,7 @@ OptionRecommendation(name='list_recipes',
                max_flow_size=self.opts.output_profile.flow_size,
                page_breaks_xpath=pbx)
        split(self.oeb, self.opts)
        pr(0.9)
        from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
@ -613,10 +638,15 @@ OptionRecommendation(name='list_recipes',
        trimmer(self.oeb, self.opts)
        self.oeb.toc.rationalize_play_orders()
        pr(1.)
        self.log.info('Creating %s...'%self.output_plugin.name)
        our = CompositeProgressReporter(0.67, 1., self.ui_reporter)
        self.output_plugin.report_progress = our
        our(0., _('Creating')+' %s'%self.output_plugin.name)
        self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
                self.opts, self.log)
        self.ui_reporter(1.)
 def create_oebbook(log, path_or_stream, opts, reader=None):
    '''
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -1571,26 +1571,26 @@ class OEBBook(object):
    def decode(self, data):
        """Automatically decode :param:`data` into a `unicode` object."""
        def fix_data(d):
            return d.replace('\r\n', '\n').replace('\r', '\n')
        if isinstance(data, unicode):
-            return data
+            return fix_data(data)
        if data[:2] in ('\xff\xfe', '\xfe\xff'):
            try:
-                return data.decode('utf-16')
+                return fix_data(data.decode('utf-16'))
            except UnicodeDecodeError:
                pass
        if self.encoding is not None:
            try:
-                return data.decode(self.encoding)
+                return fix_data(data.decode(self.encoding))
            except UnicodeDecodeError:
                pass
        try:
-            return data.decode('utf-8')
+            return fix_data(data.decode('utf-8'))
        except UnicodeDecodeError:
            pass
        data, _ = xml_to_unicode(data)
-        data = data.replace('\r\n', '\n')
+        return fix_data(data)
        data = data.replace('\r', '\n')
        return data
    def to_opf1(self):
        """Produce OPF 1.2 representing the book's metadata and structure.
--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@ -173,18 +173,22 @@ class FlowSplitter(object):
        if self.max_flow_size > 0:
            lt_found = False
-            self.log('\tLooking for large trees...')
+            self.log('\tLooking for large trees in %s...'%item.href)
            trees = list(self.trees)
-            for i, tree in enumerate(list(self.trees)):
+            self.tree_map = {}
-                self.trees = []
+            for i, tree in enumerate(trees):
                size = len(tostring(tree.getroot()))
-                if size > self.opts.profile.flow_size:
+                if size > self.max_flow_size:
                    self.log('\tFound large tree #%d'%i)
                    lt_found = True
                    self.split_trees = []
                    self.split_to_size(tree)
-                    trees[i:i+1] = list(self.trees)
+                    self.tree_map[tree] = self.split_trees
            if not lt_found:
-                self.log_info('\tNo large trees found')
+                self.log('\tNo large trees found')
-            self.trees = trees
+            self.trees = []
            for x in trees:
                self.trees.extend(self.tree_map.get(x, [x]))
        self.was_split = len(self.trees) > 1
        self.commit()
@ -347,11 +351,10 @@ class FlowSplitter(object):
                continue
            size = len(tostring(r))
            if size <= self.max_flow_size:
-                self.trees.append(t)
+                self.split_trees.append(t)
-                #print tostring(t.getroot(), pretty_print=True)
+                self.log.debug(
-                self.log.debug('\t\t\tCommitted sub-tree #%d (%d KB)',
+                    '\t\t\tCommitted sub-tree #%d (%d KB)'%(
-                               len(self.trees), size/1024.)
+                               len(self.split_trees), size/1024.))
                self.split_size += size
            else:
                self.split_to_size(t)
--- a/src/calibre/web/feeds/input.py
+++ b/src/calibre/web/feeds/input.py
@ -38,7 +38,7 @@ class RecipeInput(InputFormatPlugin):
        ])
    def convert(self, recipe_or_file, opts, file_ext, log,
-            accelerators, progress=lambda x, y: x):
+            accelerators):
        from calibre.web.feeds.recipes import \
                get_builtin_recipe, compile_recipe
        if os.access(recipe_or_file, os.R_OK):
@ -51,7 +51,7 @@ class RecipeInput(InputFormatPlugin):
            raise ValueError('%s is not a valid recipe file or builtin recipe' %
                    recipe_or_file)
-        ro = recipe(opts, log, progress)
+        ro = recipe(opts, log, self.report_progress)
        ro.download()
        opts.output_profile.flow_size = 0