Sync to pluginize

2025-08-30 23:00:21 -04:00 · 2009-05-02 18:13:51 -04:00 · 2009-05-02 18:13:51 -04:00 · 9aae507c07
commit 9aae507c07
parent 65c53808da 3a99f99104
12 changed files with 128 additions and 190 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -290,6 +290,7 @@ from calibre.ebooks.comic.input import ComicInput
 from calibre.web.feeds.input import RecipeInput
 from calibre.ebooks.oeb.output import OEBOutput
 from calibre.ebooks.epub.output import EPUBOutput
 from calibre.ebooks.mobi.output import MOBIOutput
 from calibre.ebooks.txt.output import TXTOutput
 from calibre.ebooks.pdf.output import PDFOutput
 from calibre.ebooks.pml.input import PMLInput
@ -309,9 +310,9 @@ from calibre.devices.jetbook.driver import JETBOOK
 plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
        TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
        FB2Input, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput,
-        PMLOutput]
+        PMLOutput, MOBIOutput]
-plugins += [PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY, EB600, \
+plugins += [PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY,
-        JETBOOK]
+        EB600, JETBOOK]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                        x.__name__.endswith('MetadataReader')]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
--- a/src/calibre/customize/conversion.py
+++ b/src/calibre/customize/conversion.py
@ -149,6 +149,18 @@ class InputFormatPlugin(Plugin):
        '''
        raise NotImplementedError()
    def preprocess_html(self, html):
        '''
        Hook for input plugins that need to clean up HTML before it is parsed.
        The conversion pipeline hands every HTML document to this method ahead
        of parsing; typical uses are repairs such as removing hard line breaks.
        This base implementation gives the markup back untouched.
        :param html: A unicode string holding the raw HTML
        :return: A unicode string holding the HTML that should be parsed
        '''
        return html
    def convert(self, stream, options, file_ext, log, accelerators):
        '''
        This method must be implemented in sub-classes. It must return
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@ -126,9 +126,10 @@ def add_pipeline_options(parser, plumber):
              'STRUCTURE DETECTION' : (
                  _('Control auto-detection of document structure.'),
                  [
-                      'dont_split_on_page_breaks', 'chapter', 'chapter_mark',
+                      'chapter', 'chapter_mark',
                      'prefer_metadata_cover', 'remove_first_image',
                      'insert_metadata', 'page_breaks_before',
                      'preprocess_html',
                  ]
                  ),
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -131,18 +131,6 @@ OptionRecommendation(name='linearize_tables',
                )
        ),
 OptionRecommendation(name='dont_split_on_page_breaks',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Turn off splitting at page breaks. Normally, input '
                    'files are automatically split at every page break into '
                    'two files. This gives an output ebook that can be '
                    'parsed faster and with less resources. However, '
                    'splitting is slow and if your source file contains a '
                    'very large number of page breaks, you should turn off '
                    'splitting on page breaks.'
                )
        ),
 OptionRecommendation(name='level1_toc',
            recommended_value=None, level=OptionRecommendation.LOW,
            help=_('XPath expression that specifies all tags that '
@ -312,6 +300,14 @@ OptionRecommendation(name='insert_metadata',
            )
        ),
 OptionRecommendation(name='preprocess_html',
        recommended_value=False, level=OptionRecommendation.LOW,
        help=_('Attempt to detect and correct hard line breaks and other '
            'problems in the source file. This may make things worse, so use '
            'with care.'
            )
        ),
 OptionRecommendation(name='read_metadata_from_opf',
            recommended_value=None, level=OptionRecommendation.LOW,
@ -580,7 +576,8 @@ OptionRecommendation(name='list_recipes',
            self.log('Debug input called, aborting the rest of the pipeline.')
            return
        if not hasattr(self.oeb, 'manifest'):
-            self.oeb = create_oebbook(self.log, self.oeb, self.opts)
+            self.oeb = create_oebbook(self.log, self.oeb, self.opts,
                    self.input_plugin)
        pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
        pr(0., _('Running transforms on ebook...'))
@ -619,20 +616,14 @@ OptionRecommendation(name='list_recipes',
        flattener = CSSFlattener(fbase=fbase, fkey=fkey,
                lineh=self.opts.line_height,
-                untable=self.opts.linearize_tables)
+                untable=self.output_plugin.file_type in ('mobi','lit'),
                unfloat=self.output_plugin.file_type in ('mobi', 'lit'))
        flattener(self.oeb, self.opts)
-        if self.opts.linearize_tables:
+        if self.opts.linearize_tables and \
                self.output_plugin.file_type not in ('mobi', 'lrf'):
            from calibre.ebooks.oeb.transforms.linearize_tables import LinearizeTables
            LinearizeTables()(self.oeb, self.opts)
        pr(0.7)
        from calibre.ebooks.oeb.transforms.split import Split
        pbx = accelerators.get('pagebreaks', None)
        split = Split(not self.opts.dont_split_on_page_breaks,
                max_flow_size=self.opts.output_profile.flow_size,
                page_breaks_xpath=pbx)
        split(self.oeb, self.opts)
        pr(0.9)
        from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
@ -652,13 +643,14 @@ OptionRecommendation(name='list_recipes',
                self.opts, self.log)
        self.ui_reporter(1.)
-def create_oebbook(log, path_or_stream, opts, reader=None):
+def create_oebbook(log, path_or_stream, opts, input_plugin, reader=None):
    '''
    Create an OEBBook.
    '''
    from calibre.ebooks.oeb.base import OEBBook
-    html_preprocessor = HTMLPreProcessor()
+    html_preprocessor = HTMLPreProcessor(input_plugin.preprocess_html,
-    oeb = OEBBook(log, html_preprocessor=html_preprocessor,
+            opts.preprocess_html)
    oeb = OEBBook(log, html_preprocessor,
            pretty_print=opts.pretty_print)
    # Read OEB Book into OEBBook
    log('Parsing all content...')
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@ -26,16 +26,16 @@ def sanitize_head(match):
 def chap_head(match):
    chap = match.group('chap')
    title = match.group('title')
-    if not title: 
+    if not title:
               return '<h1>'+chap+'</h1><br/>\n'
-    else: 
+    else:
               return '<h1>'+chap+'<br/>\n'+title+'</h1><br/>\n'
 def wrap_lines(match):
    ital = match.group('ital')
-    if not ital: 
+    if not ital:
               return ' '
-    else: 
+    else:
               return ital+' '
 def line_length(raw, percent):
@ -106,7 +106,7 @@ class HTMLPreProcessor(object):
                  (re.compile(u'¨\s*(<br.*?>)*\s*I', re.UNICODE), lambda match: u'Ï'),
                  (re.compile(u'¨\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'ä'),
                  (re.compile(u'¨\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Ä'),
-                  
+
                  # Remove page links
                  (re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''),
                  # Remove <hr> tags
@ -151,6 +151,9 @@ class HTMLPreProcessor(object):
                     (re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
                      lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
                     ]
    def __init__(self, input_plugin_preprocess, plugin_preprocess):
        '''
        Remember the input plugin's HTML hook and the switch that enables it.
        :param input_plugin_preprocess: Stored unchanged on the instance; it
            is later called with the HTML when ``plugin_preprocess`` is true
        :param plugin_preprocess: Stored unchanged on the instance; tested for
            truth before the hook is applied
        '''
        self.plugin_preprocess = plugin_preprocess
        self.input_plugin_preprocess = input_plugin_preprocess
    def is_baen(self, src):
        return re.compile(r'<meta\s+name="Publisher"\s+content=".*?Baen.*?"',
@ -175,7 +178,7 @@ class HTMLPreProcessor(object):
                # Un wrap using punctuation
                (re.compile(r'(?<=.{%i}[a-z,;:-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?[\w\d])' % line_length(html, .4), re.UNICODE), wrap_lines),
            ]
-            
+
            rules = self.PDFTOHTML + line_length_rules
        else:
            rules = []
@ -192,5 +195,8 @@ class HTMLPreProcessor(object):
        html = XMLDECL_RE.sub('', html)
        if self.plugin_preprocess:
            html = self.input_plugin_preprocess(html)
        return html
--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@ -28,7 +28,21 @@ class EPUBOutput(OutputFormatPlugin):
        OptionRecommendation(name='extract_to',
            help=_('Extract the contents of the generated EPUB file to the '
                'specified directory. The contents of the directory are first '
-                'deleted, so be careful.'))
+                'deleted, so be careful.')),
        OptionRecommendation(name='dont_split_on_page_breaks',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Turn off splitting at page breaks. Normally, input '
                    'files are automatically split at every page break into '
                    'two files. This gives an output ebook that can be '
                    'parsed faster and with less resources. However, '
                    'splitting is slow and if your source file contains a '
                    'very large number of page breaks, you should turn off '
                    'splitting on page breaks.'
                )
        ),
        ])
@ -88,6 +102,13 @@ class EPUBOutput(OutputFormatPlugin):
    def convert(self, oeb, output_path, input_plugin, opts, log):
        self.log, self.opts, self.oeb = log, opts, oeb
        from calibre.ebooks.oeb.transforms.split import Split
        split = Split(not self.opts.dont_split_on_page_breaks,
                max_flow_size=self.opts.output_profile.flow_size
                )
        split(self.oeb, self.opts)
        self.workaround_ade_quirks()
        from calibre.ebooks.oeb.transforms.rescale import RescaleImages
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@ -288,7 +288,7 @@ class HTMLInput(InputFormatPlugin):
            return opfpath
        from calibre.ebooks.conversion.plumber import create_oebbook
-        oeb = create_oebbook(log, opfpath, opts)
+        oeb = create_oebbook(log, opfpath, opts, self)
        from calibre.ebooks.oeb.transforms.package import Package
        Package(os.getcwdu())(oeb, opts)
--- a/src/calibre/ebooks/lit/input.py
+++ b/src/calibre/ebooks/lit/input.py
@ -19,6 +19,6 @@ class LITInput(InputFormatPlugin):
                accelerators):
        from calibre.ebooks.lit.reader import LitReader
        from calibre.ebooks.conversion.plumber import create_oebbook
-        return create_oebbook(log, stream, options, reader=LitReader)
+        return create_oebbook(log, stream, options, self, reader=LitReader)
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@ -80,19 +80,6 @@ class MobiMLizer(object):
    def __init__(self, ignore_tables=False):
        self.ignore_tables = ignore_tables
    @classmethod
    def config(cls, cfg):
        group = cfg.add_group('mobiml', _('Mobipocket markup options.'))
        group('ignore_tables', ['--ignore-tables'], default=False,
              help=_('Render HTML tables as blocks of text instead of actual '
                     'tables. This is neccessary if the HTML contains very '
                     'large or complex tables.'))
        return cfg
    @classmethod
    def generate(cls, opts):
        return cls(ignore_tables=opts.ignore_tables)
    def __call__(self, oeb, context):
        oeb.logger.info('Converting XHTML to Mobipocket markup...')
        self.oeb = oeb
--- a/src/calibre/ebooks/mobi/output.py
+++ b/src/calibre/ebooks/mobi/output.py
@ -0,0 +1,51 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 from calibre.customize.conversion import OutputFormatPlugin
 from calibre.customize.conversion import OptionRecommendation
 class MOBIOutput(OutputFormatPlugin):
    '''
    Output plugin registered for the ``mobi`` file type.
    It declares three MOBI specific options and, in :meth:`convert`, runs the
    MOBI specific transforms on the book before handing it to ``MobiWriter``.
    '''
    name = 'MOBI Output'
    author = 'Marshall T. Vandegrift'
    file_type = 'mobi'
    # Options contributed by this plugin; convert() reads each of them from
    # ``opts`` under the same name.
    options = set([
        OptionRecommendation(name='rescale_images', recommended_value=False,
            help=_('Modify images to meet Palm device size limitations.')
        ),
        OptionRecommendation(name='prefer_author_sort',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('When present, use author sort field as author.')
        ),
        OptionRecommendation(name='toc_title', recommended_value=None,
            help=_('Title for any generated in-line table of contents.')
        ),
    ])
    def convert(self, oeb, output_path, input_plugin, opts, log):
        '''
        Transform ``oeb`` for MOBI and write it to ``output_path``.
        :param oeb: The book object; every transform below is called on it
        :param output_path: Passed to the writer as the destination
        :param input_plugin: Not used by this method
        :param opts: Options object; ``rescale_images``, ``toc_title``,
            ``linearize_tables`` and ``prefer_author_sort`` are read from it
        :param log: Stored on the instance as ``self.log``
        '''
        self.log, self.opts, self.oeb = log, opts, oeb
        # Imports are done inside the method rather than at module level.
        from calibre.ebooks.mobi.writer import PALM_MAX_IMAGE_SIZE, MobiWriter
        from calibre.ebooks.mobi.mobiml import MobiMLizer
        from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
        # None leaves the choice of image size limit to MobiWriter.
        imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
        # Each transform is built and then applied before the next one is
        # created, in this fixed order.
        tocadder = HTMLTOCAdder(title=opts.toc_title)
        tocadder(oeb, opts)
        mangler = CaseMangler()
        mangler(oeb, opts)
        rasterizer = SVGRasterizer()
        rasterizer(oeb, opts)
        # The pipeline-wide linearize_tables option is what drives
        # MobiMLizer's ignore_tables switch.
        mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
        mobimlizer(oeb, opts)
        writer = MobiWriter(imagemax=imagemax,
                            prefer_author_sort=opts.prefer_author_sort)
        writer(oeb, output_path)
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@ -6,8 +6,6 @@ from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam>'
 import sys
 import os
 from struct import pack
 import time
 import random
@ -16,24 +14,14 @@ import re
 from itertools import izip, count
 from collections import defaultdict
 from urlparse import urldefrag
 import logging
 from PIL import Image
 from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \
    OEB_RASTER_IMAGES
 from calibre.ebooks.oeb.base import namespace, prefixname
 from calibre.ebooks.oeb.base import urlnormalize
 from calibre.ebooks.oeb.base import OEBBook
 from calibre.ebooks.oeb.profile import Context
 from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
 from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
 from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
 from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
 from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
 from calibre.ebooks.mobi.palmdoc import compress_doc
 from calibre.ebooks.mobi.langcodes import iana2mobi
-from calibre.ebooks.mobi.mobiml import MBP_NS, MobiMLizer
+from calibre.ebooks.mobi.mobiml import MBP_NS
 from calibre.customize.ui import run_plugins_on_postprocess
 from calibre.utils.config import Config, StringConfig
 # TODO:
 # - Allow override CSS (?)
@ -293,58 +281,22 @@ class Serializer(object):
                buffer.write('%010d' % ioff)
 class MobiFlattener(object):
    '''
    Transform wrapper that runs ``CSSFlattener`` with ``unfloat`` and
    ``untable`` both set to True, taking the font settings from the
    destination profile of the context it is called with.
    '''
    def config(self, cfg):
        '''Contribute no options; give ``cfg`` back as received.'''
        return cfg
    def generate(self, opts):
        '''Ignore ``opts``; this very instance serves as the transform.'''
        return self
    def __call__(self, oeb, context):
        '''
        Flatten the CSS of ``oeb``.
        :param oeb: The book handed on to the flattener
        :param context: Must expose ``dest.fbase`` and ``dest.fnums``
        :return: Whatever the ``CSSFlattener`` call returns
        '''
        base_size = context.dest.fbase
        size_keys = context.dest.fnums.values()
        css_flattener = CSSFlattener(
            unfloat=True, untable=True, fkey=size_keys, fbase=base_size)
        return css_flattener(oeb, context)
 class MobiWriter(object):
    COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
-    DEFAULT_PROFILE = 'CybookG3'
+    def __init__(self, compression=PALMDOC, imagemax=None,
    TRANSFORMS = [HTMLTOCAdder, CaseMangler, MobiFlattener(), SVGRasterizer,
                  ManifestTrimmer, MobiMLizer]
    def __init__(self, compression=None, imagemax=None,
                 prefer_author_sort=False):
        self._compression = compression or UNCOMPRESSED
        self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE
        self._prefer_author_sort = prefer_author_sort
    @classmethod
    def config(cls, cfg):
        """Add any book-writing options to the :class:`Config` object
        :param:`cfg`.
        """
        mobi = cfg.add_group('mobipocket', _('Mobipocket-specific options.'))
        mobi('compress', ['--compress'], default=False,
             help=_('Compress file text using PalmDOC compression. '
                    'Results in smaller files, but takes a long time to run.'))
        mobi('rescale_images', ['--rescale-images'], default=False,
             help=_('Modify images to meet Palm device size limitations.'))
        mobi('prefer_author_sort', ['--prefer-author-sort'], default=False,
             help=_('When present, use the author sorting information for '
                    'generating the Mobipocket author metadata.'))
        return cfg
    @classmethod
    def generate(cls, opts):
        """Generate a Writer instance from command-line options."""
        compression = PALMDOC if opts.compress else UNCOMPRESSED
        imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
        prefer_author_sort = opts.prefer_author_sort
-        return cls(compression=compression, imagemax=imagemax,
+        return cls(compression=PALMDOC, imagemax=imagemax,
                   prefer_author_sort=prefer_author_sort)
    def __call__(self, oeb, path):
@ -577,88 +529,4 @@ class MobiWriter(object):
            self._write(record)
 def config(defaults=None):
    '''
    Build the configuration object that holds the MOBI conversion options.
    :param defaults: When None a ``Config`` named 'mobi' is created;
        otherwise the value is handed to ``StringConfig``
    :return: The configuration object with the 'profiles' group and the
        'encoding' option added
    '''
    description = _('Options to control the conversion to MOBI')
    profile_names = sorted(Context.PROFILES.keys())
    cfg = (Config('mobi', description) if defaults is None
            else StringConfig(defaults, description))
    group_help = _('Device renderer profiles. '
        'Affects conversion of font sizes, image rescaling and rasterization '
        'of tables. Valid profiles are: %s.') % ', '.join(profile_names)
    add_profile_opt = cfg.add_group('profiles', group_help)
    add_profile_opt('source_profile', ['--source-profile'],
            help=_("Source renderer profile. Default is %default."),
            default='Browser', choices=profile_names)
    add_profile_opt('dest_profile', ['--dest-profile'],
            help=_("Destination renderer profile. Default is %default."),
            default='CybookG3', choices=profile_names)
    encoding_help = _('Character encoding for HTML files. Default is to auto detect.')
    cfg.add_opt('encoding', ['--encoding'], default=None, help=encoding_help)
    return cfg
 def option_parser():
    '''
    Create the command line parser: everything from :func:`config` plus the
    ``-o/--output`` and ``-v/--verbose`` options.
    :return: The parser object
    '''
    cli = config().option_parser(
        usage='%prog '+_('[options]')+' file.opf')
    output_help = _('Output file. Default is derived from input filename.')
    cli.add_option('-o', '--output', default=None, help=output_help)
    verbose_help = _('Useful for debugging.')
    cli.add_option('-v', '--verbose', default=0, action='count',
                   help=verbose_help)
    return cli
 def oeb2mobi(opts, inpath):
    '''
    Convert the book at ``inpath`` to a MOBI file as directed by ``opts``.
    :param opts: Parsed options. Attributes read here: ``verbose``,
        ``output``, ``source_profile``, ``dest_profile``, ``compress``,
        ``rescale_images``, ``encoding``, ``toc_title``, ``ignore_tables``
        and ``prefer_author_sort``
    :param inpath: Path handed to ``OEBBook``; its base name is also used to
        derive the output name when ``opts.output`` is None
    :return: 1 when either profile name is unknown. On success the function
        falls off the end, so the result is None
    '''
    # NOTE(review): Logger is not among the imports visible in this view --
    # confirm where it is defined.
    logger = Logger(logging.getLogger('oeb2mobi'))
    logger.setup_cli_handler(opts.verbose)
    outpath = opts.output
    if outpath is None:
        # Default: input base name with the extension swapped for .mobi.
        # The directory part is dropped, so the path is a bare file name.
        outpath = os.path.basename(inpath)
        outpath = os.path.splitext(outpath)[0] + '.mobi'
    # Both profile names are validated before any work is done.
    source = opts.source_profile
    if source not in Context.PROFILES:
        logger.error(_('Unknown source profile %r') % source)
        return 1
    dest = opts.dest_profile
    if dest not in Context.PROFILES:
        logger.error(_('Unknown destination profile %r') % dest)
        return 1
    compression = PALMDOC if opts.compress else UNCOMPRESSED
    # None leaves the choice of image size limit to MobiWriter.
    imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
    context = Context(source, dest)
    oeb = OEBBook(inpath, logger=logger, encoding=opts.encoding)
    # The transforms below run one after another on the same book object, in
    # this fixed order.
    tocadder = HTMLTOCAdder(title=opts.toc_title)
    tocadder.transform(oeb, context)
    mangler = CaseMangler()
    mangler.transform(oeb, context)
    # Font settings for the flattener come from the destination profile.
    fbase = context.dest.fbase
    fkey = context.dest.fnums.values()
    flattener = CSSFlattener(
        fbase=fbase, fkey=fkey, unfloat=True, untable=True)
    flattener.transform(oeb, context)
    rasterizer = SVGRasterizer()
    rasterizer.transform(oeb, context)
    trimmer = ManifestTrimmer()
    trimmer.transform(oeb, context)
    mobimlizer = MobiMLizer(ignore_tables=opts.ignore_tables)
    mobimlizer.transform(oeb, context)
    writer = MobiWriter(compression=compression, imagemax=imagemax,
                        prefer_author_sort=opts.prefer_author_sort)
    writer.dump(oeb, outpath)
    # Post-processing plugins get the finished file before success is logged.
    run_plugins_on_postprocess(outpath, 'mobi')
    logger.info(_('Output written to ') + outpath)
 def main(argv=sys.argv):
    '''
    Command line entry point: expects exactly one positional argument, the
    input path.
    :param argv: Full argument vector; the first element is skipped
    :return: 1 after printing the help text when the argument count is
        wrong, otherwise whatever :func:`oeb2mobi` returns
    '''
    cli = option_parser()
    opts, positional = cli.parse_args(argv[1:])
    if len(positional) == 1:
        return oeb2mobi(opts, positional[0])
    cli.print_help()
    return 1
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -22,8 +22,7 @@ from cssutils import CSSParser
 from calibre.translations.dynamic import translate
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
-from calibre.ebooks.conversion.preprocess import HTMLPreProcessor, \
+from calibre.ebooks.conversion.preprocess import CSSPreProcessor
        CSSPreProcessor
 XML_NS       = 'http://www.w3.org/XML/1998/namespace'
 XHTML_NS     = 'http://www.w3.org/1999/xhtml'
@ -1506,7 +1505,7 @@ class OEBBook(object):
    COVER_OBJECT_XP = XPath('h:body//h:object[@data][position() = 1]')
    def __init__(self, logger,
-            html_preprocessor=HTMLPreProcessor(),
+            html_preprocessor,
            css_preprocessor=CSSPreProcessor(),
            encoding='utf-8', pretty_print=False):
        """Create empty book.  Arguments: