diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index f52c42811b..682c82cd1b 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -290,6 +290,7 @@ from calibre.ebooks.comic.input import ComicInput
 from calibre.web.feeds.input import RecipeInput
 from calibre.ebooks.oeb.output import OEBOutput
 from calibre.ebooks.epub.output import EPUBOutput
+from calibre.ebooks.mobi.output import MOBIOutput
 from calibre.ebooks.txt.output import TXTOutput
 from calibre.ebooks.pdf.output import PDFOutput
 from calibre.ebooks.pml.input import PMLInput
@@ -309,9 +310,9 @@ from calibre.devices.jetbook.driver import JETBOOK
 plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
         TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput, ComicInput,
         FB2Input, ODTInput, RTFInput, EPUBOutput, RecipeInput, PMLInput,
-        PMLOutput]
-plugins += [PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY, EB600, \
-        JETBOOK]
+        PMLOutput, MOBIOutput]
+plugins += [PRS500, PRS505, PRS700, CYBOOKG3, KINDLE, KINDLE2, BLACKBERRY,
+        EB600, JETBOOK]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                         x.__name__.endswith('MetadataReader')]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
diff --git a/src/calibre/customize/conversion.py b/src/calibre/customize/conversion.py
index 7920b823de..3a89a9b156 100644
--- a/src/calibre/customize/conversion.py
+++ b/src/calibre/customize/conversion.py
@@ -149,6 +149,18 @@ class InputFormatPlugin(Plugin):
         '''
         raise NotImplementedError()
 
+    def preprocess_html(self, html):
+        '''
+        Hook for cleaning up HTML before it is parsed, for example removing
+        hard line breaks. It is only called when the ``preprocess_html``
+        option is enabled. The default implementation returns `html` as is.
+
+        :param html: A unicode string
+        :return: A unicode string
+        '''
+        return html
+
+
     def convert(self, stream, options, file_ext, log, accelerators):
         '''
         This method must be implemented in sub-classes. It must return
diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py
index 53b1a2065d..f07c2d86ef 100644
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@@ -126,9 +126,10 @@ def add_pipeline_options(parser, plumber):
               'STRUCTURE DETECTION' : (
                   _('Control auto-detection of document structure.'),
                   [
-                      'dont_split_on_page_breaks', 'chapter', 'chapter_mark',
+                      'chapter', 'chapter_mark',
                       'prefer_metadata_cover', 'remove_first_image',
                       'insert_metadata', 'page_breaks_before',
+                      'preprocess_html',
                   ]
                   ),
 
diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index d1630a25f2..7c654f924d 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -131,18 +131,6 @@ OptionRecommendation(name='linearize_tables',
                 )
         ),
 
-OptionRecommendation(name='dont_split_on_page_breaks',
-            recommended_value=False, level=OptionRecommendation.LOW,
-            help=_('Turn off splitting at page breaks. Normally, input '
-                    'files are automatically split at every page break into '
-                    'two files. This gives an output ebook that can be '
-                    'parsed faster and with less resources. However, '
-                    'splitting is slow and if your source file contains a '
-                    'very large number of page breaks, you should turn off '
-                    'splitting on page breaks.'
-                )
-        ),
-
 OptionRecommendation(name='level1_toc',
             recommended_value=None, level=OptionRecommendation.LOW,
             help=_('XPath expression that specifies all tags that '
@@ -312,6 +300,14 @@ OptionRecommendation(name='insert_metadata',
             )
         ),
 
+OptionRecommendation(name='preprocess_html',
+        recommended_value=False, level=OptionRecommendation.LOW,
+        help=_('Attempt to detect and correct hard line breaks and other '
+            'problems in the source file. This may make things worse, so use '
+            'with care.'
+            )
+        ),
+
 
 OptionRecommendation(name='read_metadata_from_opf',
             recommended_value=None, level=OptionRecommendation.LOW,
@@ -580,7 +576,8 @@ OptionRecommendation(name='list_recipes',
             self.log('Debug input called, aborting the rest of the pipeline.')
             return
         if not hasattr(self.oeb, 'manifest'):
-            self.oeb = create_oebbook(self.log, self.oeb, self.opts)
+            self.oeb = create_oebbook(self.log, self.oeb, self.opts,
+                    self.input_plugin)
         pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
         pr(0., _('Running transforms on ebook...'))
 
@@ -619,20 +616,14 @@ OptionRecommendation(name='list_recipes',
 
         flattener = CSSFlattener(fbase=fbase, fkey=fkey,
                 lineh=self.opts.line_height,
-                untable=self.opts.linearize_tables)
+                untable=self.output_plugin.file_type in ('mobi','lit'),
+                unfloat=self.output_plugin.file_type in ('mobi', 'lit'))
         flattener(self.oeb, self.opts)
 
-        if self.opts.linearize_tables:
+        if self.opts.linearize_tables and \
+                self.output_plugin.file_type not in ('mobi', 'lrf'):
             from calibre.ebooks.oeb.transforms.linearize_tables import LinearizeTables
             LinearizeTables()(self.oeb, self.opts)
-        pr(0.7)
-
-        from calibre.ebooks.oeb.transforms.split import Split
-        pbx = accelerators.get('pagebreaks', None)
-        split = Split(not self.opts.dont_split_on_page_breaks,
-                max_flow_size=self.opts.output_profile.flow_size,
-                page_breaks_xpath=pbx)
-        split(self.oeb, self.opts)
         pr(0.9)
 
         from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
@@ -652,13 +643,14 @@ OptionRecommendation(name='list_recipes',
                 self.opts, self.log)
         self.ui_reporter(1.)
 
-def create_oebbook(log, path_or_stream, opts, reader=None):
+def create_oebbook(log, path_or_stream, opts, input_plugin, reader=None):
     '''
     Create an OEBBook.
     '''
     from calibre.ebooks.oeb.base import OEBBook
-    html_preprocessor = HTMLPreProcessor()
-    oeb = OEBBook(log, html_preprocessor=html_preprocessor,
+    html_preprocessor = HTMLPreProcessor(input_plugin.preprocess_html,
+            opts.preprocess_html)
+    oeb = OEBBook(log, html_preprocessor,
             pretty_print=opts.pretty_print)
     # Read OEB Book into OEBBook
     log('Parsing all content...')
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 9bfe6d4255..76fc36708e 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -26,16 +26,16 @@ def sanitize_head(match):
 def chap_head(match):
     chap = match.group('chap')
     title = match.group('title')
-    if not title: 
+    if not title:
                return '<h1>'+chap+'</h1><br/>\n'
-    else: 
+    else:
                return '<h1>'+chap+'<br/>\n'+title+'</h1><br/>\n'
 
 def wrap_lines(match):
     ital = match.group('ital')
-    if not ital: 
+    if not ital:
                return ' '
-    else: 
+    else:
                return ital+' '
 
 def line_length(raw, percent):
@@ -106,7 +106,7 @@ class HTMLPreProcessor(object):
                   (re.compile(u'¨\s*(<br.*?>)*\s*I', re.UNICODE), lambda match: u'Ï'),
                   (re.compile(u'¨\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'ä'),
                   (re.compile(u'¨\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Ä'),
-                  
+
                   # Remove page links
                   (re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''),
                   # Remove <hr> tags
@@ -151,6 +151,9 @@ class HTMLPreProcessor(object):
                      (re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
                       lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
                      ]
+    def __init__(self, input_plugin_preprocess, plugin_preprocess):
+        self.input_plugin_preprocess = input_plugin_preprocess  # hook: html -> html
+        self.plugin_preprocess = plugin_preprocess  # if true, the hook is applied
 
     def is_baen(self, src):
         return re.compile(r'<meta\s+name="Publisher"\s+content=".*?Baen.*?"',
@@ -175,7 +178,7 @@ class HTMLPreProcessor(object):
                 # Un wrap using punctuation
                 (re.compile(r'(?<=.{%i}[a-z,;:-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?[\w\d])' % line_length(html, .4), re.UNICODE), wrap_lines),
             ]
-            
+
             rules = self.PDFTOHTML + line_length_rules
         else:
             rules = []
@@ -192,5 +195,8 @@ class HTMLPreProcessor(object):
 
         html = XMLDECL_RE.sub('', html)
 
+        if self.plugin_preprocess:
+            html = self.input_plugin_preprocess(html)
+
         return html
 
diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py
index d5f0a9349a..aba9bff0d8 100644
--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@@ -28,7 +28,21 @@ class EPUBOutput(OutputFormatPlugin):
         OptionRecommendation(name='extract_to',
             help=_('Extract the contents of the generated EPUB file to the '
                 'specified directory. The contents of the directory are first '
-                'deleted, so be careful.'))
+                'deleted, so be careful.')),
+
+        OptionRecommendation(name='dont_split_on_page_breaks',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Turn off splitting at page breaks. Normally, input '
+                    'files are automatically split at every page break into '
+                    'two files. This gives an output ebook that can be '
+                    'parsed faster and with less resources. However, '
+                    'splitting is slow and if your source file contains a '
+                    'very large number of page breaks, you should turn off '
+                    'splitting on page breaks.'
+                )
+        ),
+
+
         ])
 
 
@@ -88,6 +102,13 @@ class EPUBOutput(OutputFormatPlugin):
     def convert(self, oeb, output_path, input_plugin, opts, log):
         self.log, self.opts, self.oeb = log, opts, oeb
 
+        from calibre.ebooks.oeb.transforms.split import Split
+        split = Split(not self.opts.dont_split_on_page_breaks,
+                max_flow_size=self.opts.output_profile.flow_size
+                )
+        split(self.oeb, self.opts)
+
+
         self.workaround_ade_quirks()
 
         from calibre.ebooks.oeb.transforms.rescale import RescaleImages
diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py
index 252032a23d..255d975b1e 100644
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@@ -288,7 +288,7 @@ class HTMLInput(InputFormatPlugin):
             return opfpath
 
         from calibre.ebooks.conversion.plumber import create_oebbook
-        oeb = create_oebbook(log, opfpath, opts)
+        oeb = create_oebbook(log, opfpath, opts, self)
 
         from calibre.ebooks.oeb.transforms.package import Package
         Package(os.getcwdu())(oeb, opts)
diff --git a/src/calibre/ebooks/lit/input.py b/src/calibre/ebooks/lit/input.py
index 2d726f7eeb..409482da29 100644
--- a/src/calibre/ebooks/lit/input.py
+++ b/src/calibre/ebooks/lit/input.py
@@ -19,6 +19,6 @@ class LITInput(InputFormatPlugin):
                 accelerators):
         from calibre.ebooks.lit.reader import LitReader
         from calibre.ebooks.conversion.plumber import create_oebbook
-        return create_oebbook(log, stream, options, reader=LitReader)
+        return create_oebbook(log, stream, options, self, reader=LitReader)
 
 
diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py
index 18f53317e0..a2d999ffc8 100644
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@@ -80,19 +80,6 @@ class MobiMLizer(object):
     def __init__(self, ignore_tables=False):
         self.ignore_tables = ignore_tables
 
-    @classmethod
-    def config(cls, cfg):
-        group = cfg.add_group('mobiml', _('Mobipocket markup options.'))
-        group('ignore_tables', ['--ignore-tables'], default=False,
-              help=_('Render HTML tables as blocks of text instead of actual '
-                     'tables. This is neccessary if the HTML contains very '
-                     'large or complex tables.'))
-        return cfg
-
-    @classmethod
-    def generate(cls, opts):
-        return cls(ignore_tables=opts.ignore_tables)
-
     def __call__(self, oeb, context):
         oeb.logger.info('Converting XHTML to Mobipocket markup...')
         self.oeb = oeb
diff --git a/src/calibre/ebooks/mobi/output.py b/src/calibre/ebooks/mobi/output.py
new file mode 100644
index 0000000000..1866888ab1
--- /dev/null
+++ b/src/calibre/ebooks/mobi/output.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+from calibre.customize.conversion import OutputFormatPlugin
+from calibre.customize.conversion import OptionRecommendation
+
+class MOBIOutput(OutputFormatPlugin):
+    '''Output plugin that writes the converted book as a MOBI file.'''
+
+    name = 'MOBI Output'
+    author = 'Marshall T. Vandegrift'
+    file_type = 'mobi'
+
+    options = set([
+        OptionRecommendation(name='rescale_images', recommended_value=False,
+            help=_('Modify images to meet Palm device size limitations.')),
+        OptionRecommendation(name='prefer_author_sort',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('When present, use author sort field as author.')),
+        OptionRecommendation(name='toc_title', recommended_value=None,
+            help=_('Title for any generated in-line table of contents.')),
+    ])
+
+    def convert(self, oeb, output_path, input_plugin, opts, log):
+        '''
+        Run the MOBI transforms over `oeb`, then write it to `output_path`.
+        '''
+        self.log, self.opts, self.oeb = log, opts, oeb
+        from calibre.ebooks.mobi.writer import PALM_MAX_IMAGE_SIZE, MobiWriter
+        from calibre.ebooks.mobi.mobiml import MobiMLizer
+        from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
+        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
+        from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
+        # None is passed through to MobiWriter when rescaling is not requested
+        max_image_size = None
+        if opts.rescale_images:
+            max_image_size = PALM_MAX_IMAGE_SIZE
+        HTMLTOCAdder(title=opts.toc_title)(oeb, opts)
+        CaseMangler()(oeb, opts)
+        SVGRasterizer()(oeb, opts)
+        MobiMLizer(ignore_tables=opts.linearize_tables)(oeb, opts)
+        writer = MobiWriter(imagemax=max_image_size,
+                prefer_author_sort=opts.prefer_author_sort)
+        writer(oeb, output_path)
+
diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py
index c521ba9977..e16deeccda 100644
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@@ -6,8 +6,6 @@ from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam>'
 
-import sys
-import os
 from struct import pack
 import time
 import random
@@ -16,24 +14,14 @@ import re
 from itertools import izip, count
 from collections import defaultdict
 from urlparse import urldefrag
-import logging
 from PIL import Image
 from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \
     OEB_RASTER_IMAGES
 from calibre.ebooks.oeb.base import namespace, prefixname
 from calibre.ebooks.oeb.base import urlnormalize
-from calibre.ebooks.oeb.base import OEBBook
-from calibre.ebooks.oeb.profile import Context
-from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
-from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
-from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
-from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
-from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
 from calibre.ebooks.mobi.palmdoc import compress_doc
 from calibre.ebooks.mobi.langcodes import iana2mobi
-from calibre.ebooks.mobi.mobiml import MBP_NS, MobiMLizer
-from calibre.customize.ui import run_plugins_on_postprocess
-from calibre.utils.config import Config, StringConfig
+from calibre.ebooks.mobi.mobiml import MBP_NS
 
 # TODO:
 # - Allow override CSS (?)
@@ -293,58 +281,22 @@ class Serializer(object):
                 buffer.write('%010d' % ioff)
 
 
-class MobiFlattener(object):
-    def config(self, cfg):
-        return cfg
-
-    def generate(self, opts):
-        return self
-
-    def __call__(self, oeb, context):
-        fbase = context.dest.fbase
-        fkey = context.dest.fnums.values()
-        flattener = CSSFlattener(
-            fbase=fbase, fkey=fkey, unfloat=True, untable=True)
-        return flattener(oeb, context)
-
 
 class MobiWriter(object):
     COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
 
-    DEFAULT_PROFILE = 'CybookG3'
-
-    TRANSFORMS = [HTMLTOCAdder, CaseMangler, MobiFlattener(), SVGRasterizer,
-                  ManifestTrimmer, MobiMLizer]
-
-    def __init__(self, compression=None, imagemax=None,
+    def __init__(self, compression=PALMDOC, imagemax=None,
                  prefer_author_sort=False):
         self._compression = compression or UNCOMPRESSED
         self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE
         self._prefer_author_sort = prefer_author_sort
 
-    @classmethod
-    def config(cls, cfg):
-        """Add any book-writing options to the :class:`Config` object
-        :param:`cfg`.
-        """
-        mobi = cfg.add_group('mobipocket', _('Mobipocket-specific options.'))
-        mobi('compress', ['--compress'], default=False,
-             help=_('Compress file text using PalmDOC compression. '
-                    'Results in smaller files, but takes a long time to run.'))
-        mobi('rescale_images', ['--rescale-images'], default=False,
-             help=_('Modify images to meet Palm device size limitations.'))
-        mobi('prefer_author_sort', ['--prefer-author-sort'], default=False,
-             help=_('When present, use the author sorting information for '
-                    'generating the Mobipocket author metadata.'))
-        return cfg
-
     @classmethod
     def generate(cls, opts):
         """Generate a Writer instance from command-line options."""
-        compression = PALMDOC if opts.compress else UNCOMPRESSED
         imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
         prefer_author_sort = opts.prefer_author_sort
-        return cls(compression=compression, imagemax=imagemax,
+        return cls(compression=PALMDOC, imagemax=imagemax,
                    prefer_author_sort=prefer_author_sort)
 
     def __call__(self, oeb, path):
@@ -577,88 +529,4 @@ class MobiWriter(object):
             self._write(record)
 
 
-def config(defaults=None):
-    desc = _('Options to control the conversion to MOBI')
-    _profiles = list(sorted(Context.PROFILES.keys()))
-    if defaults is None:
-        c = Config('mobi', desc)
-    else:
-        c = StringConfig(defaults, desc)
 
-    profiles = c.add_group('profiles', _('Device renderer profiles. '
-        'Affects conversion of font sizes, image rescaling and rasterization '
-        'of tables. Valid profiles are: %s.') % ', '.join(_profiles))
-    profiles('source_profile', ['--source-profile'],
-             default='Browser', choices=_profiles,
-             help=_("Source renderer profile. Default is %default."))
-    profiles('dest_profile', ['--dest-profile'],
-             default='CybookG3', choices=_profiles,
-             help=_("Destination renderer profile. Default is %default."))
-    c.add_opt('encoding', ['--encoding'], default=None,
-              help=_('Character encoding for HTML files. Default is to auto detect.'))
-    return c
-
-
-def option_parser():
-    c = config()
-    parser = c.option_parser(usage='%prog '+_('[options]')+' file.opf')
-    parser.add_option(
-        '-o', '--output', default=None,
-        help=_('Output file. Default is derived from input filename.'))
-    parser.add_option(
-        '-v', '--verbose', default=0, action='count',
-        help=_('Useful for debugging.'))
-    return parser
-
-def oeb2mobi(opts, inpath):
-    logger = Logger(logging.getLogger('oeb2mobi'))
-    logger.setup_cli_handler(opts.verbose)
-    outpath = opts.output
-    if outpath is None:
-        outpath = os.path.basename(inpath)
-        outpath = os.path.splitext(outpath)[0] + '.mobi'
-    source = opts.source_profile
-    if source not in Context.PROFILES:
-        logger.error(_('Unknown source profile %r') % source)
-        return 1
-    dest = opts.dest_profile
-    if dest not in Context.PROFILES:
-        logger.error(_('Unknown destination profile %r') % dest)
-        return 1
-    compression = PALMDOC if opts.compress else UNCOMPRESSED
-    imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
-    context = Context(source, dest)
-    oeb = OEBBook(inpath, logger=logger, encoding=opts.encoding)
-    tocadder = HTMLTOCAdder(title=opts.toc_title)
-    tocadder.transform(oeb, context)
-    mangler = CaseMangler()
-    mangler.transform(oeb, context)
-    fbase = context.dest.fbase
-    fkey = context.dest.fnums.values()
-    flattener = CSSFlattener(
-        fbase=fbase, fkey=fkey, unfloat=True, untable=True)
-    flattener.transform(oeb, context)
-    rasterizer = SVGRasterizer()
-    rasterizer.transform(oeb, context)
-    trimmer = ManifestTrimmer()
-    trimmer.transform(oeb, context)
-    mobimlizer = MobiMLizer(ignore_tables=opts.ignore_tables)
-    mobimlizer.transform(oeb, context)
-    writer = MobiWriter(compression=compression, imagemax=imagemax,
-                        prefer_author_sort=opts.prefer_author_sort)
-    writer.dump(oeb, outpath)
-    run_plugins_on_postprocess(outpath, 'mobi')
-    logger.info(_('Output written to ') + outpath)
-
-def main(argv=sys.argv):
-    parser = option_parser()
-    opts, args = parser.parse_args(argv[1:])
-    if len(args) != 1:
-        parser.print_help()
-        return 1
-    inpath = args[0]
-    retval = oeb2mobi(opts, inpath)
-    return retval
-
-if __name__ == '__main__':
-    sys.exit(main())
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index faf2d02dc4..bbac34f0b1 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -22,8 +22,7 @@ from cssutils import CSSParser
 from calibre.translations.dynamic import translate
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
-from calibre.ebooks.conversion.preprocess import HTMLPreProcessor, \
-        CSSPreProcessor
+from calibre.ebooks.conversion.preprocess import CSSPreProcessor
 
 XML_NS       = 'http://www.w3.org/XML/1998/namespace'
 XHTML_NS     = 'http://www.w3.org/1999/xhtml'
@@ -1506,7 +1505,7 @@ class OEBBook(object):
     COVER_OBJECT_XP = XPath('h:body//h:object[@data][position() = 1]')
 
     def __init__(self, logger,
-            html_preprocessor=HTMLPreProcessor(),
+            html_preprocessor,
             css_preprocessor=CSSPreProcessor(),
             encoding='utf-8', pretty_print=False):
         """Create empty book.  Arguments: