diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index 6ce1b42356..1f459229c8 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -77,7 +77,7 @@ class Plumber(object):
 
     def __init__(self, input, output, log, report_progress=DummyReporter(),
             dummy=False, merge_plugin_recs=True, abort_after_input_dump=False,
-            override_input_metadata=False):
+            override_input_metadata=False, for_regex_wizard=False):
         '''
         :param input: Path to input file.
         :param output: Path to output file/directory
@@ -87,6 +87,7 @@ class Plumber(object):
         if isbytestring(output):
             output = output.decode(filesystem_encoding)
         self.original_input_arg = input
+        self.for_regex_wizard = for_regex_wizard
         self.input = os.path.abspath(input)
         self.output = os.path.abspath(output)
         self.log = log
@@ -123,7 +124,7 @@ OptionRecommendation(name='input_profile',
                    'conversion system information on how to interpret '
                    'various information in the input document. For '
                    'example resolution dependent lengths (i.e. lengths in '
-                   'pixels). Choices are:')+\
+                   'pixels). Choices are:')+
                         ', '.join([x.short_name for x in input_profiles()])
         ),
 
@@ -135,7 +136,7 @@ OptionRecommendation(name='output_profile',
                    'created document for the specified device. In some cases, '
                    'an output profile is required to produce documents that '
                    'will work on a device. For example EPUB on the SONY reader. '
-                   'Choices are:') + \
+                   'Choices are:') +
                            ', '.join([x.short_name for x in output_profiles()])
         ),
 
@@ -490,7 +491,7 @@ OptionRecommendation(name='asciiize',
             'cases where there are multiple representations of a character '
             '(characters shared by Chinese and Japanese for instance) the '
             'representation based on the current calibre interface language will be '
-            'used.')%\
+            'used.')%
             u'\u041c\u0438\u0445\u0430\u0438\u043b '
             u'\u0413\u043e\u0440\u0431\u0430\u0447\u0451\u0432'
 )
@@ -711,7 +712,6 @@ OptionRecommendation(name='search_replace',
         self.input_fmt = input_fmt
         self.output_fmt = output_fmt
 
-
         self.all_format_options = set()
         self.input_options = set()
         self.output_options = set()
@@ -775,7 +775,7 @@ OptionRecommendation(name='search_replace',
         if not html_files:
             raise ValueError(_('Could not find an ebook inside the archive'))
         html_files = [(f, os.stat(f).st_size) for f in html_files]
-        html_files.sort(cmp = lambda x, y: cmp(x[1], y[1]))
+        html_files.sort(cmp=lambda x, y: cmp(x[1], y[1]))
         html_files = [f[0] for f in html_files]
         for q in ('toc', 'index'):
             for f in html_files:
@@ -783,8 +783,6 @@ OptionRecommendation(name='search_replace',
                     return f, os.path.splitext(f)[1].lower()[1:]
         return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]
 
-
-
     def get_option_by_name(self, name):
         for group in (self.input_options, self.pipeline_options,
                       self.output_options, self.all_format_options):
@@ -956,7 +954,6 @@ OptionRecommendation(name='search_replace',
 
         self.log.info('Input debug saved to:', out_dir)
 
-
     def run(self):
         '''
         Run the conversion pipeline
@@ -965,10 +962,12 @@ OptionRecommendation(name='search_replace',
         self.setup_options()
         if self.opts.verbose:
             self.log.filter_level = self.log.DEBUG
+        if self.for_regex_wizard and hasattr(self.opts, 'no_process'):
+            self.opts.no_process = True
         self.flush()
         import cssutils, logging
         cssutils.log.setLevel(logging.WARN)
-        get_types_map() # Ensure the mimetypes module is intialized
+        get_types_map()  # Ensure the mimetypes module is intialized
 
         if self.opts.debug_pipeline is not None:
             self.opts.verbose = max(self.opts.verbose, 4)
@@ -1003,6 +1002,8 @@ OptionRecommendation(name='search_replace',
         self.ui_reporter(0.01, _('Converting input to HTML...'))
         ir = CompositeProgressReporter(0.01, 0.34, self.ui_reporter)
         self.input_plugin.report_progress = ir
+        if self.for_regex_wizard:
+            self.input_plugin.for_viewer = True
         with self.input_plugin:
             self.oeb = self.input_plugin(stream, self.opts,
                                         self.input_fmt, self.log,
@@ -1014,8 +1015,12 @@ OptionRecommendation(name='search_replace',
             if self.input_fmt in ('recipe', 'downloaded_recipe'):
                 self.opts_to_mi(self.user_metadata)
             if not hasattr(self.oeb, 'manifest'):
-                self.oeb = create_oebbook(self.log, self.oeb, self.opts,
-                        encoding=self.input_plugin.output_encoding)
+                self.oeb = create_oebbook(
+                    self.log, self.oeb, self.opts,
+                    encoding=self.input_plugin.output_encoding,
+                    for_regex_wizard=self.for_regex_wizard)
+            if self.for_regex_wizard:
+                return
             self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
             self.opts.is_image_collection = self.input_plugin.is_image_collection
             pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
@@ -1081,7 +1086,6 @@ OptionRecommendation(name='search_replace',
             self.dump_oeb(self.oeb, out_dir)
             self.log('Structured HTML written to:', out_dir)
 
-
         if self.opts.extra_css and os.path.exists(self.opts.extra_css):
             self.opts.extra_css = open(self.opts.extra_css, 'rb').read()
 
@@ -1161,13 +1165,20 @@ OptionRecommendation(name='search_replace',
         self.log(self.output_fmt.upper(), 'output written to', self.output)
         self.flush()
 
+# This has to be global as create_oebbook can be called from other locations
+# (for example in the html input plugin)
+regex_wizard_callback = None
+def set_regex_wizard_callback(f):
+    global regex_wizard_callback
+    regex_wizard_callback = f
+
 def create_oebbook(log, path_or_stream, opts, reader=None,
-        encoding='utf-8', populate=True):
+        encoding='utf-8', populate=True, for_regex_wizard=False):
     '''
     Create an OEBBook.
     '''
     from calibre.ebooks.oeb.base import OEBBook
-    html_preprocessor = HTMLPreProcessor(log, opts)
+    html_preprocessor = HTMLPreProcessor(log, opts, regex_wizard_callback=regex_wizard_callback)
     if not encoding:
         encoding = None
     oeb = OEBBook(log, html_preprocessor,
@@ -1182,3 +1193,4 @@ def create_oebbook(log, path_or_stream, opts, reader=None,
 
     reader()(oeb, path_or_stream)
     return oeb
+
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 91f91c8b3d..126709200a 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -497,9 +497,11 @@ class HTMLPreProcessor(object):
                      (re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
                       lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
                      ]
-    def __init__(self, log=None, extra_opts=None):
+    def __init__(self, log=None, extra_opts=None, regex_wizard_callback=None):
         self.log = log
         self.extra_opts = extra_opts
+        self.regex_wizard_callback = regex_wizard_callback
+        self.current_href = None
 
     def is_baen(self, src):
         return re.compile(r'<meta\s+name="Publisher"\s+content=".*?Baen.*?"',
@@ -586,6 +588,9 @@ class HTMLPreProcessor(object):
         for rule in self.PREPROCESS + start_rules:
             html = rule[0].sub(rule[1], html)
 
+        if self.regex_wizard_callback is not None:
+            self.regex_wizard_callback(self.current_href, html)
+
         if get_preprocess_html:
             return html
 
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index eb5b0042e7..671caf49fc 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -871,6 +871,7 @@ class Manifest(object):
             orig_data = data
             fname = urlunquote(self.href)
             self.oeb.log.debug('Parsing', fname, '...')
+            self.oeb.html_preprocessor.current_href = self.href
             try:
                 data = parse_html(data, log=self.oeb.log,
                         decoder=self.oeb.decode,
@@ -1312,9 +1313,9 @@ class Guide(object):
                          ('notes', __('Notes')),
                          ('preface', __('Preface')),
                          ('text', __('Main Text'))]
-        TYPES = set(t for t, _ in _TYPES_TITLES)
+        TYPES = set(t for t, _ in _TYPES_TITLES)  # noqa
         TITLES = dict(_TYPES_TITLES)
-        ORDER = dict((t, i) for i, (t, _) in enumerate(_TYPES_TITLES))
+        ORDER = dict((t, i) for i, (t, _) in enumerate(_TYPES_TITLES))  # noqa
 
         def __init__(self, oeb, type, title, href):
             self.oeb = oeb
diff --git a/src/calibre/ebooks/oeb/iterator/__init__.py b/src/calibre/ebooks/oeb/iterator/__init__.py
index 29487cbb84..3e2dfc5df2 100644
--- a/src/calibre/ebooks/oeb/iterator/__init__.py
+++ b/src/calibre/ebooks/oeb/iterator/__init__.py
@@ -7,7 +7,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import os, re
+import sys, os, re
 
 from calibre.customize.ui import available_input_formats
 
@@ -26,17 +26,18 @@ def EbookIterator(*args, **kwargs):
     from calibre.ebooks.oeb.iterator.book import EbookIterator
     return EbookIterator(*args, **kwargs)
 
-def get_preprocess_html(path_to_ebook, output):
-    from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
-    iterator = EbookIterator(path_to_ebook)
-    iterator.__enter__(only_input_plugin=True, run_char_count=False,
-            read_anchor_map=False)
-    preprocessor = HTMLPreProcessor(None, False)
-    with open(output, 'wb') as out:
-        for path in iterator.spine:
-            with open(path, 'rb') as f:
-                html = f.read().decode('utf-8', 'replace')
-            html = preprocessor(html, get_preprocess_html=True)
+def get_preprocess_html(path_to_ebook, output=None):
+    from calibre.ebooks.conversion.plumber import set_regex_wizard_callback, Plumber
+    from calibre.utils.logging import DevNull
+    from calibre.ptempfile import TemporaryDirectory
+    raw = {}
+    set_regex_wizard_callback(raw.__setitem__)
+    with TemporaryDirectory('_regex_wiz') as tdir:
+        pl = Plumber(path_to_ebook, os.path.join(tdir, 'a.epub'), DevNull(), for_regex_wizard=True)
+        pl.run()
+        items = [raw[item.href] for item in pl.oeb.spine if item.href in raw]
+
+    with (sys.stdout if output is None else open(output, 'wb')) as out:
+        for html in items:
             out.write(html.encode('utf-8'))
             out.write(b'\n\n' + b'-'*80 + b'\n\n')
-