diff --git a/src/calibre/customize/conversion.py b/src/calibre/customize/conversion.py
index 6530e5f16c..c531a15e34 100644
--- a/src/calibre/customize/conversion.py
+++ b/src/calibre/customize/conversion.py
@@ -122,8 +122,9 @@ class InputFormatPlugin(Plugin):
     def convert(self, stream, options, file_ext, log, accelerators):
         '''
         This method must be implemented in sub-classes. It must return
-        the path to the created OPF file. All output should be contained in
-        the current directory. If this plugin creates files outside the current
+        the path to the created OPF file or an :class:`OEBBook` instance.
+        All output should be contained in the current directory.
+        If this plugin creates files outside the current
         directory they must be deleted/marked for deletion before this method
         returns.
 
diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index 6142cb555a..41d5f0abd9 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -299,21 +299,15 @@ OptionRecommendation(name='language',
 
         # Create an OEBBook from the input file. The input plugin does all the
         # heavy lifting.
-        from calibre.ebooks.oeb.reader import OEBReader
-        from calibre.ebooks.oeb.base import OEBBook
         accelerators = {}
 
         tdir = PersistentTemporaryDirectory('_plumber')
 
-        opfpath = self.input_plugin(open(self.input, 'rb'), self.opts,
+        self.oeb = self.input_plugin(open(self.input, 'rb'), self.opts,
                                     self.input_fmt, self.log,
                                     accelerators, tdir)
-        html_preprocessor = HTMLPreProcessor()
-        self.reader = OEBReader()
-        self.oeb = OEBBook(self.log, html_preprocessor=html_preprocessor)
-        # Read OEB Book into OEBBook
-        self.log.info('Parsing all content...')
-        self.reader(self.oeb, opfpath)
+        if not hasattr(self.oeb, 'manifest'):
+            self.oeb = create_oebbook(self.log, self.oeb)
 
         self.opts.source = self.opts.input_profile
         self.opts.dest = self.opts.output_profile
@@ -340,7 +334,20 @@ OptionRecommendation(name='language',
         trimmer(self.oeb, self.opts)
 
         self.log.info('Creating %s...'%self.output_plugin.name)
-        self.output_plugin.convert(self.oeb, self.output, self.input_plugin, self.opts,
-                self.log)
+        self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
+                self.opts, self.log)
 
+def create_oebbook(log, opfpath):
+    '''
+    Build and return an OEBBook from the OPF at `opfpath`, logging to `log`.
+    '''
+    from calibre.ebooks.oeb.reader import OEBReader
+    from calibre.ebooks.oeb.base import OEBBook
+    html_preprocessor = HTMLPreProcessor()
+    reader = OEBReader()
+    oeb = OEBBook(log, html_preprocessor=html_preprocessor)
+    # Populate oeb from the OPF; the reader's return value is not used
+    log.info('Parsing all content...')
+    reader(oeb, opfpath)
+    return oeb
 
diff --git a/src/calibre/ebooks/epub/__init__.py b/src/calibre/ebooks/epub/__init__.py
index 0be88da070..2bc076a8ad 100644
--- a/src/calibre/ebooks/epub/__init__.py
+++ b/src/calibre/ebooks/epub/__init__.py
@@ -10,23 +10,23 @@ import sys, textwrap, re, os, uuid
 from itertools import cycle
 from calibre.utils.config import Config, StringConfig
 from calibre.utils.zipfile import ZipFile, ZIP_STORED
-from calibre.ebooks.html import config as common_config, tostring
+from calibre.ebooks.html import tostring
 from lxml import etree
 
 class DefaultProfile(object):
-    
+
     flow_size            = sys.maxint
     screen_size          = None
     remove_special_chars = False
     remove_object_tags   = False
-    
+
 class PRS505(DefaultProfile):
-    
+
     flow_size            = 270000
     screen_size          = (590, 765)
     remove_special_chars = re.compile(u'[\u200b\u00ad]')
     remove_object_tags   = True
-        
+
 
 PROFILES = {
             'PRS505' : PRS505,
@@ -64,11 +64,11 @@ def config(defaults=None, name='epub'):
         c = Config(name, desc)
     else:
         c = StringConfig(defaults, desc)
-    
+
     c.update(common_config())
     c.remove_opt('output')
     c.remove_opt('zip')
-    
+
     c.add_opt('output', ['-o', '--output'], default=None,
              help=_('The output EPUB file. If not specified, it is '
                     'derived from the input file name.'))
@@ -81,22 +81,22 @@ def config(defaults=None, name='epub'):
               help=_('Either the path to a CSS stylesheet or raw CSS. '
                      'This CSS will override any existing CSS '
                      'declarations in the source files.'))
-    structure = c.add_group('structure detection', 
+    structure = c.add_group('structure detection',
                             _('Control auto-detection of document structure.'))
-    structure('chapter', ['--chapter'], 
+    structure('chapter', ['--chapter'],
               default="//*[re:match(name(), 'h[1-2]') and "
               "re:test(., 'chapter|book|section|part', 'i')] | "
               "//*[@class = 'chapter']",
             help=_('''\
 An XPath expression to detect chapter titles. The default is to consider <h1> or
-<h2> tags that contain the words "chapter","book","section" or "part" as chapter titles as 
-well as any tags that have class="chapter". 
+<h2> tags that contain the words "chapter","book","section" or "part" as chapter titles as
+well as any tags that have class="chapter".
 The expression used must evaluate to a list of elements. To disable chapter detection,
 use the expression "/". See the XPath Tutorial in the calibre User Manual for further
 help on using this feature.
 ''').replace('\n', ' '))
     structure('chapter_mark', ['--chapter-mark'], choices=['pagebreak', 'rule', 'both', 'none'],
-              default='pagebreak', 
+              default='pagebreak',
               help=_('Specify how to mark detected chapters. A value of '
                      '"pagebreak" will insert page breaks before chapters. '
                      'A value of "rule" will insert a line before chapters. '
@@ -129,13 +129,13 @@ help on using this feature.
               help=_('XPath expression to find the name of each page in the '
                      'pagination map relative to its boundary element. '
                      'Default is to number all pages staring with 1.'))
-    toc = c.add_group('toc', 
+    toc = c.add_group('toc',
         _('''\
 Control the automatic generation of a Table of Contents. If an OPF file is detected
 and it specifies a Table of Contents, then that will be used rather than trying
 to auto-generate a Table of Contents.
 ''').replace('\n', ' '))
-    toc('max_toc_links', ['--max-toc-links'], default=50, 
+    toc('max_toc_links', ['--max-toc-links'], default=50,
         help=_('Maximum number of links to insert into the TOC. Set to 0 '
                'to disable. Default is: %default. Links are only added to the '
                'TOC if less than the --toc-threshold number of chapters were detected.'))
@@ -166,15 +166,15 @@ to auto-generate a Table of Contents.
         help=_('Normally, if the source file already has a Table of Contents, '
                'it is used in preference to the auto-generated one. '
                'With this option, the auto-generated one is always used.'))
-    
+
     layout = c.add_group('page layout', _('Control page layout'))
-    layout('margin_top', ['--margin-top'], default=5.0, 
+    layout('margin_top', ['--margin-top'], default=5.0,
            help=_('Set the top margin in pts. Default is %default'))
-    layout('margin_bottom', ['--margin-bottom'], default=5.0, 
+    layout('margin_bottom', ['--margin-bottom'], default=5.0,
            help=_('Set the bottom margin in pts. Default is %default'))
-    layout('margin_left', ['--margin-left'], default=5.0, 
+    layout('margin_left', ['--margin-left'], default=5.0,
            help=_('Set the left margin in pts. Default is %default'))
-    layout('margin_right', ['--margin-right'], default=5.0, 
+    layout('margin_right', ['--margin-right'], default=5.0,
            help=_('Set the right margin in pts. Default is %default'))
     layout('base_font_size2', ['--base-font-size'], default=12.0,
            help=_('The base font size in pts. Default is %defaultpt. '
@@ -195,12 +195,12 @@ to auto-generate a Table of Contents.
                   'This is only neccessary if the HTML files contain CSS that '
                   'uses sibling selectors. Enabling this greatly slows down '
                   'processing of large HTML files.'))
-    
+
     c.add_opt('show_opf', ['--show-opf'], default=False, group='debug',
               help=_('Print generated OPF file to stdout'))
     c.add_opt('show_ncx', ['--show-ncx'], default=False, group='debug',
               help=_('Print generated NCX file to stdout'))
-    c.add_opt('keep_intermediate', ['--keep-intermediate-files'], group='debug', 
+    c.add_opt('keep_intermediate', ['--keep-intermediate-files'], group='debug',
               default=False,
               help=_('Keep intermediate files during processing by html2epub'))
     c.add_opt('extract_to', ['--extract-to'], group='debug', default=None,
diff --git a/src/calibre/ebooks/epub/fonts.py b/src/calibre/ebooks/epub/fonts.py
index 5d0887f2d0..67e6066ed1 100644
--- a/src/calibre/ebooks/epub/fonts.py
+++ b/src/calibre/ebooks/epub/fonts.py
@@ -14,7 +14,7 @@ from lxml.cssselect import CSSSelector
 from lxml import etree
 from lxml.html import HtmlElement
 
-from calibre.ebooks.html import fromstring
+from calibre.ebooks.html_old import fromstring
 from calibre.ebooks.epub import rules
 from cssutils import CSSParser
 
@@ -24,7 +24,7 @@ absolute_size = r'(?P<abs>(x?x-)?(small|large)|medium)'
 relative_size = r'(?P<rel>smaller|larger)'
 
 font_size_pat   = re.compile('|'.join((relative_size, absolute_size, length)), re.I)
-line_height_pat = re.compile(r'({num})(px|in|cm|mm|pt|pc)'.replace('{num}', num))  
+line_height_pat = re.compile(r'({num})(px|in|cm|mm|pt|pc)'.replace('{num}', num))
 
 PTU = {
        'in' : 72.,
@@ -37,12 +37,12 @@ PTU = {
 DEFAULT_FONT_SIZE = 12
 
 class Rationalizer(object):
-    
+
     @classmethod
     def specificity(cls, s):
         '''Map CSS specificity tuple to a single integer'''
-        return sum([10**(4-i) + x for i,x in enumerate(s)]) 
-        
+        return sum([10**(4-i) + x for i,x in enumerate(s)])
+
     @classmethod
     def compute_font_size(cls, elem):
         '''
@@ -59,7 +59,7 @@ class Rationalizer(object):
             elem.computed_font_size = sfs(parent.computed_font_size)
         else:
             elem.computed_font_size = sfs
-        
+
     @classmethod
     def calculate_font_size(cls, style):
         'Return font size in pts from style object. For relative units returns a callable'
@@ -69,7 +69,7 @@ class Rationalizer(object):
             fs = match.group()
         if style.fontSize:
             fs = style.fontSize
-            
+
         match = font_size_pat.search(fs)
         if match is None:
             return None
@@ -89,8 +89,8 @@ class Rationalizer(object):
             return 12 * x
         if match.get('zero', False):
             return 0.
-        return functools.partial(operator.mul, 1.2) if 'larger' in fs.lower() else functools.partial(operator.mul, 0.8) 
-        
+        return functools.partial(operator.mul, 1.2) if 'larger' in fs.lower() else functools.partial(operator.mul, 0.8)
+
     @classmethod
     def resolve_rules(cls, stylesheets):
         for sheet in stylesheets:
@@ -104,12 +104,12 @@ class Rationalizer(object):
                     if font_size is not None:
                         for s in r.selectorList:
                             sheet.fs_rules.append([CSSSelector(s.selectorText), font_size])
-                    orig = line_height_pat.search(r.style.lineHeight) 
+                    orig = line_height_pat.search(r.style.lineHeight)
                     if orig is not None:
                         for s in r.selectorList:
                             sheet.lh_rules.append([CSSSelector(s.selectorText), float(orig.group(1)) * PTU[orig.group(2).lower()]])
-    
-        
+
+
     @classmethod
     def apply_font_size_rules(cls, stylesheets, root):
         'Add a ``specified_font_size`` attribute to every element that has a specified font size'
@@ -119,7 +119,7 @@ class Rationalizer(object):
                 elems = selector(root)
                 for elem in elems:
                     elem.specified_font_size = font_size
-    
+
     @classmethod
     def remove_font_size_information(cls, stylesheets):
         for r in rules(stylesheets):
@@ -134,17 +134,17 @@ class Rationalizer(object):
                 r.style.removeProperty('font')
             if line_height_pat.search(r.style.lineHeight) is not None:
                 r.style.removeProperty('line-height')
-    
+
     @classmethod
     def compute_font_sizes(cls, root, stylesheets, base=12):
         stylesheets = [s for s in stylesheets if hasattr(s, 'cssText')]
         cls.apply_font_size_rules(stylesheets, root)
-        
+
         # Compute the effective font size of all tags
         root.computed_font_size = DEFAULT_FONT_SIZE
         for elem in root.iter(etree.Element):
             cls.compute_font_size(elem)
-        
+
         extra_css = {}
         if base > 0:
             # Calculate the "base" (i.e. most common) font size
@@ -157,20 +157,20 @@ class Rationalizer(object):
                     if t: t = t.strip()
                     if t:
                         font_sizes[elem.computed_font_size] += len(t)
-                    
+
                 t = getattr(elem, 'tail', '')
                 if t: t = t.strip()
                 if t:
                     parent = elem.getparent()
                     if parent.tag not in IGNORE:
                         font_sizes[parent.computed_font_size] += len(t)
-                
+
             try:
                 most_common = max(font_sizes.items(), key=operator.itemgetter(1))[0]
                 scale = base/most_common if most_common > 0 else 1.
             except ValueError:
                 scale = 1.
-            
+
             # rescale absolute line-heights
             counter = 0
             for sheet in stylesheets:
@@ -181,17 +181,17 @@ class Rationalizer(object):
                         if not extra_css.has_key(elem.get('id')):
                             extra_css[elem.get('id')] = []
                         extra_css[elem.get('id')].append('line-height:%fpt'%(lh*scale))
-            
-        
-            
+
+
+
             # Rescale all computed font sizes
             for elem in body.iter(etree.Element):
                 if isinstance(elem, HtmlElement):
                     elem.computed_font_size *= scale
-        
-        # Remove all font size specifications from the last stylesheet 
+
+        # Remove all font size specifications from the last stylesheet
         cls.remove_font_size_information(stylesheets[-1:])
-                    
+
         # Create the CSS to implement the rescaled font sizes
         for elem in body.iter(etree.Element):
             cfs, pcfs = map(operator.attrgetter('computed_font_size'), (elem, elem.getparent()))
@@ -201,12 +201,12 @@ class Rationalizer(object):
                 if not extra_css.has_key(elem.get('id')):
                     extra_css[elem.get('id')] = []
                 extra_css[elem.get('id')].append('font-size: %f%%'%(100*(cfs/pcfs)))
-                
+
         css = CSSParser(loglevel=logging.ERROR).parseString('')
         for id, r in extra_css.items():
             css.add('#%s {%s}'%(id, ';'.join(r)))
         return css
-    
+
     @classmethod
     def rationalize(cls, stylesheets, root, opts):
         logger     = logging.getLogger('html2epub')
@@ -229,7 +229,7 @@ class Rationalizer(object):
 ################################################################################
 
 class FontTest(unittest.TestCase):
-    
+
     def setUp(self):
         from calibre.ebooks.epub import config
         self.opts = config(defaults='').parse()
@@ -246,10 +246,10 @@ class FontTest(unittest.TestCase):
                 <p id="p2">Some other <span class="it">text</span>.</p>
                 <p id="longest">The longest piece of single font size text in this entire file. Used to test resizing.</p>
             </body>
-        </html> 
+        </html>
         '''
         self.root = fromstring(self.html)
-        
+
     def do_test(self, css, base=DEFAULT_FONT_SIZE, scale=1):
         root1 = copy.deepcopy(self.root)
         root1.computed_font_size = DEFAULT_FONT_SIZE
@@ -262,39 +262,39 @@ class FontTest(unittest.TestCase):
         for elem in root2.iter(etree.Element):
             Rationalizer.compute_font_size(elem)
         for e1, e2 in zip(root1.xpath('//body')[0].iter(etree.Element), root2.xpath('//body')[0].iter(etree.Element)):
-            self.assertAlmostEqual(e1.computed_font_size, e2.computed_font_size, 
+            self.assertAlmostEqual(e1.computed_font_size, e2.computed_font_size,
                 msg='Computed font sizes for %s not equal. Original: %f Processed: %f'%\
                 (root1.getroottree().getpath(e1), e1.computed_font_size, e2.computed_font_size))
         return stylesheet2.cssText
-        
+
     def testStripping(self):
         'Test that any original entries are removed from the CSS'
         css = 'p { font: bold 10px italic smaller; font-size: x-large} \na { font-size: 0 }'
         css = CSSParser(loglevel=logging.ERROR).parseString(css)
         Rationalizer.compute_font_sizes(copy.deepcopy(self.root), [css])
-        self.assertEqual(css.cssText.replace(' ', '').replace('\n', ''), 
+        self.assertEqual(css.cssText.replace(' ', '').replace('\n', ''),
                          'p{font:bolditalic}')
-    
+
     def testIdentity(self):
         'Test that no unnecessary font size changes are made'
         extra_css = self.do_test('div {font-size:12pt} \nspan {font-size:100%}')
         self.assertEqual(extra_css.strip(), '')
-        
+
     def testRelativization(self):
         'Test conversion of absolute to relative sizes'
         self.do_test('#p1 {font: 24pt} b {font: 12pt} .it {font: 48pt} #p2 {font: 100%}')
-        
+
     def testResizing(self):
         'Test resizing of fonts'
         self.do_test('#longest {font: 24pt} .it {font:20pt; line-height:22pt}')
-        
+
 
 def suite():
     return unittest.TestLoader().loadTestsFromTestCase(FontTest)
-    
+
 def test():
     unittest.TextTestRunner(verbosity=2).run(suite())
 
 if __name__ == '__main__':
-    sys.exit(test())    
-        
\ No newline at end of file
+    sys.exit(test())
+
diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py
index 318cf5cc02..0ce4629062 100644
--- a/src/calibre/ebooks/epub/from_html.py
+++ b/src/calibre/ebooks/epub/from_html.py
@@ -38,7 +38,7 @@ from lxml.etree import XPath
 from lxml import html, etree
 from PyQt4.Qt import QApplication, QPixmap
 
-from calibre.ebooks.html import Processor, merge_metadata, get_filelist,\
+from calibre.ebooks.html_old import Processor, merge_metadata, get_filelist,\
     opf_traverse, create_metadata, rebase_toc, Link, parser
 from calibre.ebooks.epub import config as common_config, tostring
 from calibre.ptempfile import TemporaryDirectory
diff --git a/src/calibre/ebooks/epub/iterator.py b/src/calibre/ebooks/epub/iterator.py
index e55d402bef..5d47c93ea3 100644
--- a/src/calibre/ebooks/epub/iterator.py
+++ b/src/calibre/ebooks/epub/iterator.py
@@ -16,7 +16,7 @@ from calibre.ebooks.epub import config
 from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ptempfile import TemporaryDirectory
 from calibre.ebooks.chardet import xml_to_unicode
-from calibre.ebooks.html import create_dir
+from calibre.ebooks.html_old import create_dir
 from calibre.utils.zipfile import safe_replace, ZipFile
 from calibre.utils.config import DynamicConfig
 
diff --git a/src/calibre/ebooks/epub/split.py b/src/calibre/ebooks/epub/split.py
index c39fe6d181..8ff62a1c4b 100644
--- a/src/calibre/ebooks/epub/split.py
+++ b/src/calibre/ebooks/epub/split.py
@@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
 Split the flows in an epub file to conform to size limitations.
 '''
 
-import os, math, logging, functools, collections, re, copy, sys
+import os, math, functools, collections, re, copy, sys
 
 from lxml.etree import XPath as _XPath
 from lxml import etree, html
@@ -24,16 +24,16 @@ SPLIT_ATTR       = 'cs'
 SPLIT_POINT_ATTR = 'csp'
 
 class SplitError(ValueError):
-    
+
     def __init__(self, path, root):
         size = len(tostring(root))/1024.
-        ValueError.__init__(self, _('Could not find reasonable point at which to split: %s Sub-tree size: %d KB')% 
+        ValueError.__init__(self, _('Could not find reasonable point at which to split: %s Sub-tree size: %d KB')%
                             (os.path.basename(path), size))
 
-    
+
 
 class Splitter(object):
-    
+
     def __init__(self, path, opts, stylesheet_map, opf):
         self.setup_cli_handler(opts.verbose)
         self.path = path
@@ -44,10 +44,10 @@ class Splitter(object):
         self.orig_size = os.stat(content(path)).st_size
         self.log_info('\tSplitting %s (%d KB)', path, self.orig_size/1024.)
         root = html.fromstring(open(content(path)).read())
-            
+
         self.page_breaks, self.trees = [], []
         self.split_size = 0
-        
+
         # Split on page breaks
         self.splitting_on_page_breaks = True
         if not opts.dont_split_on_page_breaks:
@@ -59,7 +59,7 @@ class Splitter(object):
         else:
             self.trees = [root.getroottree()]
             trees = list(self.trees)
-        
+
         # Split any remaining over-sized trees
         self.splitting_on_page_breaks = False
         if self.opts.profile.flow_size < sys.maxint:
@@ -67,7 +67,7 @@ class Splitter(object):
             self.log_info('\tLooking for large trees...')
             for i, tree in enumerate(list(trees)):
                 self.trees = []
-                size = len(tostring(tree.getroot())) 
+                size = len(tostring(tree.getroot()))
                 if size > self.opts.profile.flow_size:
                     lt_found = True
                     try:
@@ -81,7 +81,7 @@ class Splitter(object):
                     trees[i:i+1] = list(self.trees)
             if not lt_found:
                 self.log_info('\tNo large trees found')
-        
+
         self.trees = trees
         self.was_split = len(self.trees) > 1
         if self.was_split:
@@ -91,17 +91,17 @@ class Splitter(object):
                 for f in self.files:
                     self.log_info('\t\t\t%s - %d KB', f, os.stat(content(f)).st_size/1024.)
             self.fix_opf(opf)
-            
+
         self.trees = None
-        
-    
+
+
     def split_text(self, text, root, size):
         self.log_debug('\t\t\tSplitting text of length: %d'%len(text))
         rest = text.replace('\r', '')
         parts = re.split('\n\n', rest)
         self.log_debug('\t\t\t\tFound %d parts'%len(parts))
         if max(map(len, parts)) > size:
-            raise SplitError('Cannot split as file contains a <pre> tag with a very large paragraph', root) 
+            raise SplitError('Cannot split as file contains a <pre> tag with a very large paragraph', root)
         ans = []
         buf = ''
         for part in parts:
@@ -111,8 +111,8 @@ class Splitter(object):
                 ans.append(buf)
                 buf = part
         return ans
-            
-    
+
+
     def split_to_size(self, tree):
         self.log_debug('\t\tSplitting...')
         root = tree.getroot()
@@ -134,7 +134,7 @@ class Splitter(object):
                 p = pre.getparent()
                 i = p.index(pre)
                 p[i:i+1] = new_pres
-        
+
         split_point, before = self.find_split_point(root)
         if split_point is None or self.split_size > 6*self.orig_size:
             if not self.always_remove:
@@ -142,7 +142,7 @@ class Splitter(object):
                                 'structure preservation. This may cause '
                                 'incorrect rendering.'))
             raise SplitError(self.path, root)
-        
+
         for t in self.do_split(tree, split_point, before):
             r = t.getroot()
             if self.is_page_empty(r):
@@ -151,12 +151,12 @@ class Splitter(object):
             if size <= self.opts.profile.flow_size:
                 self.trees.append(t)
                 #print tostring(t.getroot(), pretty_print=True)
-                self.log_debug('\t\t\tCommitted sub-tree #%d (%d KB)', 
+                self.log_debug('\t\t\tCommitted sub-tree #%d (%d KB)',
                                len(self.trees), size/1024.)
                 self.split_size += size
             else:
                 self.split_to_size(t)
-    
+
     def is_page_empty(self, root):
         body = root.find('body')
         if body is None:
@@ -170,14 +170,14 @@ class Splitter(object):
             if img.get('style', '') != 'display:none':
                 return False
         return True
-                
+
     def do_split(self, tree, split_point, before):
         '''
-        Split ``tree`` into a *before* and *after* tree at ``split_point``, 
-        preserving tag structure, but not duplicating any text. 
+        Split ``tree`` into a *before* and *after* tree at ``split_point``,
+        preserving tag structure, but not duplicating any text.
         All tags that have had their text and tail
         removed have the attribute ``calibre_split`` set to 1.
-        
+
         :param before: If True tree is split before split_point, otherwise after split_point
         :return: before_tree, after_tree
         '''
@@ -188,7 +188,7 @@ class Splitter(object):
         body, body2  = root.body, root2.body
         split_point  = root.xpath(path)[0]
         split_point2 = root2.xpath(path)[0]
-        
+
         def nix_element(elem, top=True):
             if self.always_remove:
                 parent = elem.getparent()
@@ -198,18 +198,18 @@ class Splitter(object):
                 else:
                     index = parent.index(elem)
                     parent[index:index+1] = list(elem.iterchildren())
-                
+
             else:
                 elem.text = u''
                 elem.tail = u''
                 elem.set(SPLIT_ATTR, '1')
                 if elem.tag.lower() in ['ul', 'ol', 'dl', 'table', 'hr', 'img']:
                     elem.set('style', 'display:none')
-        
+
         def fix_split_point(sp):
             if not self.splitting_on_page_breaks:
-                sp.set('style', sp.get('style', '')+'page-break-before:avoid;page-break-after:avoid') 
-        
+                sp.set('style', sp.get('style', '')+'page-break-before:avoid;page-break-after:avoid')
+
         # Tree 1
         hit_split_point = False
         for elem in list(body.iterdescendants(etree.Element)):
@@ -223,8 +223,8 @@ class Splitter(object):
                 continue
             if hit_split_point:
                 nix_element(elem)
-            
-            
+
+
         # Tree 2
         hit_split_point = False
         for elem in list(body2.iterdescendants(etree.Element)):
@@ -238,17 +238,17 @@ class Splitter(object):
                 continue
             if not hit_split_point:
                 nix_element(elem, top=False)
-        
+
         return tree, tree2
-                
-    
+
+
     def split_on_page_breaks(self, orig_tree):
         ordered_ids = []
         for elem in orig_tree.xpath('//*[@id]'):
             id = elem.get('id')
             if id in self.page_break_ids:
                 ordered_ids.append(self.page_breaks[self.page_break_ids.index(id)])
-                
+
         self.trees = []
         tree = orig_tree
         for pattern, before in ordered_ids:
@@ -260,13 +260,13 @@ class Splitter(object):
                 tree = after
         self.trees.append(tree)
         self.trees = [t for t in self.trees if not self.is_page_empty(t.getroot())]
-                
-            
-                
+
+
+
     def find_page_breaks(self, stylesheets, root):
         '''
         Find all elements that have either page-break-before or page-break-after set.
-        Populates `self.page_breaks` with id based XPath selectors (for elements that don't 
+        Populates `self.page_breaks` with id based XPath selectors (for elements that don't
         have ids, an id is created).
         '''
         page_break_selectors = set([])
@@ -283,16 +283,16 @@ class Splitter(object):
                     page_break_selectors.add((CSSSelector(rule.selectorText), False))
             except:
                 pass
-            
+
         page_breaks = set([])
         for selector, before in page_break_selectors:
             for elem in selector(root):
                 elem.pb_before = before
                 page_breaks.add(elem)
-                
+
         for i, elem in enumerate(root.iter()):
             elem.pb_order = i
-            
+
         page_breaks = list(page_breaks)
         page_breaks.sort(cmp=lambda x,y : cmp(x.pb_order, y.pb_order))
         self.page_break_ids = []
@@ -300,12 +300,12 @@ class Splitter(object):
             x.set('id', x.get('id', 'calibre_pb_%d'%i))
             id = x.get('id')
             self.page_breaks.append((XPath('//*[@id="%s"]'%id), x.pb_before))
-            self.page_break_ids.append(id)                        
-        
-        
+            self.page_break_ids.append(id)
+
+
     def find_split_point(self, root):
         '''
-        Find the tag at which to split the tree rooted at `root`. 
+        Find the tag at which to split the tree rooted at `root`.
         Search order is:
             * Heading tags
             * <div> tags
@@ -314,7 +314,7 @@ class Splitter(object):
             * <p> tags
             * <br> tags
             * <li> tags
-            
+
         We try to split in the "middle" of the file (as defined by tag counts.
         '''
         def pick_elem(elems):
@@ -325,18 +325,18 @@ class Splitter(object):
                     i = int(math.floor(len(elems)/2.))
                     elems[i].set(SPLIT_POINT_ATTR, '1')
                     return elems[i]
-    
+
         for path in (
-                     '//*[re:match(name(), "h[1-6]", "i")]', 
+                     '//*[re:match(name(), "h[1-6]", "i")]',
                      '/html/body/div',
                      '//pre',
-                     '//hr', 
+                     '//hr',
                      '//p',
                      '//div',
                      '//br',
                      '//li',
                      ):
-            elems = root.xpath(path, 
+            elems = root.xpath(path,
                     namespaces={'re':'http://exslt.org/regular-expressions'})
             elem = pick_elem(elems)
             if elem is not None:
@@ -345,9 +345,9 @@ class Splitter(object):
                 except:
                     continue
                 return elem, True
-            
+
         return None, True
-    
+
     def commit(self):
         '''
         Commit all changes caused by the split. This removes the previously
@@ -357,7 +357,7 @@ class Splitter(object):
         '''
         self.anchor_map = collections.defaultdict(lambda :self.base%0)
         self.files = []
-        
+
         for i, tree in enumerate(self.trees):
             root = tree.getroot()
             self.files.append(self.base%i)
@@ -367,7 +367,7 @@ class Splitter(object):
             for elem in root.xpath('//*[@%s or @%s]'%(SPLIT_ATTR, SPLIT_POINT_ATTR)):
                 elem.attrib.pop(SPLIT_ATTR, None)
                 elem.attrib.pop(SPLIT_POINT_ATTR, '0')
-                
+
         for current, tree in zip(self.files, self.trees):
             for a in tree.getroot().xpath('//a[@href]'):
                 href = a.get('href').strip()
@@ -375,10 +375,10 @@ class Splitter(object):
                     anchor = href[1:]
                     file = self.anchor_map[anchor]
                     if file != current:
-                        a.set('href', file+href)            
+                        a.set('href', file+href)
             open(content(current), 'wb').\
                 write(tostring(tree.getroot(), pretty_print=self.opts.pretty_print))
-            
+
         os.remove(content(self.path))
 
 
@@ -391,12 +391,12 @@ class Splitter(object):
         id_map = {}
         for item in items:
             id_map[item.get('id')] = opf.replace_manifest_item(item, new_items)
-        
+
         for id in id_map.keys():
             opf.replace_spine_items_by_idref(id, id_map[id])
-        
+
         for ref in opf.iterguide():
-            href = ref.get('href', '') 
+            href = ref.get('href', '')
             if href.startswith('content/'+self.path):
                 href = href.split('#')
                 frag = None
@@ -408,8 +408,8 @@ class Splitter(object):
                 new_file = self.anchor_map[frag]
                 ref.set('href', 'content/'+new_file+('' if frag is None else ('#'+frag)))
 
-          
-                
+
+
 def fix_content_links(html_files, changes, opts):
     split_files = [f.path for f in changes]
     anchor_maps = [f.anchor_map for f in changes]
@@ -420,7 +420,7 @@ def fix_content_links(html_files, changes, opts):
             files[i:i+1] = changes[j].files
         except ValueError:
             continue
-        
+
     for htmlfile in files:
         changed = False
         root = html.fromstring(open(content(htmlfile), 'rb').read())
@@ -439,7 +439,7 @@ def fix_content_links(html_files, changes, opts):
                     frag = ('#'+anchor) if anchor else ''
                     a.set('href', newf+frag)
                     changed = True
-                    
+
         if changed:
             open(content(htmlfile), 'wb').write(tostring(root, pretty_print=opts.pretty_print))
 
@@ -448,7 +448,7 @@ def fix_ncx(path, changes):
     anchor_maps = [f.anchor_map for f in changes]
     tree = etree.parse(path)
     changed = False
-    for content in tree.getroot().xpath('//x:content[@src]', 
+    for content in tree.getroot().xpath('//x:content[@src]',
                     namespaces={'x':"http://www.daisy.org/z3986/2005/ncx/"}):
         href = content.get('src')
         if not href.startswith('#'):
@@ -481,21 +481,21 @@ def find_html_files(opf):
             if os.path.exists(content(f)):
                 html_files.append(f)
     return html_files
-        
+
 
 def split(pathtoopf, opts, stylesheet_map):
     pathtoopf = os.path.abspath(pathtoopf)
     opf = OPF(open(pathtoopf, 'rb'), os.path.dirname(pathtoopf))
-    
+
     with CurrentDir(os.path.dirname(pathtoopf)):
         html_files = find_html_files(opf)
         changes = [Splitter(f, opts, stylesheet_map, opf) for f in html_files]
         changes = [c for c in changes if c.was_split]
-        
+
         fix_content_links(html_files, changes, opts)
         for item in opf.itermanifest():
             if item.get('media-type', '') == 'application/x-dtbncx+xml':
                 fix_ncx(item.get('href'), changes)
-                break 
+                break
 
         open(pathtoopf, 'wb').write(opf.render())
diff --git a/src/calibre/ebooks/html/__init__.py b/src/calibre/ebooks/html/__init__.py
new file mode 100644
index 0000000000..9a8f8e2d20
--- /dev/null
+++ b/src/calibre/ebooks/html/__init__.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import re
+
+from lxml.etree import tostring as _tostring
+
+def tostring(root, strip_comments=False, pretty_print=False):
+    '''
+    Serialize processed XHTML to UTF-8 with an XML declaration. Sets namespace attributes on `root` in place.
+    '''
+    root.set('xmlns', 'http://www.w3.org/1999/xhtml')
+    root.set('{http://www.w3.org/1999/xhtml}xlink', 'http://www.w3.org/1999/xlink')
+    for x in root.iter():  # also yields comments/PIs, whose .tag is not a string
+        if hasattr(x.tag, 'rpartition') and x.tag.rpartition('}')[-1].lower() == 'svg':
+            x.set('xmlns', 'http://www.w3.org/2000/svg')
+
+    ans = _tostring(root, encoding='utf-8', pretty_print=pretty_print)
+    if strip_comments:
+        ans = re.compile(r'<!--.*?-->', re.DOTALL).sub('', ans)  # non-greedy, may span lines
+    ans = '<?xml version="1.0" encoding="utf-8" ?>\n'+ans
+
+    return ans
+
+
diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py
new file mode 100644
index 0000000000..dd9aa0285c
--- /dev/null
+++ b/src/calibre/ebooks/html/input.py
@@ -0,0 +1,342 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+'''
+Input plugin for HTML or OPF ebooks.
+'''
+
+import os, re, sys, cStringIO
+from urlparse import urlparse, urlunparse
+from urllib import unquote
+
+from calibre.customize.conversion import InputFormatPlugin
+from calibre.ebooks.metadata.meta import get_metadata
+from calibre.ebooks.metadata.opf2 import OPF, OPFCreator
+from calibre.ebooks.metadata import MetaInformation
+from calibre.ebooks.chardet import xml_to_unicode
+from calibre.customize.conversion import OptionRecommendation
+from calibre import unicode_path
+
+class Link(object):
+    '''
+    Represents a link in a HTML file. Local links to other files also get a filesystem path.
+    '''
+
+    @classmethod
+    def url_to_local_path(cls, url, base):
+        path = urlunparse(('', '', url.path, url.params, url.query, ''))  # drop scheme, netloc and fragment
+        path = unquote(path)
+        if os.path.isabs(path):
+            return path
+        return os.path.abspath(os.path.join(base, path))  # relative paths are resolved against base
+
+    def __init__(self, url, base):
+        '''
+        :param url:  The url this link points to. Must be an unquoted unicode string.
+        :param base: The base directory that relative URLs are with respect to.
+                     Must be a unicode string.
+        '''
+        assert isinstance(url, unicode) and isinstance(base, unicode)
+        self.url         = url
+        self.parsed_url  = urlparse(self.url)
+        self.is_local    = self.parsed_url.scheme in ('', 'file')
+        self.is_internal = self.is_local and not bool(self.parsed_url.path)  # local link with an empty path
+        self.path        = None  # stays None for non-local and internal links
+        self.fragment    = unquote(self.parsed_url.fragment)
+        if self.is_local and not self.is_internal:
+            self.path = self.url_to_local_path(self.parsed_url, base)
+
+    def __hash__(self):
+        if self.path is None:  # no local path: hash the raw url instead
+            return hash(self.url)
+        return hash(self.path)
+
+    def __eq__(self, other):
+        return self.path == getattr(other, 'path', other)  # other may have a .path or be a bare path
+
+    def __str__(self):
+        return u'Link: %s --> %s'%(self.url, self.path)
+
+
+class IgnoreFile(Exception):
+    '''Raised for a linked file that should be skipped (unreadable or binary).'''
+    def __init__(self, msg, errno):
+        Exception.__init__(self, msg)
+        self.doesnt_exist = errno == 2  # errno 2 is ENOENT (no such file or directory)
+        self.errno = errno
+
+class HTMLFile(object):
+    '''
+    Contains basic information about an HTML file. This
+    includes a list of links to other files as well as
+    the encoding of each file. Also tries to detect if the file is not a HTML
+    file in which case :member:`is_binary` is set to True.
+
+    The encoding of the file is available as :member:`encoding`.
+    '''
+
+    HTML_PAT  = re.compile(r'<\s*html', re.IGNORECASE)
+    TITLE_PAT = re.compile('<title>([^<>]+)</title>', re.IGNORECASE)
+    LINK_PAT  = re.compile(
+    r'<\s*a\s+.*?href\s*=\s*(?:(?:"(?P<url1>[^"]+)")|(?:\'(?P<url2>[^\']+)\')|(?P<url3>[^\s>]+))',
+    re.DOTALL|re.IGNORECASE)
+
+    def __init__(self, path_to_html_file, level, encoding, verbose, referrer=None):
+        '''
+        :param level: The level of this file. Should be 0 for the root file.
+        :param encoding: Use `encoding` to decode HTML.
+        :param referrer: The :class:`HTMLFile` that first refers to this file.
+        '''
+        self.path     = unicode_path(path_to_html_file, abs=True)
+        self.title    = os.path.splitext(os.path.basename(self.path))[0]  # default title: file name without extension
+        self.base     = os.path.dirname(self.path)
+        self.level    = level
+        self.referrer = referrer
+        self.links    = []
+
+        try:
+            with open(self.path, 'rb') as f:
+                src = f.read()
+        except IOError, err:
+            msg = 'Could not read from file: %s with error: %s'%(self.path, unicode(err))
+            if level == 0:  # an unreadable root file is an error; other files raise IgnoreFile
+                raise IOError(msg)
+            raise IgnoreFile(msg, err.errno)
+
+        self.is_binary = not bool(self.HTML_PAT.search(src[:1024]))  # no <html tag in the first 1024 bytes
+        if not self.is_binary:
+            if encoding is None:
+                encoding = xml_to_unicode(src[:4096], verbose=verbose)[-1]  # last element is used as the encoding
+                self.encoding = encoding
+            else:
+                self.encoding = encoding
+
+            src = src.decode(encoding, 'replace')
+            match = self.TITLE_PAT.search(src)
+            self.title = match.group(1) if match is not None else self.title
+            self.find_links(src)
+
+
+
+    def __eq__(self, other):
+        return self.path == getattr(other, 'path', other)  # other may have a .path or be a bare path
+
+    def __str__(self):
+        return u'HTMLFile:%d:%s:%s'%(self.level, 'b' if self.is_binary else 'a', self.path)
+
+    def __repr__(self):
+        return str(self)
+
+
+    def find_links(self, src):
+        for match in self.LINK_PAT.finditer(src):
+            url = None
+            for i in ('url1', 'url2', 'url3'):  # double-quoted, single-quoted, unquoted href
+                url = match.group(i)
+                if url:
+                    break
+            link = self.resolve(url)
+            if link not in self.links:  # membership uses Link.__eq__, i.e. compares paths
+                self.links.append(link)
+
+    def resolve(self, url):
+        return Link(url, self.base)
+
+
+def depth_first(root, flat, visited=None):
+    visited = set() if visited is None else visited  # fresh set per top-level call, shared by recursion
+    yield root
+    visited.add(root)
+    for link in root.links:
+        if link.path is not None and link not in visited:
+            try:
+                hf = flat[flat.index(link)]  # the HTMLFile in flat that this link points to
+            except ValueError: # Can happen if max_levels is used
+                continue
+            if hf not in visited:
+                yield hf
+                visited.add(hf)
+                for hf in depth_first(hf, flat, visited):
+                    if hf not in visited:
+                        yield hf
+                        visited.add(hf)
+
+
+def traverse(path_to_html_file, max_levels=sys.maxint, verbose=0, encoding=None):
+    '''
+    Recursively traverse all links in the HTML file.
+
+    :param max_levels: Maximum levels of recursion. Must be non-negative. 0
+                       implies that no links in the root HTML file are followed.
+    :param encoding:   Specify character encoding of HTML files. If `None` it is
+                       auto-detected.
+    :return:           A pair of lists (breadth_first, depth_first). Each list contains
+                       :class:`HTMLFile` objects.
+    '''
+    assert max_levels >= 0
+    level = 0
+    flat =  [HTMLFile(path_to_html_file, level, encoding, verbose)]  # every file found so far, level by level
+    next_level = list(flat)
+    while level < max_levels and len(next_level) > 0:
+        level += 1
+        nl = []  # files discovered at this level
+        for hf in next_level:
+            rejects = []
+            for link in hf.links:
+                if link.path is None or link.path in flat:  # no local path, or file already collected
+                    continue
+                try:
+                    nf = HTMLFile(link.path, level, encoding, verbose, referrer=hf)
+                    if nf.is_binary:
+                        raise IgnoreFile('%s is a binary file'%nf.path, -1)  # -1 is a placeholder errno
+                    nl.append(nf)
+                    flat.append(nf)
+                except IgnoreFile, err:
+                    rejects.append(link)
+                    if not err.doesnt_exist or verbose > 1:  # missing files are only reported when verbose > 1
+                        print repr(err)
+            for link in rejects:  # removed after the loop so hf.links is not mutated while iterating
+                hf.links.remove(link)
+
+        next_level = list(nl)
+    orec = sys.getrecursionlimit()
+    sys.setrecursionlimit(500000)  # depth_first is recursive; the old limit is restored below
+    try:
+        return flat, list(depth_first(flat[0], flat))
+    finally:
+        sys.setrecursionlimit(orec)
+
+
+def opf_traverse(opf_reader, verbose=0, encoding=None):
+    '''
+    Return a list of :class:`HTMLFile` objects in the order specified by the
+    `<spine>` element of the OPF.
+
+    :param opf_reader: An :class:`calibre.ebooks.metadata.opf2.OPF` instance.
+    :param encoding:   Specify character encoding of HTML files. If `None` it is
+                       auto-detected.
+    '''
+    if not opf_reader.spine:
+        raise ValueError('OPF does not have a spine')
+    flat = []
+    for path in opf_reader.spine.items():
+        path = os.path.abspath(path)
+        if path not in flat:
+            flat.append(os.path.abspath(path))
+    for item in opf_reader.manifest:  # then HTML manifest items not already listed
+        if 'html' in item.mime_type:
+            path = os.path.abspath(item.path)
+            if path not in flat:
+                flat.append(path)
+    for i, path in enumerate(flat):
+        if not os.path.exists(path):
+            path = path.replace('&', '%26')  # retry with the ampersand percent-encoded
+            if os.path.exists(path):
+                flat[i] = path
+                for item in opf_reader.itermanifest():  # NOTE(review): rewrites '&' in every manifest href
+                    item.set('href', item.get('href').replace('&', '%26'))
+    ans = []
+    for path in flat:
+        if os.path.exists(path):
+            ans.append(HTMLFile(path, 0, encoding, verbose))
+        else:
+            print 'WARNING: OPF spine item %s does not exist'%path
+    ans = [f for f in ans if not f.is_binary]  # keep only files detected as HTML
+    return ans
+
+def search_for_opf(dir):
+    for f in os.listdir(dir):  # first *.opf entry (case-insensitive) wins; returns None if there is none
+        if f.lower().endswith('.opf'):
+            return OPF(open(os.path.join(dir, f), 'rb'), dir)  # NOTE(review): file handle is never explicitly closed
+
+def get_filelist(htmlfile, dir, opts, log):
+    '''
+    Build list of files referenced by html file or try to detect and use an
+    OPF file instead. Returns the pair (opf, filelist); opf is None if no OPF was found.
+    '''
+    print 'Building file list...'
+    opf = search_for_opf(dir)
+    filelist = None
+    if opf is not None:
+        try:
+            filelist = opf_traverse(opf, verbose=opts.verbose,
+                    encoding=opts.input_encoding)
+        except Exception:  # best effort: fall back to link traversal, but let KeyboardInterrupt through
+            log.debug('\tCould not build file list from OPF, following links instead')
+    if not filelist:
+        filelist = traverse(htmlfile, max_levels=int(opts.max_levels),
+                            verbose=opts.verbose,
+                            encoding=opts.input_encoding)\
+                    [0 if opts.breadth_first else 1]
+    if opts.verbose:
+        log.debug('\tFound files...')
+        for f in filelist:
+            log.debug('\t\t', f)
+    return opf, filelist
+
+
+class HTMLInput(InputFormatPlugin):
+    '''Input plugin that builds an OEB book from an HTML file and the files it links to, or from an OPF.'''
+    name        = 'HTML Input'
+    author      = 'Kovid Goyal'
+    description = 'Convert HTML and OPF files to an OEB'
+    file_types  = set(['opf', 'html', 'htm', 'xhtml', 'xhtm'])
+
+    options = set([
+        OptionRecommendation(name='breadth_first',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('Traverse links in HTML files breadth first. Normally, '
+                    'they are traversed depth first.'
+                   )
+        ),
+
+        OptionRecommendation(name='max_levels',
+            recommended_value=5, level=OptionRecommendation.LOW,
+            help=_('Maximum levels of recursion when following links in '
+                   'HTML files. Must be non-negative. 0 implies that no '
+                   'links in the root HTML file are followed. Default is '
+                   '%default.'
+                   )
+        ),
+
+    ])
+
+    def convert(self, stream, opts, file_ext, log,
+                accelerators):
+        basedir = os.getcwd()  # fallback when the stream has no name
+        if hasattr(stream, 'name'):
+            basedir = os.path.dirname(stream.name)
+        if file_ext == 'opf':
+            opf = OPF(stream, basedir)
+            filelist = opf_traverse(opf, verbose=opts.verbose,
+                    encoding=opts.input_encoding)
+            mi = MetaInformation(opf)
+        else:
+            opf, filelist = get_filelist(stream.name, basedir, opts, log)  # NOTE(review): needs stream.name, unlike the guard above
+            mi = MetaInformation(opf)
+            mi.smart_update(get_metadata(stream, 'html'))
+
+        mi = OPFCreator(os.getcwdu(), mi)  # the generated OPF is based in the current directory
+        mi.guide = None
+        entries = [(f.path, 'application/xhtml+xml') for f in filelist]
+        mi.create_manifest(entries)
+        mi.create_spine([f.path for f in filelist])
+
+        tocbuf = cStringIO.StringIO()
+        mi.render(open('metadata.opf', 'wb'), tocbuf, 'toc.ncx')  # NOTE(review): output file is never explicitly closed
+        toc = tocbuf.getvalue()
+        if toc:  # render() only writes to the NCX stream when mi has a toc
+            open('toc.ncx', 'wb').write(toc)
+
+        from calibre.ebooks.conversion.plumber import create_oebbook
+        return create_oebbook(log, os.path.abspath('metadata.opf'))
+
+
+
+
diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html_old.py
similarity index 100%
rename from src/calibre/ebooks/html.py
rename to src/calibre/ebooks/html_old.py
diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py
index f6b5a9bd1a..4b7648d81f 100644
--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@@ -683,26 +683,6 @@ class OPF(object):
 
         return property(fget=fget, fset=fset)
 
-    @dynamic_property
-    def title_sort(self):
-
-        def fget(self):
-            matches = self.title_path(self.metadata)
-            if matches:
-                for match in matches:
-                    ans = match.get('{%s}file-as'%self.NAMESPACES['opf'], None)
-                    if not ans:
-                        ans = match.get('file-as', None)
-                    if ans:
-                        return ans
-
-        def fset(self, val):
-            matches = self.title_path(self.metadata)
-            if matches:
-                matches[0].set('file-as', unicode(val))
-
-        return property(fget=fget, fset=fset)
-
     @dynamic_property
     def tags(self):
 
@@ -943,9 +923,10 @@ class OPFCreator(MetaInformation):
         from calibre.resources import opf_template
         from calibre.utils.genshi.template import MarkupTemplate
         template = MarkupTemplate(opf_template)
+        toc = getattr(self, 'toc', None)
         if self.manifest:
             self.manifest.set_basedir(self.base_path)
-            if ncx_manifest_entry is not None:
+            if ncx_manifest_entry is not None and toc is not None:
                 if not os.path.isabs(ncx_manifest_entry):
                     ncx_manifest_entry = os.path.join(self.base_path, ncx_manifest_entry)
                 remove = [i for i in self.manifest if i.id == 'ncx']
@@ -965,7 +946,6 @@ class OPFCreator(MetaInformation):
         opf = template.generate(__appname__=__appname__, mi=self, __version__=__version__).render('xml')
         opf_stream.write(opf)
         opf_stream.flush()
-        toc = getattr(self, 'toc', None)
         if toc is not None and ncx_stream is not None:
             toc.render(ncx_stream, self.application_id)
             ncx_stream.flush()
@@ -1030,19 +1010,8 @@ class OPFTest(unittest.TestCase):
         self.opf.smart_update(MetaInformation(self.opf))
         self.testReading()
 
-    def testCreator(self):
-        opf = OPFCreator(os.getcwd(), self.opf)
-        buf = cStringIO.StringIO()
-        opf.render(buf)
-        raw = buf.getvalue()
-        self.testReading(opf=OPF(cStringIO.StringIO(raw), os.getcwd()))
-
-    def testSmartUpdate(self):
-        self.opf.smart_update(self.opf)
-        self.testReading()
-
 def suite():
     return unittest.TestLoader().loadTestsFromTestCase(OPFTest)
 
 def test():
-    unittest.TextTestRunner(verbosity=2).run(suite())
\ No newline at end of file
+    unittest.TextTestRunner(verbosity=2).run(suite())
diff --git a/src/calibre/ebooks/mobi/input.py b/src/calibre/ebooks/mobi/input.py
index 8f2e24a831..2eb45c9161 100644
--- a/src/calibre/ebooks/mobi/input.py
+++ b/src/calibre/ebooks/mobi/input.py
@@ -29,5 +29,5 @@ class MOBIInput(InputFormatPlugin):
             with open(f, 'wb') as q:
                 q.write(html.tostring(root, encoding='utf-8', method='xml',
                     include_meta_content_type=False))
-            accelerators['pagebreaks'] = {f: '//div[@class="mbp_pagebreak"]'}
+            accelerators['pagebreaks'] = {f: '//*[@class="mbp_pagebreak"]'}
         return mr.created_opf_path
diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py
index a78b5085d9..6032ae549a 100644
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@@ -522,7 +522,7 @@ class MobiReader(object):
         else:
             raise MobiError('Unknown compression algorithm: %s'%repr(self.book_header.compression_type))
         if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
-            self.mobi_html = self.mobi_html.replace('\r ', '\n\n ')
+            self.mobi_html = self.mobi_html.replace('\r ', '\n\n').replace('\0', '')
         return processed_records
 
 
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index 2abf658697..76a6648e8d 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -151,7 +151,7 @@ def resolve_base_href(root):
         return
     make_links_absolute(root, base_href, resolve_base_href=False)
 
-def rewrite_links(root, link_repl_func, resolve_base_href=True):
+def rewrite_links(root, link_repl_func, resolve_base_href=False):
     '''
     Rewrite all the links in the document.  For each link
     ``link_repl_func(link)`` will be called, and the return value
diff --git a/src/calibre/ebooks/oeb/transforms/package.py b/src/calibre/ebooks/oeb/transforms/package.py
index d8fb485dde..de775f8865 100644
--- a/src/calibre/ebooks/oeb/transforms/package.py
+++ b/src/calibre/ebooks/oeb/transforms/package.py
@@ -6,9 +6,16 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import os, shutil
+import os
+from urllib import unquote as urlunquote
+from functools import partial
 
-from calibre.ebooks.oeb.base import OEB_DOCS
+from lxml import etree
+import cssutils
+
+from calibre.constants import islinux
+from calibre.ebooks.oeb.base import OEB_DOCS, urlnormalize, urldefrag, \
+                                    rewrite_links
 
 class Package(object):
 
@@ -29,18 +36,69 @@ class Package(object):
         self.new_base_path = os.path.abspath(base)
 
     def rewrite_links_in(self, item):
-        new_items = []
-        return new_items
+        base = os.path.join(self.new_base_path, *item.href.split('/'))
+        base = os.path.dirname(base)  # directory of the item under new_base_path
+
+        if etree.iselement(item.data):  # parsed XML/HTML tree
+            self.rewrite_links_in_xml(item.data, base)
+        elif hasattr(item.data, 'cssText'):  # presumably a parsed cssutils stylesheet
+            self.rewrite_links_in_css(item.data, base)
+
+    def link_replacer(self, link_, base=''):
+        link = urlnormalize(link_)
+        link, frag = urldefrag(link)
+        link = urlunquote(link).replace('/', os.sep)  # URL path -> native filesystem path
+        if base and not os.path.isabs(link):
+            link = os.path.join(base, link)
+        link = os.path.abspath(link)
+        if not islinux:
+            link = link.lower()  # keys in self.map are lower-cased the same way
+        if link not in self.map:  # target was not moved: return the link untouched
+            return link_
+        nlink = os.path.relpath(self.map[link], base)
+        if frag:
+            nlink = '#'.join((nlink, frag))  # str.join takes a single iterable
+        return nlink.replace(os.sep, '/')
+
+    def rewrite_links_in_css(self, sheet, base):
+        # Pass every URL in the stylesheet through link_replacer, relative to base
+        cssutils.replaceUrls(sheet, partial(self.link_replacer, base=base))
+
+    def rewrite_links_in_xml(self, root, base):
+        # Pass every link in the tree through link_replacer, relative to base
+        rewrite_links(root, partial(self.link_replacer, base=base))
 
     def move_manifest_item(self, item):
         item.data # Make sure the data has been loaded and cached
-        old_abspath = os.path.join(self.old_base_path, *item.href.split('/'))
-        bname = item.href.split('/')[-1]
-        new_href = 'content/' + \
-                ('resources/' if item.media_type in OEB_DOCS else '')+bname
+        old_abspath = os.path.join(self.old_base_path,
+                *(urldefrag(item.href)[0].split('/')))
+        old_abspath = os.path.abspath(old_abspath)
+        bname = item.href.split('/')[-1].partition('#')[0]  # file name only, fragment stripped
+        new_href = 'content/resources/'  # default location for everything that is not a document
+        if item.media_type in OEB_DOCS:
+            new_href = 'content/'
+        elif item.href.lower().endswith('.ncx'):  # the NCX is placed at the top level
+            new_href = ''
+        new_href += bname  # NOTE(review): same-named files from different directories would collide
+
+        new_abspath = os.path.join(self.new_base_path, *new_href.split('/'))
+        new_abspath = os.path.abspath(new_abspath)
+        item.href   = new_href
+        if not islinux:  # presumably for case-insensitive filesystems; link_replacer lower-cases too
+            old_abspath, new_abspath = old_abspath.lower(), new_abspath.lower()
+        if old_abspath != new_abspath:
+            self.map[old_abspath] = new_abspath  # old absolute path -> new absolute path, read by link_replacer
+
+    def rewrite_links_in_toc(self, toc):
+        if toc.href:  # nodes without an href are left alone
+            toc.href = self.link_replacer(toc.href, base=self.new_base_path)
+
+        for x in toc:  # recurse into the entries yielded by iterating this node
+            self.rewrite_links_in_toc(x)
 
     def __call__(self, oeb, context):
         self.map = {}
+        self.oeb, self.log = oeb, oeb.log  # bind the book first: the code below reads self.oeb
         self.old_base_path = os.path.abspath(oeb.container.rootdir)
 
         for item in self.oeb.manifest:
@@ -49,4 +107,9 @@ class Package(object):
         for item in self.oeb.manifest:
             self.rewrite_links_in(item)
 
+        if getattr(oeb.toc, 'nodes', False):
+            self.rewrite_links_in_toc(oeb.toc)
 
+        if hasattr(oeb, 'guide'):
+            for ref in oeb.guide.values():
+                ref.href = self.link_replacer(ref.href, base=self.new_base_path)
diff --git a/src/calibre/ebooks/oeb/transforms/trimmanifest.py b/src/calibre/ebooks/oeb/transforms/trimmanifest.py
index cae56315e5..b3e9cd58e7 100644
--- a/src/calibre/ebooks/oeb/transforms/trimmanifest.py
+++ b/src/calibre/ebooks/oeb/transforms/trimmanifest.py
@@ -6,11 +6,12 @@ from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
 
-from itertools import chain
 from urlparse import urldefrag
+
+import cssutils
+
 from calibre.ebooks.oeb.base import CSS_MIME, OEB_DOCS
-from calibre.ebooks.oeb.base import LINK_SELECTORS, CSSURL_RE
-from calibre.ebooks.oeb.base import urlnormalize
+from calibre.ebooks.oeb.base import urlnormalize, iterlinks
 
 class ManifestTrimmer(object):
     @classmethod
@@ -44,16 +45,15 @@ class ManifestTrimmer(object):
                 if (item.media_type in OEB_DOCS or
                     item.media_type[-4:] in ('/xml', '+xml')) and \
                    item.data is not None:
-                    hrefs = [sel(item.data) for sel in LINK_SELECTORS]
-                    for href in chain(*hrefs):
+                    hrefs = [r[2] for r in iterlinks(item.data)]
+                    for href in hrefs:
                         href = item.abshref(urlnormalize(href))
                         if href in oeb.manifest.hrefs:
                             found = oeb.manifest.hrefs[href]
                             if found not in used:
                                 new.add(found)
                 elif item.media_type == CSS_MIME:
-                    for match in CSSURL_RE.finditer(item.data.cssText):
-                        href = match.group('url')
+                    for href in cssutils.getUrls(item.data):
                         href = item.abshref(urlnormalize(href))
                         if href in oeb.manifest.hrefs:
                             found = oeb.manifest.hrefs[href]
diff --git a/src/calibre/linux.py b/src/calibre/linux.py
index 592a25c170..ee51370b61 100644
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@@ -22,9 +22,6 @@ entry_points = {
              'web2disk           = calibre.web.fetch.simple:main',
              'feeds2disk         = calibre.web.feeds.main:main',
              'calibre-server     = calibre.library.server:main',
-             'feeds2lrf          = calibre.ebooks.lrf.feeds.convert_from:main',
-             'feeds2epub         = calibre.ebooks.epub.from_feeds:main',
-             'feeds2mobi         = calibre.ebooks.mobi.from_feeds:main',
              'web2lrf            = calibre.ebooks.lrf.web.convert_from:main',
              'lrf2lrs            = calibre.ebooks.lrf.lrfparser:main',
              'lrs2lrf            = calibre.ebooks.lrf.lrs.convert_from:main',
@@ -154,10 +151,7 @@ def setup_completion(fatal_errors):
         from calibre.ebooks.lrf.pdf.reflow import option_parser as pdfhtmlop
         from calibre.web.feeds.main import option_parser as feeds2disk
         from calibre.web.feeds.recipes import titles as feed_titles
-        from calibre.ebooks.lrf.feeds.convert_from import option_parser as feeds2lrf
         from calibre.ebooks.lrf.comic.convert_from import option_parser as comicop
-        from calibre.ebooks.epub.from_feeds import option_parser as feeds2epub
-        from calibre.ebooks.mobi.from_feeds import option_parser as feeds2mobi
         from calibre.ebooks.epub.from_comic import option_parser as comic2epub
         from calibre.ebooks.metadata.fetch import option_parser as fem_op
         from calibre.gui2.main import option_parser as guiop
@@ -192,9 +186,6 @@ def setup_completion(fatal_errors):
         f.write(opts_and_exts('comic2mobi', comic2epub, ['cbz', 'cbr']))
         f.write(opts_and_exts('comic2pdf', comic2epub, ['cbz', 'cbr']))
         f.write(opts_and_words('feeds2disk', feeds2disk, feed_titles))
-        f.write(opts_and_words('feeds2lrf', feeds2lrf, feed_titles))
-        f.write(opts_and_words('feeds2epub', feeds2epub, feed_titles))
-        f.write(opts_and_words('feeds2mobi', feeds2mobi, feed_titles))
         f.write(opts_and_words('fetch-ebook-metadata', fem_op, []))
         f.write(opts_and_words('calibre-smtp', smtp_op, []))
         f.write('''