From 4cd285859b6721c48eefd9b23fe47b0bfc5ab871 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 23 Apr 2009 22:31:11 -0700
Subject: [PATCH] Initial implementation of EPUB Output plugin

---
 src/calibre/customize/builtins.py             |   3 +-
 src/calibre/customize/profiles.py             |   4 +-
 src/calibre/ebooks/epub/__init__.py           | 173 ------
 src/calibre/ebooks/epub/fonts.py              | 300 ----------
 src/calibre/ebooks/epub/from_any.py           |  93 ---
 src/calibre/ebooks/epub/from_feeds.py         |  71 ---
 src/calibre/ebooks/epub/from_html.py          | 547 ------------------
 src/calibre/ebooks/epub/output.py             | 221 ++++++-
 src/calibre/ebooks/oeb/iterator.py            |   4 +-
 src/calibre/ebooks/oeb/transforms/guide.py    |  13 +-
 src/calibre/ebooks/oeb/transforms/rescale.py  |  37 ++
 src/calibre/ebooks/oeb/transforms/split.py    |   5 +-
 .../ebooks/oeb/transforms/structure.py        |  19 +-
 13 files changed, 285 insertions(+), 1205 deletions(-)
 delete mode 100644 src/calibre/ebooks/epub/fonts.py
 delete mode 100644 src/calibre/ebooks/epub/from_any.py
 delete mode 100644 src/calibre/ebooks/epub/from_feeds.py
 delete mode 100644 src/calibre/ebooks/epub/from_html.py
 create mode 100644 src/calibre/ebooks/oeb/transforms/rescale.py

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index e0e9158f0e..c726a19b2a 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -287,13 +287,14 @@ from calibre.ebooks.odt.input import ODTInput
 from calibre.ebooks.rtf.input import RTFInput
 from calibre.ebooks.html.input import HTMLInput
 from calibre.ebooks.oeb.output import OEBOutput
+from calibre.ebooks.epub.output import EPUBOutput
 from calibre.ebooks.txt.output import TXTOutput
 from calibre.ebooks.pdf.output import PDFOutput
 from calibre.customize.profiles import input_profiles, output_profiles
 
 plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
         TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
-        FB2Input, ODTInput, RTFInput]
+        FB2Input, ODTInput, RTFInput, EPUBOutput]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                         x.__name__.endswith('MetadataReader')]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py
index c11529f025..67dd920135 100644
--- a/src/calibre/customize/profiles.py
+++ b/src/calibre/customize/profiles.py
@@ -3,7 +3,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import sys, re
+import re
 from itertools import izip
 
 from calibre.customize import Plugin as _Plugin
@@ -22,7 +22,7 @@ class Plugin(_Plugin):
 
     fbase  = 12
     fsizes = [5, 7, 9, 12, 13.5, 17, 20, 22, 24]
-    screen_size = (800, 600)
+    screen_size = (1600, 1200)
     dpi = 100
 
     def __init__(self, *args, **kwargs):
diff --git a/src/calibre/ebooks/epub/__init__.py b/src/calibre/ebooks/epub/__init__.py
index 2bc076a8ad..f5de8421e0 100644
--- a/src/calibre/ebooks/epub/__init__.py
+++ b/src/calibre/ebooks/epub/__init__.py
@@ -6,32 +6,7 @@ __docformat__ = 'restructuredtext en'
 '''
 Conversion to EPUB.
 '''
-import sys, textwrap, re, os, uuid
-from itertools import cycle
-from calibre.utils.config import Config, StringConfig
 from calibre.utils.zipfile import ZipFile, ZIP_STORED
-from calibre.ebooks.html import tostring
-from lxml import etree
-
-class DefaultProfile(object):
-
-    flow_size            = sys.maxint
-    screen_size          = None
-    remove_special_chars = False
-    remove_object_tags   = False
-
-class PRS505(DefaultProfile):
-
-    flow_size            = 270000
-    screen_size          = (590, 765)
-    remove_special_chars = re.compile(u'[\u200b\u00ad]')
-    remove_object_tags   = True
-
-
-PROFILES = {
-            'PRS505' : PRS505,
-            'None'   : DefaultProfile,
-            }
 
 def rules(stylesheets):
     for s in stylesheets:
@@ -58,152 +33,4 @@ def initialize_container(path_to_container, opf_name='metadata.opf'):
     zf.writestr('META-INF/container.xml', CONTAINER)
     return zf
 
-def config(defaults=None, name='epub'):
-    desc = _('Options to control the conversion to EPUB')
-    if defaults is None:
-        c = Config(name, desc)
-    else:
-        c = StringConfig(defaults, desc)
 
-    c.update(common_config())
-    c.remove_opt('output')
-    c.remove_opt('zip')
-
-    c.add_opt('output', ['-o', '--output'], default=None,
-             help=_('The output EPUB file. If not specified, it is '
-                    'derived from the input file name.'))
-    c.add_opt('profile', ['--profile'], default='PRS505', choices=list(PROFILES.keys()),
-              help=_('Profile of the target device this EPUB is meant for. '
-                     'Set to None to create a device independent EPUB. '
-                     'The profile is used for device specific restrictions '
-                     'on the EPUB. Choices are: ')+str(list(PROFILES.keys())))
-    c.add_opt('override_css', ['--override-css'], default=None,
-              help=_('Either the path to a CSS stylesheet or raw CSS. '
-                     'This CSS will override any existing CSS '
-                     'declarations in the source files.'))
-    structure = c.add_group('structure detection',
-                            _('Control auto-detection of document structure.'))
-    structure('chapter', ['--chapter'],
-              default="//*[re:match(name(), 'h[1-2]') and "
-              "re:test(., 'chapter|book|section|part', 'i')] | "
-              "//*[@class = 'chapter']",
-            help=_('''\
-An XPath expression to detect chapter titles. The default is to consider <h1> or
-<h2> tags that contain the words "chapter","book","section" or "part" as chapter titles as
-well as any tags that have class="chapter".
-The expression used must evaluate to a list of elements. To disable chapter detection,
-use the expression "/". See the XPath Tutorial in the calibre User Manual for further
-help on using this feature.
-''').replace('\n', ' '))
-    structure('chapter_mark', ['--chapter-mark'], choices=['pagebreak', 'rule', 'both', 'none'],
-              default='pagebreak',
-              help=_('Specify how to mark detected chapters. A value of '
-                     '"pagebreak" will insert page breaks before chapters. '
-                     'A value of "rule" will insert a line before chapters. '
-                     'A value of "none" will disable chapter marking and a '
-                     'value of "both" will use both page breaks and lines '
-                     'to mark chapters.'))
-    structure('cover', ['--cover'], default=None,
-              help=_('Path to the cover to be used for this book'))
-    structure('prefer_metadata_cover', ['--prefer-metadata-cover'], default=False,
-              action='store_true',
-              help=_('Use the cover detected from the source file in preference '
-                     'to the specified cover.'))
-    structure('remove_first_image', ['--remove-first-image'], default=False,
-              help=_('Remove the first image from the input ebook. Useful if '
-                     'the first image in the source file is a cover and you '
-                     'are specifying an external cover.'))
-    structure('dont_split_on_page_breaks', ['--dont-split-on-page-breaks'], default=False,
-              help=_('Turn off splitting at page breaks. Normally, input files '
-                     'are automatically split at every page break into '
-                     'two files. This gives an output ebook that can be parsed '
-                     'faster and with less resources. However, splitting is '
-                     'slow and if your source file contains a very large '
-                     'number of page breaks, you should turn off splitting '
-                     'on page breaks.'))
-    structure('page', ['--page'], default=None,
-              help=_('XPath expression to detect page boundaries for building '
-                     'a custom pagination map, as used by AdobeDE. Default is '
-                     'not to build an explicit pagination map.'))
-    structure('page_names', ['--page-names'], default=None,
-              help=_('XPath expression to find the name of each page in the '
-                     'pagination map relative to its boundary element. '
-                     'Default is to number all pages staring with 1.'))
-    toc = c.add_group('toc',
-        _('''\
-Control the automatic generation of a Table of Contents. If an OPF file is detected
-and it specifies a Table of Contents, then that will be used rather than trying
-to auto-generate a Table of Contents.
-''').replace('\n', ' '))
-    toc('max_toc_links', ['--max-toc-links'], default=50,
-        help=_('Maximum number of links to insert into the TOC. Set to 0 '
-               'to disable. Default is: %default. Links are only added to the '
-               'TOC if less than the --toc-threshold number of chapters were detected.'))
-    toc('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
-        help=_("Don't add auto-detected chapters to the Table of Contents."))
-    toc('toc_threshold', ['--toc-threshold'], default=6,
-        help=_('If fewer than this number of chapters is detected, then links '
-               'are added to the Table of Contents. Default: %default'))
-    toc('level1_toc', ['--level1-toc'], default=None,
-        help=_('XPath expression that specifies all tags that should be added '
-               'to the Table of Contents at level one. If this is specified, '
-               'it takes precedence over other forms of auto-detection.'))
-    toc('level2_toc', ['--level2-toc'], default=None,
-        help=_('XPath expression that specifies all tags that should be added '
-               'to the Table of Contents at level two. Each entry is added '
-               'under the previous level one entry.'))
-    toc('level3_toc', ['--level3-toc'], default=None,
-        help=_('XPath expression that specifies all tags that should be added '
-               'to the Table of Contents at level three. Each entry is added '
-               'under the previous level two entry.'))
-    toc('from_ncx', ['--from-ncx'], default=None,
-        help=_('Path to a .ncx file that contains the table of contents to use '
-               'for this ebook. The NCX file should contain links relative to '
-               'the directory it is placed in. See '
-               'http://www.niso.org/workrooms/daisy/Z39-86-2005.html#NCX for '
-               'an overview of the NCX format.'))
-    toc('use_auto_toc', ['--use-auto-toc'], default=False,
-        help=_('Normally, if the source file already has a Table of Contents, '
-               'it is used in preference to the auto-generated one. '
-               'With this option, the auto-generated one is always used.'))
-
-    layout = c.add_group('page layout', _('Control page layout'))
-    layout('margin_top', ['--margin-top'], default=5.0,
-           help=_('Set the top margin in pts. Default is %default'))
-    layout('margin_bottom', ['--margin-bottom'], default=5.0,
-           help=_('Set the bottom margin in pts. Default is %default'))
-    layout('margin_left', ['--margin-left'], default=5.0,
-           help=_('Set the left margin in pts. Default is %default'))
-    layout('margin_right', ['--margin-right'], default=5.0,
-           help=_('Set the right margin in pts. Default is %default'))
-    layout('base_font_size2', ['--base-font-size'], default=12.0,
-           help=_('The base font size in pts. Default is %defaultpt. '
-                  'Set to 0 to disable rescaling of fonts.'))
-    layout('remove_paragraph_spacing', ['--remove-paragraph-spacing'], default=False,
-           help=_('Remove spacing between paragraphs. '
-                  'Also sets a indent on paragraphs of 1.5em. '
-                  'You can override this by adding p {text-indent: 0cm} to '
-                  '--override-css. Spacing removal will not work if the source '
-                  'file forces inter-paragraph spacing.'))
-    layout('no_justification', ['--no-justification'], default=False,
-           help=_('Do not force text to be justified in output.'))
-    layout('linearize_tables', ['--linearize-tables'], default=False,
-           help=_('Remove table markup, converting it into paragraphs. '
-                  'This is useful if your source file uses a table to manage layout.'))
-    layout('preserve_tag_structure', ['--preserve-tag-structure'], default=False,
-           help=_('Preserve the HTML tag structure while splitting large HTML files. '
-                  'This is only neccessary if the HTML files contain CSS that '
-                  'uses sibling selectors. Enabling this greatly slows down '
-                  'processing of large HTML files.'))
-
-    c.add_opt('show_opf', ['--show-opf'], default=False, group='debug',
-              help=_('Print generated OPF file to stdout'))
-    c.add_opt('show_ncx', ['--show-ncx'], default=False, group='debug',
-              help=_('Print generated NCX file to stdout'))
-    c.add_opt('keep_intermediate', ['--keep-intermediate-files'], group='debug',
-              default=False,
-              help=_('Keep intermediate files during processing by html2epub'))
-    c.add_opt('extract_to', ['--extract-to'], group='debug', default=None,
-              help=_('Extract the contents of the produced EPUB file to the '
-                     'specified directory.'))
-    return c
diff --git a/src/calibre/ebooks/epub/fonts.py b/src/calibre/ebooks/epub/fonts.py
deleted file mode 100644
index 67e6066ed1..0000000000
--- a/src/calibre/ebooks/epub/fonts.py
+++ /dev/null
@@ -1,300 +0,0 @@
-#!/usr/bin/env  python
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
-
-'''
-Font size rationalization. See :function:`relativize`.
-'''
-
-import logging, re, operator, functools, collections, unittest, copy, sys
-from xml.dom import SyntaxErr
-
-from lxml.cssselect import CSSSelector
-from lxml import etree
-from lxml.html import HtmlElement
-
-from calibre.ebooks.html_old import fromstring
-from calibre.ebooks.epub import rules
-from cssutils import CSSParser
-
-num           = r'[-]?\d+|[-]?\d*\.\d+'
-length        = r'(?P<zero>0)|(?P<num>{num})(?P<unit>%|em|ex|px|in|cm|mm|pt|pc)'.replace('{num}', num)
-absolute_size = r'(?P<abs>(x?x-)?(small|large)|medium)'
-relative_size = r'(?P<rel>smaller|larger)'
-
-font_size_pat   = re.compile('|'.join((relative_size, absolute_size, length)), re.I)
-line_height_pat = re.compile(r'({num})(px|in|cm|mm|pt|pc)'.replace('{num}', num))
-
-PTU = {
-       'in' : 72.,
-       'cm' : 72/2.54,
-       'mm' : 72/25.4,
-       'pt' : 1.0,
-       'pc' : 1/12.,
-       }
-
-DEFAULT_FONT_SIZE = 12
-
-class Rationalizer(object):
-
-    @classmethod
-    def specificity(cls, s):
-        '''Map CSS specificity tuple to a single integer'''
-        return sum([10**(4-i) + x for i,x in enumerate(s)])
-
-    @classmethod
-    def compute_font_size(cls, elem):
-        '''
-        Calculate the effective font size of an element traversing its ancestors as far as
-        neccessary.
-        '''
-        cfs = elem.computed_font_size
-        if cfs is not None:
-            return
-        sfs = elem.specified_font_size
-        if callable(sfs):
-            parent = elem.getparent()
-            cls.compute_font_size(parent)
-            elem.computed_font_size = sfs(parent.computed_font_size)
-        else:
-            elem.computed_font_size = sfs
-
-    @classmethod
-    def calculate_font_size(cls, style):
-        'Return font size in pts from style object. For relative units returns a callable'
-        match = font_size_pat.search(style.font)
-        fs = ''
-        if match:
-            fs = match.group()
-        if style.fontSize:
-            fs = style.fontSize
-
-        match = font_size_pat.search(fs)
-        if match is None:
-            return None
-        match = match.groupdict()
-        unit = match.get('unit', '')
-        if unit: unit = unit.lower()
-        if unit in PTU.keys():
-            return PTU[unit] * float(match['num'])
-        if unit in ('em', 'ex'):
-            return functools.partial(operator.mul, float(match['num']))
-        if unit == '%':
-            return functools.partial(operator.mul, float(match['num'])/100.)
-        abs = match.get('abs', '')
-        if abs: abs = abs.lower()
-        if abs:
-            x = (1.2)**(abs.count('x') * (-1 if 'small' in abs else 1))
-            return 12 * x
-        if match.get('zero', False):
-            return 0.
-        return functools.partial(operator.mul, 1.2) if 'larger' in fs.lower() else functools.partial(operator.mul, 0.8)
-
-    @classmethod
-    def resolve_rules(cls, stylesheets):
-        for sheet in stylesheets:
-            if hasattr(sheet, 'fs_rules'):
-                continue
-            sheet.fs_rules = []
-            sheet.lh_rules = []
-            for r in sheet:
-                if r.type == r.STYLE_RULE:
-                    font_size = cls.calculate_font_size(r.style)
-                    if font_size is not None:
-                        for s in r.selectorList:
-                            sheet.fs_rules.append([CSSSelector(s.selectorText), font_size])
-                    orig = line_height_pat.search(r.style.lineHeight)
-                    if orig is not None:
-                        for s in r.selectorList:
-                            sheet.lh_rules.append([CSSSelector(s.selectorText), float(orig.group(1)) * PTU[orig.group(2).lower()]])
-
-
-    @classmethod
-    def apply_font_size_rules(cls, stylesheets, root):
-        'Add a ``specified_font_size`` attribute to every element that has a specified font size'
-        cls.resolve_rules(stylesheets)
-        for sheet in stylesheets:
-            for selector, font_size in sheet.fs_rules:
-                elems = selector(root)
-                for elem in elems:
-                    elem.specified_font_size = font_size
-
-    @classmethod
-    def remove_font_size_information(cls, stylesheets):
-        for r in rules(stylesheets):
-            r.style.removeProperty('font-size')
-            try:
-                new = font_size_pat.sub('', r.style.font).strip()
-                if new:
-                    r.style.font = new
-                else:
-                    r.style.removeProperty('font')
-            except SyntaxErr:
-                r.style.removeProperty('font')
-            if line_height_pat.search(r.style.lineHeight) is not None:
-                r.style.removeProperty('line-height')
-
-    @classmethod
-    def compute_font_sizes(cls, root, stylesheets, base=12):
-        stylesheets = [s for s in stylesheets if hasattr(s, 'cssText')]
-        cls.apply_font_size_rules(stylesheets, root)
-
-        # Compute the effective font size of all tags
-        root.computed_font_size = DEFAULT_FONT_SIZE
-        for elem in root.iter(etree.Element):
-            cls.compute_font_size(elem)
-
-        extra_css = {}
-        if base > 0:
-            # Calculate the "base" (i.e. most common) font size
-            font_sizes = collections.defaultdict(lambda : 0)
-            body = root.xpath('//body')[0]
-            IGNORE = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
-            for elem in body.iter(etree.Element):
-                if elem.tag not in IGNORE:
-                    t = getattr(elem, 'text', '')
-                    if t: t = t.strip()
-                    if t:
-                        font_sizes[elem.computed_font_size] += len(t)
-
-                t = getattr(elem, 'tail', '')
-                if t: t = t.strip()
-                if t:
-                    parent = elem.getparent()
-                    if parent.tag not in IGNORE:
-                        font_sizes[parent.computed_font_size] += len(t)
-
-            try:
-                most_common = max(font_sizes.items(), key=operator.itemgetter(1))[0]
-                scale = base/most_common if most_common > 0 else 1.
-            except ValueError:
-                scale = 1.
-
-            # rescale absolute line-heights
-            counter = 0
-            for sheet in stylesheets:
-                for selector, lh in sheet.lh_rules:
-                    for elem in selector(root):
-                        elem.set('id', elem.get('id', 'cfs_%d'%counter))
-                        counter += 1
-                        if not extra_css.has_key(elem.get('id')):
-                            extra_css[elem.get('id')] = []
-                        extra_css[elem.get('id')].append('line-height:%fpt'%(lh*scale))
-
-
-
-            # Rescale all computed font sizes
-            for elem in body.iter(etree.Element):
-                if isinstance(elem, HtmlElement):
-                    elem.computed_font_size *= scale
-
-        # Remove all font size specifications from the last stylesheet
-        cls.remove_font_size_information(stylesheets[-1:])
-
-        # Create the CSS to implement the rescaled font sizes
-        for elem in body.iter(etree.Element):
-            cfs, pcfs = map(operator.attrgetter('computed_font_size'), (elem, elem.getparent()))
-            if abs(cfs-pcfs) > 1/12. and abs(pcfs) > 1/12.:
-                elem.set('id', elem.get('id', 'cfs_%d'%counter))
-                counter += 1
-                if not extra_css.has_key(elem.get('id')):
-                    extra_css[elem.get('id')] = []
-                extra_css[elem.get('id')].append('font-size: %f%%'%(100*(cfs/pcfs)))
-
-        css = CSSParser(loglevel=logging.ERROR).parseString('')
-        for id, r in extra_css.items():
-            css.add('#%s {%s}'%(id, ';'.join(r)))
-        return css
-
-    @classmethod
-    def rationalize(cls, stylesheets, root, opts):
-        logger     = logging.getLogger('html2epub')
-        logger.info('\t\tRationalizing fonts...')
-        extra_css = None
-        if opts.base_font_size2 > 0:
-            try:
-                extra_css = cls.compute_font_sizes(root, stylesheets, base=opts.base_font_size2)
-            except:
-                logger.warning('Failed to rationalize font sizes.')
-                if opts.verbose > 1:
-                    logger.exception('')
-            finally:
-                root.remove_font_size_information()
-        logger.debug('\t\tDone rationalizing')
-        return extra_css
-
-################################################################################
-############## Testing
-################################################################################
-
-class FontTest(unittest.TestCase):
-
-    def setUp(self):
-        from calibre.ebooks.epub import config
-        self.opts = config(defaults='').parse()
-        self.html = '''
-        <html>
-            <head>
-                <title>Test document</title>
-            </head>
-            <body>
-                <div id="div1">
-                <!-- A comment -->
-                    <p id="p1">Some <b>text</b></p>
-                </div>
-                <p id="p2">Some other <span class="it">text</span>.</p>
-                <p id="longest">The longest piece of single font size text in this entire file. Used to test resizing.</p>
-            </body>
-        </html>
-        '''
-        self.root = fromstring(self.html)
-
-    def do_test(self, css, base=DEFAULT_FONT_SIZE, scale=1):
-        root1 = copy.deepcopy(self.root)
-        root1.computed_font_size = DEFAULT_FONT_SIZE
-        stylesheet = CSSParser(loglevel=logging.ERROR).parseString(css)
-        stylesheet2 = Rationalizer.compute_font_sizes(root1, [stylesheet], base)
-        root2 = copy.deepcopy(root1)
-        root2.remove_font_size_information()
-        root2.computed_font_size = DEFAULT_FONT_SIZE
-        Rationalizer.apply_font_size_rules([stylesheet2], root2)
-        for elem in root2.iter(etree.Element):
-            Rationalizer.compute_font_size(elem)
-        for e1, e2 in zip(root1.xpath('//body')[0].iter(etree.Element), root2.xpath('//body')[0].iter(etree.Element)):
-            self.assertAlmostEqual(e1.computed_font_size, e2.computed_font_size,
-                msg='Computed font sizes for %s not equal. Original: %f Processed: %f'%\
-                (root1.getroottree().getpath(e1), e1.computed_font_size, e2.computed_font_size))
-        return stylesheet2.cssText
-
-    def testStripping(self):
-        'Test that any original entries are removed from the CSS'
-        css = 'p { font: bold 10px italic smaller; font-size: x-large} \na { font-size: 0 }'
-        css = CSSParser(loglevel=logging.ERROR).parseString(css)
-        Rationalizer.compute_font_sizes(copy.deepcopy(self.root), [css])
-        self.assertEqual(css.cssText.replace(' ', '').replace('\n', ''),
-                         'p{font:bolditalic}')
-
-    def testIdentity(self):
-        'Test that no unnecessary font size changes are made'
-        extra_css = self.do_test('div {font-size:12pt} \nspan {font-size:100%}')
-        self.assertEqual(extra_css.strip(), '')
-
-    def testRelativization(self):
-        'Test conversion of absolute to relative sizes'
-        self.do_test('#p1 {font: 24pt} b {font: 12pt} .it {font: 48pt} #p2 {font: 100%}')
-
-    def testResizing(self):
-        'Test resizing of fonts'
-        self.do_test('#longest {font: 24pt} .it {font:20pt; line-height:22pt}')
-
-
-def suite():
-    return unittest.TestLoader().loadTestsFromTestCase(FontTest)
-
-def test():
-    unittest.TextTestRunner(verbosity=2).run(suite())
-
-if __name__ == '__main__':
-    sys.exit(test())
-
diff --git a/src/calibre/ebooks/epub/from_any.py b/src/calibre/ebooks/epub/from_any.py
deleted file mode 100644
index 2f3f81124f..0000000000
--- a/src/calibre/ebooks/epub/from_any.py
+++ /dev/null
@@ -1,93 +0,0 @@
-from __future__ import with_statement
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
-
-'''
-Convert any ebook format to epub.
-'''
-
-import sys, os, re
-from contextlib import nested
-
-from calibre import extract, walk
-from calibre.ebooks import DRMError
-from calibre.ebooks.epub import config as common_config
-from calibre.ebooks.epub.from_html import convert as html2epub, find_html_index
-from calibre.ptempfile import TemporaryDirectory
-from calibre.utils.zipfile import ZipFile
-from calibre.customize.ui import run_plugins_on_preprocess
-
-
-SOURCE_FORMATS = ['lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf',
-                  'txt', 'pdf', 'rar', 'zip', 'oebzip', 'htm', 'html', 'epub']
-
-def unarchive(path, tdir):
-    extract(path, tdir)
-    files = list(walk(tdir))
-
-    for ext in ['opf'] + list(MAP.keys()):
-        for f in files:
-            if f.lower().endswith('.'+ext):
-                if ext in ['txt', 'rtf'] and os.stat(f).st_size < 2048:
-                    continue
-                return f, ext
-    return find_html_index(files)
-
-def any2epub(opts, path, notification=None, create_epub=True,
-             oeb_cover=False, extract_to=None):
-    path = run_plugins_on_preprocess(path)
-    ext = os.path.splitext(path)[1]
-    if not ext:
-        raise ValueError('Unknown file type: '+path)
-    ext = ext.lower()[1:]
-
-    if opts.output is None:
-        opts.output = os.path.splitext(os.path.basename(path))[0]+'.epub'
-
-    with nested(TemporaryDirectory('_any2epub1'), TemporaryDirectory('_any2epub2')) as (tdir1, tdir2):
-        if ext in ['rar', 'zip', 'oebzip']:
-            path, ext = unarchive(path, tdir1)
-            print 'Found %s file in archive'%(ext.upper())
-
-        if ext in MAP.keys():
-            path = MAP[ext](path, tdir2, opts)
-            ext = 'opf'
-
-
-        if re.match(r'((x){0,1}htm(l){0,1})|opf', ext) is None:
-            raise ValueError('Conversion from %s is not supported'%ext.upper())
-
-        print 'Creating EPUB file...'
-        html2epub(path, opts, notification=notification,
-                  create_epub=create_epub, oeb_cover=oeb_cover,
-                  extract_to=extract_to)
-
-def config(defaults=None):
-    return common_config(defaults=defaults)
-
-
-def formats():
-    return ['html', 'rar', 'zip', 'oebzip']+list(MAP.keys())
-
-USAGE = _('''\
-%%prog [options] filename
-
-Convert any of a large number of ebook formats to a %s file. Supported formats are: %s
-''')
-
-def option_parser(usage=USAGE):
-    return config().option_parser(usage=usage%('EPUB', formats()))
-
-def main(args=sys.argv):
-    parser = option_parser()
-    opts, args = parser.parse_args(args)
-    if len(args) < 2:
-        parser.print_help()
-        print 'No input file specified.'
-        return 1
-    any2epub(opts, args[1])
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
diff --git a/src/calibre/ebooks/epub/from_feeds.py b/src/calibre/ebooks/epub/from_feeds.py
deleted file mode 100644
index 6a12353f50..0000000000
--- a/src/calibre/ebooks/epub/from_feeds.py
+++ /dev/null
@@ -1,71 +0,0 @@
-from __future__ import with_statement
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
-
-'''
-Convert periodical content into EPUB ebooks.
-'''
-import sys, glob, os
-from calibre.web.feeds.main import config as feeds2disk_config, USAGE, run_recipe
-from calibre.ebooks.epub.from_html import config as html2epub_config
-from calibre.ptempfile import TemporaryDirectory
-from calibre.ebooks.epub.from_html import convert as html2epub
-from calibre import strftime, sanitize_file_name
-
-def config(defaults=None):
-    c = feeds2disk_config(defaults=defaults)
-    c.remove('lrf')
-    c.remove('epub')
-    c.remove('output_dir')
-    c.update(html2epub_config(defaults=defaults))
-    c.remove('chapter_mark')
-    return c
-
-def option_parser():
-    c = config()
-    return c.option_parser(usage=USAGE)
-
-def convert(opts, recipe_arg, notification=None):
-    opts.lrf  = False
-    opts.epub = True
-    if opts.debug:
-        opts.verbose = 2
-    parser = option_parser()
-    with TemporaryDirectory('_feeds2epub') as tdir:
-        opts.output_dir = tdir
-        recipe = run_recipe(opts, recipe_arg, parser, notification=notification)
-        c = config()
-        recipe_opts = c.parse_string(recipe.html2epub_options)
-        c.smart_update(recipe_opts, opts)
-        opts = recipe_opts
-        opts.chapter_mark = 'none'
-        opts.dont_split_on_page_breaks = True
-        opf = glob.glob(os.path.join(tdir, '*.opf'))
-        if not opf:
-            raise Exception('Downloading of recipe: %s failed'%recipe_arg)
-        opf = opf[0]
-        
-        if opts.output is None:
-            fname = recipe.title + strftime(recipe.timefmt) + '.epub'
-            opts.output = os.path.join(os.getcwd(), sanitize_file_name(fname))
-        
-        print 'Generating epub...'
-        opts.encoding = 'utf-8'
-        opts.remove_paragraph_spacing = True
-        html2epub(opf, opts, notification=notification)
-    
-
-def main(args=sys.argv, notification=None, handler=None):
-    parser = option_parser()
-    opts, args = parser.parse_args(args)
-    if len(args) != 2 and opts.feeds is None:
-        parser.print_help()
-        return 1
-    recipe_arg = args[1] if len(args) > 1 else None
-    convert(opts, recipe_arg, notification=notification)
-        
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
\ No newline at end of file
diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py
deleted file mode 100644
index 3e1ec4c811..0000000000
--- a/src/calibre/ebooks/epub/from_html.py
+++ /dev/null
@@ -1,547 +0,0 @@
-from __future__ import with_statement
-__license__   = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
-__docformat__ = 'restructuredtext en'
-
-'''
-Conversion of HTML/OPF files follows several stages:
-
-    * All links in the HTML files or in the OPF manifest are
-    followed to build up a list of HTML files to be converted.
-    This stage is implemented by
-    :function:`calibre.ebooks.html.traverse` and
-    :class:`calibre.ebooks.html.HTMLFile`.
-
-    * The HTML is pre-processed to make it more semantic.
-    All links in the HTML files to other resources like images,
-    stylesheets, etc. are relativized. The resources are copied
-    into the `resources` sub directory. This is accomplished by
-    :class:`calibre.ebooks.html.PreProcessor` and
-    :class:`calibre.ebooks.html.Parser`.
-
-    * The HTML is processed. Various operations are performed.
-    All style declarations are extracted and consolidated into
-    a single style sheet. Chapters are auto-detected and marked.
-    Various font related manipulations are performed. See
-    :class:`HTMLProcessor`.
-
-    * The processed HTML is saved and the
-    :module:`calibre.ebooks.epub.split` module is used to split up
-    large HTML files into smaller chunks.
-
-    * The EPUB container is created.
-'''
-
-import os, sys, cStringIO, logging, re, functools, shutil
-
-from lxml.etree import XPath
-from lxml import html, etree
-from PyQt4.Qt import QApplication, QPixmap, Qt
-
-from calibre.ebooks.html_old import Processor, merge_metadata, get_filelist,\
-    opf_traverse, create_metadata, rebase_toc, Link, parser
-from calibre.ebooks.epub import config as common_config, tostring
-from calibre.ptempfile import TemporaryDirectory
-from calibre.ebooks.metadata.toc import TOC
-from calibre.ebooks.metadata.opf2 import OPF
-from calibre.ebooks.epub import initialize_container, PROFILES
-from calibre.ebooks.epub.split import split
-from calibre.ebooks.epub.pages import add_page_map
-from calibre.ebooks.epub.fonts import Rationalizer
-from calibre.constants import preferred_encoding
-from calibre.customize.ui import run_plugins_on_postprocess
-from calibre import walk, CurrentDir, to_unicode, fit_image
-
-content = functools.partial(os.path.join, u'content')
-
-def remove_bad_link(element, attribute, link, pos):
-    if attribute is not None:
-        if element.tag in ['link']:
-            element.getparent().remove(element)
-        else:
-            element.set(attribute, '')
-            del element.attrib[attribute]
-
-def check_links(opf_path, pretty_print):
-    '''
-    Find and remove all invalid links in the HTML files
-    '''
-    logger = logging.getLogger('html2epub')
-    logger.info('\tChecking files for bad links...')
-    pathtoopf = os.path.abspath(opf_path)
-    with CurrentDir(os.path.dirname(pathtoopf)):
-        opf = OPF(open(pathtoopf, 'rb'), os.path.dirname(pathtoopf))
-        html_files = []
-        for item in opf.itermanifest():
-            if 'html' in item.get('media-type', '').lower():
-                f = item.get('href').split('/')[-1]
-                if isinstance(f, str):
-                    f = f.decode('utf-8')
-                html_files.append(os.path.abspath(content(f)))
-
-        for path in html_files:
-            if not os.access(path, os.R_OK):
-                continue
-            base = os.path.dirname(path)
-            root = html.fromstring(open(content(path), 'rb').read(), parser=parser)
-            for element, attribute, link, pos in list(root.iterlinks()):
-                link = to_unicode(link)
-                plink = Link(link, base)
-                bad = False
-                if plink.path is not None and not os.path.exists(plink.path):
-                    bad = True
-                if bad:
-                    remove_bad_link(element, attribute, link, pos)
-            open(content(path), 'wb').write(tostring(root, pretty_print))
-
-def find_html_index(files):
-    '''
-    Given a list of files, find the most likely root HTML file in the
-    list.
-    '''
-    html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}$', re.IGNORECASE)
-    html_files = [f for f in files if html_pat.search(f) is not None]
-    if not html_files:
-        raise ValueError(_('Could not find an ebook inside the archive'))
-    html_files = [(f, os.stat(f).st_size) for f in html_files]
-    html_files.sort(cmp = lambda x, y: cmp(x[1], y[1]))
-    html_files = [f[0] for f in html_files]
-    for q in ('toc', 'index'):
-        for f in html_files:
-            if os.path.splitext(os.path.basename(f))[0].lower() == q:
-                return f, os.path.splitext(f)[1].lower()[1:]
-    return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]
-
-def rescale_images(imgdir, screen_size, log):
-    pwidth, pheight = screen_size
-    if QApplication.instance() is None:
-        QApplication([])
-    for f in os.listdir(imgdir):
-        path = os.path.join(imgdir, f)
-        if os.path.splitext(f)[1] in ('.css', '.js'):
-            continue
-
-        p = QPixmap()
-        p.load(path)
-        if p.isNull():
-            continue
-        width, height = p.width(), p.height()
-        scaled, new_width, new_height = fit_image(width, height, pwidth,
-                pheight)
-        if scaled:
-            log.info('Rescaling image: '+f)
-            p.scaled(new_width, new_height, Qt.IgnoreAspectRatio,
-                    Qt.SmoothTransformation).save(path, 'JPEG')
-
-
-
-
-
-class HTMLProcessor(Processor, Rationalizer):
-
-    def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, stylesheets):
-        Processor.__init__(self, htmlfile, opts, tdir, resource_map, htmlfiles,
-                           name='html2epub')
-        if opts.verbose > 2:
-            self.debug_tree('parsed')
-        self.detect_chapters()
-
-        self.extract_css(stylesheets)
-        if self.opts.base_font_size2 > 0:
-            self.font_css = self.rationalize(self.external_stylesheets+[self.stylesheet],
-                                             self.root, self.opts)
-        if opts.verbose > 2:
-            self.debug_tree('nocss')
-
-        if hasattr(self.body, 'xpath'):
-            for script in list(self.body.xpath('descendant::script')):
-                script.getparent().remove(script)
-
-        self.fix_markup()
-
-    def convert_image(self, img):
-        rpath = img.get('src', '')
-        path = os.path.join(os.path.dirname(self.save_path()), *rpath.split('/'))
-        if os.path.exists(path) and os.path.isfile(path):
-            if QApplication.instance() is None:
-                app = QApplication([])
-                app
-            p = QPixmap()
-            p.load(path)
-            if not p.isNull():
-                p.save(path + '_calibre_converted.jpg')
-                os.remove(path)
-                for key, val in self.resource_map.items():
-                    if val == rpath:
-                        self.resource_map[key] = rpath+'_calibre_converted.jpg'
-        img.set('src', rpath+'_calibre_converted.jpg')
-
-    def fix_markup(self):
-        '''
-        Perform various markup transforms to get the output to render correctly
-        in the quirky ADE.
-        '''
-        # Replace <br> that are children of <body> as ADE doesn't handle them
-        if hasattr(self.body, 'xpath'):
-            for br in self.body.xpath('./br'):
-                if br.getparent() is None:
-                    continue
-                try:
-                    sibling = br.itersiblings().next()
-                except:
-                    sibling = None
-                br.tag = 'p'
-                br.text = u'\u00a0'
-                if (br.tail and br.tail.strip()) or sibling is None or \
-                   getattr(sibling, 'tag', '') != 'br':
-                    style = br.get('style', '').split(';')
-                    style = filter(None, map(lambda x: x.strip(), style))
-                    style.append('margin: 0pt; border:0pt; height:0pt')
-                    br.set('style', '; '.join(style))
-                else:
-                    sibling.getparent().remove(sibling)
-                    if sibling.tail:
-                        if not br.tail:
-                            br.tail = ''
-                        br.tail += sibling.tail
-
-
-        if self.opts.profile.remove_object_tags:
-            for tag in self.root.xpath('//embed'):
-                tag.getparent().remove(tag)
-            for tag in self.root.xpath('//object'):
-                if tag.get('type', '').lower().strip() in ('image/svg+xml',):
-                    continue
-                tag.getparent().remove(tag)
-
-
-        for tag in self.root.xpath('//title|//style'):
-            if not tag.text:
-                tag.getparent().remove(tag)
-        for tag in self.root.xpath('//script'):
-            if not tag.text and not tag.get('src', False):
-                tag.getparent().remove(tag)
-
-        for tag in self.root.xpath('//form'):
-            tag.getparent().remove(tag)
-
-        for tag in self.root.xpath('//center'):
-            tag.tag = 'div'
-            tag.set('style', 'text-align:center')
-
-        if self.opts.linearize_tables:
-            for tag in self.root.xpath('//table | //tr | //th | //td'):
-                tag.tag = 'div'
-
-        # ADE can't handle &amp; in an img url
-        for tag in self.root.xpath('//img[@src]'):
-            tag.set('src', tag.get('src', '').replace('&', ''))
-
-
-    def save(self):
-        for meta in list(self.root.xpath('//meta')):
-            meta.getparent().remove(meta)
-        # Strip all comments since Adobe DE is petrified of them
-        Processor.save(self, strip_comments=True)
-
-    def remove_first_image(self):
-        images = self.root.xpath('//img')
-        if images:
-            images[0].getparent().remove(images[0])
-            return True
-        return False
-
-
-
-
-def config(defaults=None):
-    return common_config(defaults=defaults)
-
-def option_parser():
-    c = config()
-    return c.option_parser(usage=_('''\
-%prog [options] file.html|opf
-
-Convert a HTML file to an EPUB ebook. Recursively follows links in the HTML file.
-If you specify an OPF file instead of an HTML file, the list of links is takes from
-the <spine> element of the OPF file.
-'''))
-
-def parse_content(filelist, opts, tdir):
-    os.makedirs(os.path.join(tdir, 'content', 'resources'))
-    resource_map, stylesheets = {}, {}
-    toc = TOC(base_path=tdir, type='root')
-    stylesheet_map = {}
-    first_image_removed = False
-    for htmlfile in filelist:
-        logging.getLogger('html2epub').debug('Processing %s...'%htmlfile)
-        hp = HTMLProcessor(htmlfile, opts, os.path.join(tdir, 'content'),
-                           resource_map, filelist, stylesheets)
-        if not first_image_removed and opts.remove_first_image:
-            first_image_removed = hp.remove_first_image()
-        hp.populate_toc(toc)
-        hp.save()
-        stylesheet_map[os.path.basename(hp.save_path())] = \
-            [s for s in hp.external_stylesheets + [hp.stylesheet, hp.font_css, hp.override_css] if s is not None]
-
-    logging.getLogger('html2epub').debug('Saving stylesheets...')
-    if opts.base_font_size2 > 0:
-        Rationalizer.remove_font_size_information(stylesheets.values())
-        for path, css in stylesheets.items():
-            raw = getattr(css, 'cssText', css)
-            if isinstance(raw, unicode):
-                raw = raw.encode('utf-8')
-            open(path, 'wb').write(raw)
-    if toc.count('chapter') > opts.toc_threshold:
-        toc.purge(['file', 'link', 'unknown'])
-    if toc.count('chapter') + toc.count('file') > opts.toc_threshold:
-        toc.purge(['link', 'unknown'])
-    toc.purge(['link'], max=opts.max_toc_links)
-
-    return resource_map, hp.htmlfile_map, toc, stylesheet_map
-
-TITLEPAGE = '''\
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
-    <head>
-        <title>Cover</title>
-        <style type="text/css" title="override_css">
-            @page {padding: 0pt; margin:0pt}
-            body { text-align: center; padding:0pt; margin: 0pt; }
-            div { margin: 0pt; padding: 0pt; }
-        </style>
-    </head>
-    <body>
-        <div>
-            <img src="%s" alt="cover" style="height: 100%%" />
-        </div>
-    </body>
-</html>
-'''
-
-def create_cover_image(src, dest, screen_size, rescale_cover=True):
-    try:
-        from PyQt4.Qt import QImage, Qt
-        if QApplication.instance() is None:
-            QApplication([])
-        im = QImage()
-        im.load(src)
-        if im.isNull():
-            raise ValueError('Invalid cover image')
-        if rescale_cover and screen_size is not None:
-            width, height = im.width(), im.height()
-            dw, dh = (screen_size[0]-width)/float(width), (screen_size[1]-height)/float(height)
-            delta = min(dw, dh)
-            if delta > 0:
-                nwidth = int(width + delta*(width))
-                nheight = int(height + delta*(height))
-                im = im.scaled(int(nwidth), int(nheight), Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
-        im.save(dest)
-    except:
-        import traceback
-        traceback.print_exc()
-        return False
-    return True
-
-def process_title_page(mi, filelist, htmlfilemap, opts, tdir):
-    old_title_page = None
-    f = lambda x : os.path.normcase(os.path.normpath(x))
-    if not isinstance(mi.cover, basestring):
-        mi.cover = None
-    if mi.cover:
-        if f(filelist[0].path) == f(mi.cover):
-            old_title_page = htmlfilemap[filelist[0].path]
-    #logger = logging.getLogger('html2epub')
-    metadata_cover = mi.cover
-    if metadata_cover and not os.path.exists(metadata_cover):
-        metadata_cover = None
-
-    cpath = '/'.join(('resources', '_cover_.jpg'))
-    cover_dest = os.path.join(tdir, 'content', *cpath.split('/'))
-    if metadata_cover is not None:
-        if not create_cover_image(metadata_cover, cover_dest,
-                                  opts.profile.screen_size):
-            metadata_cover = None
-    specified_cover = opts.cover
-    if specified_cover and not os.path.exists(specified_cover):
-        specified_cover = None
-    if specified_cover is not None:
-        if not create_cover_image(specified_cover, cover_dest,
-                                  opts.profile.screen_size):
-            specified_cover = None
-
-    cover = metadata_cover if specified_cover is None or (opts.prefer_metadata_cover and metadata_cover is not None) else specified_cover
-
-    if cover is not None:
-        titlepage = TITLEPAGE%cpath
-        tp = 'calibre_title_page.html' if old_title_page is None else old_title_page
-        tppath = os.path.join(tdir, 'content', tp)
-        with open(tppath, 'wb') as f:
-            f.write(titlepage)
-        return tp if old_title_page is None else None, True
-    elif os.path.exists(cover_dest):
-        os.remove(cover_dest)
-    return None, old_title_page is not None
-
-def find_oeb_cover(htmlfile):
-    if os.stat(htmlfile).st_size > 2048:
-        return None
-    match = re.search(r'(?i)<img[^<>]+src\s*=\s*[\'"](.+?)[\'"]', open(htmlfile, 'rb').read())
-    if match:
-        return match.group(1)
-
-def condense_ncx(ncx_path):
-    tree = etree.parse(ncx_path)
-    for tag in tree.getroot().iter(tag=etree.Element):
-        if tag.text:
-            tag.text = tag.text.strip()
-        if tag.tail:
-            tag.tail = tag.tail.strip()
-    compressed = etree.tostring(tree.getroot(), encoding='utf-8')
-    open(ncx_path, 'wb').write(compressed)
-
-def convert(htmlfile, opts, notification=None, create_epub=True,
-            oeb_cover=False, extract_to=None):
-    htmlfile = os.path.abspath(htmlfile)
-    if opts.output is None:
-        opts.output = os.path.splitext(os.path.basename(htmlfile))[0] + '.epub'
-    opts.profile = PROFILES[opts.profile]
-    opts.output = os.path.abspath(opts.output)
-    if opts.override_css is not None:
-        try:
-            opts.override_css = open(opts.override_css, 'rb').read().decode(preferred_encoding, 'replace')
-        except:
-            opts.override_css = opts.override_css.decode(preferred_encoding, 'replace')
-    if opts.from_opf:
-        opts.from_opf = os.path.abspath(opts.from_opf)
-    if opts.from_ncx:
-        opts.from_ncx = os.path.abspath(opts.from_ncx)
-    if htmlfile.lower().endswith('.opf'):
-        opf = OPF(htmlfile, os.path.dirname(os.path.abspath(htmlfile)))
-        filelist = opf_traverse(opf, verbose=opts.verbose, encoding=opts.encoding)
-        if not filelist:
-            # Bad OPF look for a HTML file instead
-            htmlfile = find_html_index(walk(os.path.dirname(htmlfile)))[0]
-            if htmlfile is None:
-                raise ValueError('Could not find suitable file to convert.')
-            filelist = get_filelist(htmlfile, opts)[1]
-        mi = merge_metadata(None, opf, opts)
-    else:
-        opf, filelist = get_filelist(htmlfile, opts)
-        mi = merge_metadata(htmlfile, opf, opts)
-    opts.chapter = XPath(opts.chapter,
-                    namespaces={'re':'http://exslt.org/regular-expressions'})
-    for x in (1, 2, 3):
-        attr = 'level%d_toc'%x
-        if getattr(opts, attr):
-            setattr(opts, attr, XPath(getattr(opts, attr),
-                      namespaces={'re':'http://exslt.org/regular-expressions'}))
-        else:
-            setattr(opts, attr, None)
-
-    with TemporaryDirectory(suffix='_html2epub', keep=opts.keep_intermediate) as tdir:
-        if opts.keep_intermediate:
-            print 'Intermediate files in', tdir
-        resource_map, htmlfile_map, generated_toc, stylesheet_map = \
-                                        parse_content(filelist, opts, tdir)
-        logger = logging.getLogger('html2epub')
-        resources = [os.path.join(tdir, 'content', f) for f in resource_map.values()]
-
-
-        title_page, has_title_page = process_title_page(mi, filelist, htmlfile_map, opts, tdir)
-        spine = [htmlfile_map[f.path] for f in filelist]
-        if not oeb_cover and title_page is not None:
-            spine = [title_page] + spine
-        mi.cover = None
-        mi.cover_data = (None, None)
-
-
-        mi = create_metadata(tdir, mi, spine, resources)
-        buf = cStringIO.StringIO()
-        if mi.toc:
-            rebase_toc(mi.toc, htmlfile_map, tdir)
-        if opts.use_auto_toc or mi.toc is None or len(list(mi.toc.flat())) < 2:
-            mi.toc = generated_toc
-        if opts.from_ncx:
-            toc = TOC()
-            toc.read_ncx_toc(opts.from_ncx)
-            mi.toc = toc
-        for item in mi.manifest:
-            if getattr(item, 'mime_type', None) == 'text/html':
-                item.mime_type = 'application/xhtml+xml'
-        opf_path = os.path.join(tdir, 'metadata.opf')
-        with open(opf_path, 'wb') as f:
-            mi.render(f, buf, 'toc.ncx')
-        toc = buf.getvalue()
-        if toc:
-            with open(os.path.join(tdir, 'toc.ncx'), 'wb') as f:
-                f.write(toc)
-            if opts.show_ncx:
-                print toc
-        split(opf_path, opts, stylesheet_map)
-        if opts.page:
-            logger.info('\tBuilding page map...')
-            add_page_map(opf_path, opts)
-        check_links(opf_path, opts.pretty_print)
-
-        opf = OPF(opf_path, tdir)
-        opf.remove_guide()
-        oeb_cover_file = None
-        if oeb_cover and title_page is not None:
-            oeb_cover_file = find_oeb_cover(os.path.join(tdir, 'content', title_page))
-        if has_title_page or (oeb_cover and oeb_cover_file):
-            opf.create_guide_element()
-            if has_title_page and not oeb_cover:
-                opf.add_guide_item('cover', 'Cover', 'content/'+spine[0])
-            if oeb_cover and oeb_cover_file:
-                opf.add_guide_item('cover', 'Cover', 'content/'+oeb_cover_file)
-
-        cpath = os.path.join(tdir, 'content', 'resources', '_cover_.jpg')
-        if os.path.exists(cpath):
-            opf.add_path_to_manifest(cpath, 'image/jpeg')
-        with open(opf_path, 'wb') as f:
-            f.write(opf.render())
-        ncx_path = os.path.join(os.path.dirname(opf_path), 'toc.ncx')
-        if os.path.exists(ncx_path) and os.stat(ncx_path).st_size > opts.profile.flow_size:
-            logger.info('Condensing NCX from %d bytes...'%os.stat(ncx_path).st_size)
-            condense_ncx(ncx_path)
-            if os.stat(ncx_path).st_size > opts.profile.flow_size:
-                logger.warn('NCX still larger than allowed size at %d bytes. Menu based Table of Contents may not work on device.'%os.stat(ncx_path).st_size)
-
-        if opts.profile.screen_size is not None:
-            rescale_images(os.path.join(tdir, 'content', 'resources'),
-                    opts.profile.screen_size, logger)
-
-        if create_epub:
-            epub = initialize_container(opts.output)
-            epub.add_dir(tdir)
-            epub.close()
-            run_plugins_on_postprocess(opts.output, 'epub')
-            logger.info(_('Output written to ')+opts.output)
-
-        if opts.show_opf:
-            print open(opf_path, 'rb').read()
-
-        if opts.extract_to is not None:
-            if os.path.exists(opts.extract_to):
-                shutil.rmtree(opts.extract_to)
-            shutil.copytree(tdir, opts.extract_to)
-
-        if extract_to is not None:
-            if os.path.exists(extract_to):
-                shutil.rmtree(extract_to)
-            shutil.copytree(tdir, extract_to)
-
-
-
-def main(args=sys.argv):
-    parser = option_parser()
-    opts, args = parser.parse_args(args)
-    if len(args) < 2:
-        parser.print_help()
-        print _('You must specify an input HTML file')
-        return 1
-    convert(args[1], opts)
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main())
diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py
index 4ce13720e0..a43ca4e5e3 100644
--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@@ -6,9 +6,15 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
+import os
+from urllib import unquote
 
 from calibre.customize.conversion import OutputFormatPlugin
-from calibre import CurrentDir
+from calibre.ptempfile import TemporaryDirectory
+from calibre.constants import __appname__, __version__
+from calibre import strftime, guess_type
+from lxml import etree
+
 
 class EPUBOutput(OutputFormatPlugin):
 
@@ -16,7 +22,218 @@ class EPUBOutput(OutputFormatPlugin):
     author = 'Kovid Goyal'
     file_type = 'epub'
 
+    TITLEPAGE_COVER = '''\
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+    <head>
+        <title>Cover</title>
+        <style type="text/css" title="override_css">
+            @page {padding: 0pt; margin:0pt}
+            body { text-align: center; padding:0pt; margin: 0pt; }
+            div { margin: 0pt; padding: 0pt; }
+        </style>
+    </head>
+    <body>
+        <div>
+            <img src="%s" alt="cover" style="height: 100%%" />
+        </div>
+    </body>
+</html>
+'''
+    # TITLEPAGE_COVER takes the cover href; TITLEPAGE a dict (see default_cover)
+    TITLEPAGE = '''\
+<html  xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
+    <head>
+        <style type="text/css">
+            body {
+                background: white no-repeat fixed center center;
+                text-align: center;
+                vertical-align: center;
+                overflow: hidden;
+                font-size: 18px;
+            }
+            h1 { font-family: serif; }
+            h2, h4 { font-family: monospace; }
+        </style>
+    </head>
+    <body>
+        <h1>%(title)s</h1>
+        <br/><br/>
+        <div style="position:relative">
+            <div style="position: absolute; left: 0; top: 0; width:100%%; height:100%%; vertical-align:center">
+                <img src="%(img)s" alt="calibre" style="opacity:0.3"/>
+            </div>
+            <div style="position: absolute; left: 0; top: 0; width:100%%; height:100%%; vertical-align:center">
+                <h2>%(date)s</h2>
+                <br/><br/><br/><br/><br/>
+                <h3>%(author)s</h3>
+                <br/><br/><br/><br/><br/><br/><br/><br/><br/>
+                <h4>Produced by %(app)s</h4>
+            </div>
+        </div>
+    </body>
+</html>
+'''
+
     def convert(self, oeb, output_path, input_plugin, opts, log):
-        self.log, self.opts = log, opts
+        '''Fix up oeb for EPUB and pack its OEB output into output_path.'''
+        self.log, self.opts, self.oeb = log, opts, oeb
+        self.workaround_ade_quirks()
+        from calibre.ebooks.oeb.transforms.rescale import RescaleImages
+        RescaleImages()(oeb, opts)
+        self.insert_cover()
+
+        # The OEB plugin writes the book into tdir, which becomes the container
+        with TemporaryDirectory('_epub_output') as tdir:
+            from calibre.customize.ui import plugin_for_output_format
+            from calibre.ebooks.epub import initialize_container
+            oeb_output = plugin_for_output_format('oeb')
+            oeb_output.convert(oeb, tdir, input_plugin, opts, log)
+            files = os.listdir(tdir)
+            opf = [x for x in files if x.endswith('.opf')][0]
+            ncx = [x for x in files if x.endswith('.ncx')]
+            if ncx: self.condense_ncx(os.path.join(tdir, ncx[0]))
+            epub = initialize_container(output_path, os.path.basename(opf))
+            epub.add_dir(tdir)
+            epub.close()
+
+    def default_cover(self):
+        '''
+        Create a generic title page for books that don't have a cover. Adds the
+        calibre logo and the page to the manifest and returns the result of
+        adding the page, or None if the Qt imports fail.
+        '''
+        try:
+            from calibre.gui2 import images_rc # Needed for access to logo
+            from PyQt4.Qt import QApplication, QFile, QIODevice
+        except Exception:
+            return None
+        from xml.sax.saxutils import escape
+        from calibre.ebooks.metadata import authors_to_string
+        images_rc
+        m = self.oeb.metadata
+        # role is an attribute of each creator, not of the metadata object
+        authors = [unicode(x) for x in m.creator if x.role == 'aut']
+        if QApplication.instance() is None: QApplication([])
+        f = QFile(':/library')
+        f.open(QIODevice.ReadOnly)
+        manifest = self.oeb.manifest
+        logo_id, logo = manifest.generate('calibre-logo', 'calibre-logo.png')
+        manifest.add(logo_id, logo, 'image/png', data=str(f.readAll()))
+        # Escape the text fields so that &, < and > cannot break the XML parse
+        html = self.TITLEPAGE%dict(title=escape(unicode(m.title[0])),
+                author=escape(authors_to_string(authors)), img=logo,
+                date=strftime('%d %b, %Y'), app=__appname__ +' '+__version__)
+        tp_id, tp = manifest.generate('calibre-titlepage',
+                'calibre-titlepage.xhtml')
+        return manifest.add(tp_id, tp, guess_type('t.xhtml')[0],
+                data=etree.fromstring(html))
+
+    def insert_cover(self):
+        '''Put a title page first in the spine and point the guide at it.'''
+        from calibre.ebooks.oeb.base import urldefrag
+        g, m = self.oeb.guide, self.oeb.manifest
+        if 'titlepage' in g:
+            item = m.hrefs[urldefrag(g['titlepage'].href)[0]]
+        elif 'cover' in g:
+            tp = self.TITLEPAGE_COVER%unquote(g['cover'].href)
+            tp_id, href = m.generate('titlepage', 'titlepage.xhtml')
+            item = m.add(tp_id, href, guess_type('t.xhtml')[0],
+                    data=etree.fromstring(tp))
+        else:
+            item = self.default_cover()
+        if item is not None:
+            self.oeb.spine.insert(0, item, True)
+            # The guide may lack either ref at this point; never index blindly
+            if 'cover' not in g.refs:
+                g.add('cover', 'Title Page', item.href)
+            g.refs['cover'].href = item.href
+            if 'titlepage' in g.refs:
+                g.refs['titlepage'].href = item.href
+
+    def condense_ncx(self, ncx_path):
+        '''Strip whitespace around all NCX text in place, unless pretty printing.'''
+        if self.opts.pretty_print:
+            return
+        tree = etree.parse(ncx_path)
+        for tag in tree.getroot().iter(tag=etree.Element):
+            if tag.text:
+                tag.text = tag.text.strip()
+            if tag.tail:
+                tag.tail = tag.tail.strip()
+        with open(ncx_path, 'wb') as f:
+            f.write(etree.tostring(tree.getroot(), encoding='utf-8'))
+
+    def workaround_ade_quirks(self):
+        '''
+        Perform various markup transforms to get the output to render correctly
+        in the quirky ADE.
+
+        Every spine item is edited in place; afterwards the link styling rules
+        are added to stylesheet.css, if the manifest has one.
+        '''
+        from calibre.ebooks.oeb.base import XPNSMAP, XHTML
+        from lxml.etree import XPath as _XPath
+        from functools import partial
+        # Every expression below uses the h: prefix, so each one has to be
+        # compiled with the namespace map.
+        XPath = partial(_XPath, namespaces=XPNSMAP)
+
+        def remove_all(expr, root, keep=lambda tag: False):
+            # Detach every element matching expr unless keep(tag) is true
+            for tag in XPath(expr)(root):
+                if not keep(tag):
+                    tag.getparent().remove(tag)
+
+        for item in self.oeb.spine:
+            root = item.data
+            # Replace <br> that are children of <body> as ADE doesn't handle them
+            for br in XPath('(//h:body)[1]/h:br')(root):
+                if br.getparent() is None:
+                    # Already detached while merging with an earlier <br>
+                    continue
+                sibling = br.getnext()
+                br.tag = XHTML('p')
+                br.text = u'\u00a0'
+                if (br.tail and br.tail.strip()) or sibling is None or \
+                        getattr(sibling, 'tag', '') != XHTML('br'):
+                    style = [s.strip() for s in br.get('style', '').split(';')]
+                    style = [s for s in style if s]
+                    style.append('margin: 0pt; border:0pt; height:0pt')
+                    br.set('style', '; '.join(style))
+                else:
+                    # Two <br> in a row: fold the second one into this <p>
+                    sibling.getparent().remove(sibling)
+                    if sibling.tail:
+                        br.tail = (br.tail or '') + sibling.tail
+
+            if self.opts.output_profile.remove_object_tags:
+                remove_all('//h:embed', root)
+                remove_all('//h:object', root, keep=lambda tag:
+                        tag.get('type', '').lower().strip() == 'image/svg+xml')
+
+            # Drop empty <title>/<style>, empty src-less <script> and all forms
+            remove_all('//h:title|//h:style', root, keep=lambda tag: tag.text)
+            remove_all('//h:script', root,
+                    keep=lambda tag: tag.text or tag.get('src', False))
+            remove_all('//h:form', root)
+
+            # <center> becomes a centered <div>
+            for tag in XPath('//h:center')(root):
+                tag.tag = XHTML('div')
+                tag.set('style', 'text-align:center')
+
+            # ADE can't handle &amp; in an img url
+            for tag in XPath('//h:img[@src]')(root):
+                tag.set('src', tag.get('src', '').replace('&', ''))
+
+        # Style links once for the whole book rather than once per spine item
+        hrefs = self.oeb.manifest.hrefs
+        if 'stylesheet.css' in hrefs:
+            stylesheet = hrefs['stylesheet.css']
+            stylesheet.data.add('a { color: inherit; text-decoration: inherit; '
+                    'cursor: default; }')
+            stylesheet.data.add('a[href] { color: blue; '
+                    'text-decoration: underline; cursor:pointer; }')
+
 
 
diff --git a/src/calibre/ebooks/oeb/iterator.py b/src/calibre/ebooks/oeb/iterator.py
index ab3e90083d..ffafa6d1a2 100644
--- a/src/calibre/ebooks/oeb/iterator.py
+++ b/src/calibre/ebooks/oeb/iterator.py
@@ -12,13 +12,15 @@ from cStringIO import StringIO
 from PyQt4.Qt import QFontDatabase
 
 from calibre.customize.ui import available_input_formats
-from calibre.ebooks.epub.from_html import TITLEPAGE
 from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ptempfile import TemporaryDirectory
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.utils.zipfile import safe_replace, ZipFile
 from calibre.utils.config import DynamicConfig
 from calibre.utils.logging import Log
+from calibre.ebooks.epub.output import EPUBOutput
+
+TITLEPAGE = EPUBOutput.TITLEPAGE_COVER  # formerly imported from calibre.ebooks.epub.from_html
 
 def character_count(html):
     '''
diff --git a/src/calibre/ebooks/oeb/transforms/guide.py b/src/calibre/ebooks/oeb/transforms/guide.py
index 06153c5a48..00830b1a8c 100644
--- a/src/calibre/ebooks/oeb/transforms/guide.py
+++ b/src/calibre/ebooks/oeb/transforms/guide.py
@@ -14,7 +14,10 @@ class Clean(object):
         from calibre.ebooks.oeb.base import urldefrag
         self.oeb, self.log, self.opts = oeb, oeb.log, opts
 
-        cover_href = ''
+        protected_hrefs = set([])
+        if 'titlepage' in self.oeb.guide:
+            protected_hrefs.add(urldefrag(
+                self.oeb.guide['titlepage'].href)[0])
         if 'cover' not in self.oeb.guide:
             covers = []
             for x in ('other.ms-coverimage-standard',
@@ -32,15 +35,15 @@ class Clean(object):
                     self.log('Choosing %s:%s as the cover'%(ref.type, ref.href))
                 ref.type = 'cover'
                 self.oeb.guide.refs['cover'] = ref
-                cover_href = urldefrag(ref.href)[0]
+                protected_hrefs.add(urldefrag(ref.href)[0])
         else:
-            cover_href = urldefrag(self.oeb.guide.refs['cover'].href)[0]
+            protected_hrefs.add(urldefrag(self.oeb.guide.refs['cover'].href)[0])
 
         for x in list(self.oeb.guide):
             href = urldefrag(self.oeb.guide[x].href)[0]
-            if x.lower() != 'cover':
+            if x.lower() not in ('cover', 'titlepage'):  # these guide types are exempt
                 try:
-                    if href != cover_href:
+                    if href not in protected_hrefs:
                         self.oeb.manifest.remove(self.oeb.manifest.hrefs[href])
                 except KeyError:
                     pass
diff --git a/src/calibre/ebooks/oeb/transforms/rescale.py b/src/calibre/ebooks/oeb/transforms/rescale.py
new file mode 100644
index 0000000000..5b62e5fda5
--- /dev/null
+++ b/src/calibre/ebooks/oeb/transforms/rescale.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from calibre import fit_image
+
+class RescaleImages(object):
+    'Rescale every manifest image to fit opts.dest.width x opts.dest.height'
+
+    def __call__(self, oeb, opts):
+        from PyQt4.Qt import QApplication, QImage, Qt
+        from calibre.gui2 import pixmap_to_data
+        self.oeb, self.opts, self.log = oeb, opts, oeb.log
+        page_width, page_height = opts.dest.width, opts.dest.height
+        for item in oeb.manifest:
+            if not item.media_type.startswith('image'):
+                continue
+            raw = item.data
+            if not raw:
+                continue
+            if QApplication.instance() is None:
+                QApplication([])
+            img = QImage(10, 10, QImage.Format_ARGB32_Premultiplied)
+            if not img.loadFromData(raw):
+                continue  # Qt failed to load the data; leave the item untouched
+            scaled, new_width, new_height = fit_image(img.width(),
+                    img.height(), page_width, page_height)
+            if scaled:
+                self.log('Rescaling image', item.href)
+                item.data = pixmap_to_data(img.scaled(new_width, new_height,
+                        Qt.IgnoreAspectRatio, Qt.SmoothTransformation))
+
+
diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py
index b54b0ebce0..d3505a5fd9 100644
--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@@ -17,7 +17,7 @@ from lxml.cssselect import CSSSelector
 
 from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP as NAMESPACES, \
         urldefrag, rewrite_links, urlunquote
-from calibre.ebooks.epub import tostring, rules
+from calibre.ebooks.epub import rules
 
 
 XPath = functools.partial(_XPath, namespaces=NAMESPACES)
@@ -25,6 +25,9 @@ XPath = functools.partial(_XPath, namespaces=NAMESPACES)
 SPLIT_ATTR       = 'cs'
 SPLIT_POINT_ATTR = 'csp'
 
+def tostring(root):  # serialize root as UTF-8; replaces the import from calibre.ebooks.epub
+    return etree.tostring(root, encoding='utf-8')
+
 class SplitError(ValueError):
 
     def __init__(self, path, root):
diff --git a/src/calibre/ebooks/oeb/transforms/structure.py b/src/calibre/ebooks/oeb/transforms/structure.py
index 6499a5e9c4..9240873346 100644
--- a/src/calibre/ebooks/oeb/transforms/structure.py
+++ b/src/calibre/ebooks/oeb/transforms/structure.py
@@ -11,7 +11,7 @@ import re
 from lxml import etree
 from urlparse import urlparse
 
-from calibre.ebooks.oeb.base import XPNSMAP, TOC
+from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML
 XPath = lambda x: etree.XPath(x, namespaces=XPNSMAP)
 
 class DetectStructure(object):
@@ -63,11 +63,11 @@ class DetectStructure(object):
                 if chapter_mark == 'none':
                     continue
                 elif chapter_mark == 'rule':
-                    mark = etree.Element('hr')
+                    mark = etree.Element(XHTML('hr'))
                 elif chapter_mark == 'pagebreak':
-                    mark = etree.Element('div', style=page_break_after)
+                    mark = etree.Element(XHTML('div'), style=page_break_after)
                 else: # chapter_mark == 'both':
-                    mark = etree.Element('hr', style=page_break_before)
+                    mark = etree.Element(XHTML('hr'), style=page_break_before)
                 elem.addprevious(mark)
 
     def create_level_based_toc(self):
@@ -114,12 +114,13 @@ class DetectStructure(object):
     def add_leveled_toc_items(self, item):
         level1 = XPath(self.opts.level1_toc)(item.data)
         level1_order = []
+        document = item
 
         counter = 1
         if level1:
             added = {}
             for elem in level1:
-                text, _href = self.elem_to_link(item, elem, counter)
+                text, _href = self.elem_to_link(document, elem, counter)
                 counter += 1
                 if text:
                     node = self.oeb.toc.add(text, _href,
@@ -132,11 +133,11 @@ class DetectStructure(object):
                 level2 = list(XPath(self.opts.level2_toc)(item.data))
                 for elem in level2:
                     level1 = None
-                    for item in item.data.iterdescendants():
+                    for item in document.data.iterdescendants():
                         if item in added.keys():
                             level1 = added[item]
                         elif item == elem and level1 is not None:
-                            text, _href = self.elem_to_link(item, elem, counter)
+                            text, _href = self.elem_to_link(document, elem, counter)
                             counter += 1
                             if text:
                                 added2[elem] = level1.add(text, _href,
@@ -145,12 +146,12 @@ class DetectStructure(object):
                     level3 = list(XPath(self.opts.level3_toc)(item.data))
                     for elem in level3:
                         level2 = None
-                        for item in item.data.iterdescendants():
+                        for item in document.data.iterdescendants():
                             if item in added2.keys():
                                 level2 = added2[item]
                             elif item == elem and level2 is not None:
                                 text, _href = \
-                                        self.elem_to_link(item, elem, counter)
+                                        self.elem_to_link(document, elem, counter)
                                 counter += 1
                                 if text:
                                     level2.add(text, _href,