Sync to pluginize

2025-08-11 09:13:57 -04:00 · 2009-04-24 20:24:15 -04:00 · 2009-04-24 20:24:15 -04:00 · 202958cb4d
commit 202958cb4d
parent 06aa8f8361 6fe1590813
30 changed files with 666 additions and 4075 deletions
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -287,6 +287,7 @@ from calibre.ebooks.odt.input import ODTInput
 from calibre.ebooks.rtf.input import RTFInput
 from calibre.ebooks.html.input import HTMLInput
 from calibre.ebooks.oeb.output import OEBOutput
 from calibre.ebooks.epub.output import EPUBOutput
 from calibre.ebooks.txt.output import TXTOutput
 from calibre.ebooks.pdf.output import PDFOutput
 from calibre.ebooks.pdb.ereader.output import EREADEROutput
@ -294,7 +295,7 @@ from calibre.customize.profiles import input_profiles, output_profiles
 plugins = [HTML2ZIP, EPUBInput, MOBIInput, PDBInput, PDFInput, HTMLInput,
        TXTInput, OEBOutput, TXTOutput, PDFOutput, LITInput,
-        FB2Input, ODTInput, RTFInput, EREADEROutput]
+        FB2Input, ODTInput, RTFInput, EPUBOutput, EREADEROutput]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                        x.__name__.endswith('MetadataReader')]
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
--- a/src/calibre/customize/profiles.py
+++ b/src/calibre/customize/profiles.py
@ -3,7 +3,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
-import sys, re
+import re
 from itertools import izip
 from calibre.customize import Plugin as _Plugin
@ -22,7 +22,7 @@ class Plugin(_Plugin):
    fbase  = 12
    fsizes = [5, 7, 9, 12, 13.5, 17, 20, 22, 24]
-    screen_size = (800, 600)
+    screen_size = (1600, 1200)
    dpi = 100
    def __init__(self, *args, **kwargs):
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@ -117,6 +117,9 @@ def add_pipeline_options(parser, plumber):
                      'line_height',
                      'linearize_tables',
                      'extra_css',
                      'margin_top', 'margin_left', 'margin_right',
                      'margin_bottom', 'dont_justify',
                      'insert_blank_line', 'remove_paragraph_spacing',
                  ]
                  ),
@ -124,6 +127,8 @@ def add_pipeline_options(parser, plumber):
                  _('Control auto-detection of document structure.'),
                  [
                      'dont_split_on_page_breaks', 'chapter', 'chapter_mark',
                      'prefer_metadata_cover', 'remove_first_image',
                      'insert_comments',
                  ]
                  ),
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -195,7 +195,7 @@ OptionRecommendation(name='toc_filter',
 OptionRecommendation(name='chapter',
        recommended_value="//*[((name()='h1' or name()='h2') and "
-              "re:test(., 'chapter|book|section|part', 'i')) or @class "
+              r"re:test(., 'chapter|book|section|part\s+', 'i')) or @class "
              "= 'chapter']", level=OptionRecommendation.LOW,
            help=_('An XPath expression to detect chapter titles. The default '
                'is to consider <h1> or <h2> tags that contain the words '
@ -227,6 +227,64 @@ OptionRecommendation(name='extra_css',
                'rules.')
        ),
 OptionRecommendation(name='margin_top',
        recommended_value=5.0, level=OptionRecommendation.LOW,
        help=_('Set the top margin in pts. Default is %default')),
 OptionRecommendation(name='margin_bottom',
        recommended_value=5.0, level=OptionRecommendation.LOW,
        help=_('Set the bottom margin in pts. Default is %default')),
 OptionRecommendation(name='margin_left',
        recommended_value=5.0, level=OptionRecommendation.LOW,
        help=_('Set the left margin in pts. Default is %default')),
 OptionRecommendation(name='margin_right',
        recommended_value=5.0, level=OptionRecommendation.LOW,
        help=_('Set the right margin in pts. Default is %default')),
 OptionRecommendation(name='dont_justify',
        recommended_value=False, level=OptionRecommendation.LOW,
        help=_('Do not force text to be justified in output. Whether text '
            'is actually displayed justified or not depends on whether '
            'the ebook format and reading device support justification.')
        ),
 OptionRecommendation(name='remove_paragraph_spacing',
        recommended_value=False, level=OptionRecommendation.LOW,
        help=_('Remove spacing between paragraphs. Also sets an indent on '
        'paragraphs of 1.5em. Spacing removal will not work '
        'if the source file does not use paragraphs (<p> or <div> tags).')
        ),
 OptionRecommendation(name='prefer_metadata_cover',
        recommended_value=False, level=OptionRecommendation.LOW,
        help=_('Use the cover detected from the source file in preference '
        'to the specified cover.')
        ),
 OptionRecommendation(name='insert_blank_line',
        recommended_value=False, level=OptionRecommendation.LOW,
        help=_('Insert a blank line between paragraphs. Will not work '
            'if the source file does not use paragraphs (<p> or <div> tags).'
            )
        ),
 OptionRecommendation(name='remove_first_image',
        recommended_value=False, level=OptionRecommendation.LOW,
        help=_('Remove the first image from the input ebook. Useful if the '
        'first image in the source file is a cover and you are specifying '
        'an external cover.'
            )
        ),
 OptionRecommendation(name='insert_comments',
        recommended_value=False, level=OptionRecommendation.LOW,
        help=_('Insert the comments/summary from the book metadata at the start of '
            'the book. This is useful if your ebook reader does not support '
            'displaying the comments from the metadata.'
            )
        ),
 OptionRecommendation(name='read_metadata_from_opf',
@ -244,7 +302,8 @@ OptionRecommendation(name='title',
 OptionRecommendation(name='authors',
    recommended_value=None, level=OptionRecommendation.LOW,
-    help=_('Set the authors. Multiple authors should be separated ')),
+    help=_('Set the authors. Multiple authors should be separated by '
    'ampersands.')),
 OptionRecommendation(name='title_sort',
    recommended_value=None, level=OptionRecommendation.LOW,
@ -428,7 +487,6 @@ OptionRecommendation(name='language',
            mi.cover = None
        self.user_metadata = mi
    def setup_options(self):
        '''
        Setup the `self.opts` object.
@ -479,9 +537,16 @@ OptionRecommendation(name='language',
        if not hasattr(self.oeb, 'manifest'):
            self.oeb = create_oebbook(self.log, self.oeb, self.opts)
        from calibre.ebooks.oeb.transforms.guide import Clean
        Clean()(self.oeb, self.opts)
        self.opts.source = self.opts.input_profile
        self.opts.dest = self.opts.output_profile
        from calibre.ebooks.oeb.transforms.metadata import MergeMetadata
        MergeMetadata()(self.oeb, self.user_metadata,
                self.opts.prefer_metadata_cover)
        from calibre.ebooks.oeb.transforms.structure import DetectStructure
        DetectStructure()(self.oeb, self.opts)
@ -495,6 +560,9 @@ OptionRecommendation(name='language',
        else:
            fkey = map(float, fkey.split(','))
        from calibre.ebooks.oeb.transforms.jacket import Jacket
        Jacket()(self.oeb, self.opts)
        if self.opts.extra_css and os.path.exists(self.opts.extra_css):
            self.opts.extra_css = open(self.opts.extra_css, 'rb').read()
--- a/src/calibre/ebooks/epub/init.py
+++ b/src/calibre/ebooks/epub/init.py
@ -6,32 +6,7 @@ __docformat__ = 'restructuredtext en'
 '''
 Conversion to EPUB.
 '''
 import sys, textwrap, re, os, uuid
 from itertools import cycle
 from calibre.utils.config import Config, StringConfig
 from calibre.utils.zipfile import ZipFile, ZIP_STORED
 from calibre.ebooks.html import tostring
 from lxml import etree
 class DefaultProfile(object):
    # Profile registered under the key 'None' in PROFILES, i.e. the one used
    # for a device independent EPUB.
    # NOTE(review): the code that consumes these attributes is not visible
    # here; their exact meaning is inferred from the names -- confirm.
    flow_size            = sys.maxint  # largest native int; presumably "no limit"
    screen_size          = None
    remove_special_chars = False
    remove_object_tags   = False
 class PRS505(DefaultProfile):
    # Overrides every attribute of DefaultProfile; registered under the key
    # 'PRS505' in PROFILES.
    # NOTE(review): presumably describes the Sony PRS-505 reader -- confirm.
    flow_size            = 270000
    screen_size          = (590, 765)
    # Matches U+200B (zero width space) and U+00AD (soft hyphen)
    remove_special_chars = re.compile(u'[\u200b\u00ad]')
    remove_object_tags   = True
 # Maps each accepted value of the --profile option to its profile class
 PROFILES = {
            'PRS505' : PRS505,
            'None'   : DefaultProfile,
            }
 def rules(stylesheets):
    for s in stylesheets:
@ -58,152 +33,4 @@ def initialize_container(path_to_container, opf_name='metadata.opf'):
    zf.writestr('META-INF/container.xml', CONTAINER)
    return zf
 def config(defaults=None, name='epub'):
    '''
    Build the option set that controls conversion to EPUB.

    :param defaults: If None, a `Config` called `name` is created. Otherwise
                     a `StringConfig` is created from this value.
    :param name: Name handed to `Config`; unused when `defaults` is given.
    :return: The populated configuration object.
    '''
    desc = _('Options to control the conversion to EPUB')
    if defaults is None:
        c = Config(name, desc)
    else:
        c = StringConfig(defaults, desc)
    # NOTE(review): common_config is neither defined nor imported in the
    # visible part of this module -- confirm it is in scope.
    c.update(common_config())
    # Drop the inherited 'output' and 'zip' options; 'output' is re-added
    # below with EPUB specific help text.
    c.remove_opt('output')
    c.remove_opt('zip')
    c.add_opt('output', ['-o', '--output'], default=None,
             help=_('The output EPUB file. If not specified, it is '
                    'derived from the input file name.'))
    c.add_opt('profile', ['--profile'], default='PRS505', choices=list(PROFILES.keys()),
              help=_('Profile of the target device this EPUB is meant for. '
                     'Set to None to create a device independent EPUB. '
                     'The profile is used for device specific restrictions '
                     'on the EPUB. Choices are: ')+str(list(PROFILES.keys())))
    c.add_opt('override_css', ['--override-css'], default=None,
              help=_('Either the path to a CSS stylesheet or raw CSS. '
                     'This CSS will override any existing CSS '
                     'declarations in the source files.'))
    # ---- Option group: structure detection ----
    structure = c.add_group('structure detection',
                            _('Control auto-detection of document structure.'))
    structure('chapter', ['--chapter'],
              default="//*[re:match(name(), 'h[1-2]') and "
              "re:test(., 'chapter|book|section|part', 'i')] | "
              "//*[@class = 'chapter']",
            help=_('''\
 An XPath expression to detect chapter titles. The default is to consider <h1> or
 <h2> tags that contain the words "chapter","book","section" or "part" as chapter titles as
 well as any tags that have class="chapter".
 The expression used must evaluate to a list of elements. To disable chapter detection,
 use the expression "/". See the XPath Tutorial in the calibre User Manual for further
 help on using this feature.
 ''').replace('\n', ' '))
    structure('chapter_mark', ['--chapter-mark'], choices=['pagebreak', 'rule', 'both', 'none'],
              default='pagebreak',
              help=_('Specify how to mark detected chapters. A value of '
                     '"pagebreak" will insert page breaks before chapters. '
                     'A value of "rule" will insert a line before chapters. '
                     'A value of "none" will disable chapter marking and a '
                     'value of "both" will use both page breaks and lines '
                     'to mark chapters.'))
    structure('cover', ['--cover'], default=None,
              help=_('Path to the cover to be used for this book'))
    structure('prefer_metadata_cover', ['--prefer-metadata-cover'], default=False,
              action='store_true',
              help=_('Use the cover detected from the source file in preference '
                     'to the specified cover.'))
    structure('remove_first_image', ['--remove-first-image'], default=False,
              help=_('Remove the first image from the input ebook. Useful if '
                     'the first image in the source file is a cover and you '
                     'are specifying an external cover.'))
    structure('dont_split_on_page_breaks', ['--dont-split-on-page-breaks'], default=False,
              help=_('Turn off splitting at page breaks. Normally, input files '
                     'are automatically split at every page break into '
                     'two files. This gives an output ebook that can be parsed '
                     'faster and with less resources. However, splitting is '
                     'slow and if your source file contains a very large '
                     'number of page breaks, you should turn off splitting '
                     'on page breaks.'))
    structure('page', ['--page'], default=None,
              help=_('XPath expression to detect page boundaries for building '
                     'a custom pagination map, as used by AdobeDE. Default is '
                     'not to build an explicit pagination map.'))
    structure('page_names', ['--page-names'], default=None,
              help=_('XPath expression to find the name of each page in the '
                     'pagination map relative to its boundary element. '
                     'Default is to number all pages staring with 1.'))
    # ---- Option group: table of contents ----
    toc = c.add_group('toc',
        _('''\
 Control the automatic generation of a Table of Contents. If an OPF file is detected
 and it specifies a Table of Contents, then that will be used rather than trying
 to auto-generate a Table of Contents.
 ''').replace('\n', ' '))
    toc('max_toc_links', ['--max-toc-links'], default=50,
        help=_('Maximum number of links to insert into the TOC. Set to 0 '
               'to disable. Default is: %default. Links are only added to the '
               'TOC if less than the --toc-threshold number of chapters were detected.'))
    toc('no_chapters_in_toc', ['--no-chapters-in-toc'], default=False,
        help=_("Don't add auto-detected chapters to the Table of Contents."))
    toc('toc_threshold', ['--toc-threshold'], default=6,
        help=_('If fewer than this number of chapters is detected, then links '
               'are added to the Table of Contents. Default: %default'))
    toc('level1_toc', ['--level1-toc'], default=None,
        help=_('XPath expression that specifies all tags that should be added '
               'to the Table of Contents at level one. If this is specified, '
               'it takes precedence over other forms of auto-detection.'))
    toc('level2_toc', ['--level2-toc'], default=None,
        help=_('XPath expression that specifies all tags that should be added '
               'to the Table of Contents at level two. Each entry is added '
               'under the previous level one entry.'))
    toc('level3_toc', ['--level3-toc'], default=None,
        help=_('XPath expression that specifies all tags that should be added '
               'to the Table of Contents at level three. Each entry is added '
               'under the previous level two entry.'))
    toc('from_ncx', ['--from-ncx'], default=None,
        help=_('Path to a .ncx file that contains the table of contents to use '
               'for this ebook. The NCX file should contain links relative to '
               'the directory it is placed in. See '
               'http://www.niso.org/workrooms/daisy/Z39-86-2005.html#NCX for '
               'an overview of the NCX format.'))
    toc('use_auto_toc', ['--use-auto-toc'], default=False,
        help=_('Normally, if the source file already has a Table of Contents, '
               'it is used in preference to the auto-generated one. '
               'With this option, the auto-generated one is always used.'))
    # ---- Option group: page layout ----
    layout = c.add_group('page layout', _('Control page layout'))
    layout('margin_top', ['--margin-top'], default=5.0,
           help=_('Set the top margin in pts. Default is %default'))
    layout('margin_bottom', ['--margin-bottom'], default=5.0,
           help=_('Set the bottom margin in pts. Default is %default'))
    layout('margin_left', ['--margin-left'], default=5.0,
           help=_('Set the left margin in pts. Default is %default'))
    layout('margin_right', ['--margin-right'], default=5.0,
           help=_('Set the right margin in pts. Default is %default'))
    # The option is stored as 'base_font_size2' although its command line
    # switch is --base-font-size
    layout('base_font_size2', ['--base-font-size'], default=12.0,
           help=_('The base font size in pts. Default is %defaultpt. '
                  'Set to 0 to disable rescaling of fonts.'))
    layout('remove_paragraph_spacing', ['--remove-paragraph-spacing'], default=False,
           help=_('Remove spacing between paragraphs. '
                  'Also sets a indent on paragraphs of 1.5em. '
                  'You can override this by adding p {text-indent: 0cm} to '
                  '--override-css. Spacing removal will not work if the source '
                  'file forces inter-paragraph spacing.'))
    layout('no_justification', ['--no-justification'], default=False,
           help=_('Do not force text to be justified in output.'))
    layout('linearize_tables', ['--linearize-tables'], default=False,
           help=_('Remove table markup, converting it into paragraphs. '
                  'This is useful if your source file uses a table to manage layout.'))
    layout('preserve_tag_structure', ['--preserve-tag-structure'], default=False,
           help=_('Preserve the HTML tag structure while splitting large HTML files. '
                  'This is only neccessary if the HTML files contain CSS that '
                  'uses sibling selectors. Enabling this greatly slows down '
                  'processing of large HTML files.'))
    # ---- Options registered in the 'debug' group ----
    c.add_opt('show_opf', ['--show-opf'], default=False, group='debug',
              help=_('Print generated OPF file to stdout'))
    c.add_opt('show_ncx', ['--show-ncx'], default=False, group='debug',
              help=_('Print generated NCX file to stdout'))
    c.add_opt('keep_intermediate', ['--keep-intermediate-files'], group='debug',
              default=False,
              help=_('Keep intermediate files during processing by html2epub'))
    c.add_opt('extract_to', ['--extract-to'], group='debug', default=None,
              help=_('Extract the contents of the produced EPUB file to the '
                     'specified directory.'))
    return c
--- a/src/calibre/ebooks/epub/fonts.py
+++ b/src/calibre/ebooks/epub/fonts.py
@ -1,300 +0,0 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 '''
 Font size rationalization. See :function:`relativize`.
 '''
 import logging, re, operator, functools, collections, unittest, copy, sys
 from xml.dom import SyntaxErr
 from lxml.cssselect import CSSSelector
 from lxml import etree
 from lxml.html import HtmlElement
 from calibre.ebooks.html_old import fromstring
 from calibre.ebooks.epub import rules
 from cssutils import CSSParser
 # A signed integer or decimal number, e.g. ``12``, ``-3`` or ``.5``
 num           = r'[-]?\d+|[-]?\d*\.\d+'
 # A CSS length. The number+unit alternative has to be tried before the bare
 # zero, otherwise a value such as ``0.5em`` is matched as the unitless ``0``.
 length        = r'(?P<num>{num})(?P<unit>%|em|ex|px|in|cm|mm|pt|pc)|(?P<zero>0)'.replace('{num}', num)
 # CSS absolute size keywords: xx-small ... medium ... xx-large
 absolute_size = r'(?P<abs>(x?x-)?(small|large)|medium)'
 # CSS relative size keywords
 relative_size = r'(?P<rel>smaller|larger)'
 font_size_pat   = re.compile('|'.join((relative_size, absolute_size, length)), re.I)
 # Only line heights given in absolute units are matched; group 1 is the
 # number and group 2 the unit.
 line_height_pat = re.compile(r'({num})(px|in|cm|mm|pt|pc)'.replace('{num}', num))
 # Number of points in one of each absolute unit
 PTU = {
       'in' : 72.,
       'cm' : 72/2.54,
       'mm' : 72/25.4,
       'pt' : 1.0,
       # A pica is twelve points (not one twelfth of a point)
       'pc' : 12.,
       # CSS reference pixel: 96px to the inch. Both patterns above accept px,
       # so it needs a conversion factor as well.
       'px' : 0.75,
       }
 DEFAULT_FONT_SIZE = 12
 class Rationalizer(object):
    '''
    Class methods that work out the font size of every element of a parsed
    HTML tree from its stylesheets and re-express the sizes as percentages
    relative to the parent element. See :method:`rationalize` for the entry
    point.
    '''

    @classmethod
    def specificity(cls, s):
        '''Map CSS specificity tuple to a single integer'''
        # Weight each component by a power of ten so that earlier components
        # dominate later ones.
        return sum([10**(4-i) * x for i,x in enumerate(s)])

    @classmethod
    def compute_font_size(cls, elem):
        '''
        Calculate the effective font size of an element traversing its ancestors as far as
        neccessary. The result is stored in ``elem.computed_font_size``; nothing
        is done if that attribute is already set.
        '''
        cfs = elem.computed_font_size
        if cfs is not None:
            return
        sfs = elem.specified_font_size
        if callable(sfs):
            # Relative size: resolve the parent first, then apply the factor
            parent = elem.getparent()
            cls.compute_font_size(parent)
            elem.computed_font_size = sfs(parent.computed_font_size)
        else:
            elem.computed_font_size = sfs

    @classmethod
    def calculate_font_size(cls, style):
        '''
        Return font size in pts from style object. For relative units returns a
        callable that maps the parent font size to this font size. Returns None
        if the style does not specify a font size.
        '''
        match = font_size_pat.search(style.font)
        fs = ''
        if match:
            fs = match.group()
        # An explicit font-size takes precedence over the font shorthand
        if style.fontSize:
            fs = style.fontSize
        match = font_size_pat.search(fs)
        if match is None:
            return None
        match = match.groupdict()
        unit = match.get('unit', '')
        if unit: unit = unit.lower()
        if unit in PTU:
            return PTU[unit] * float(match['num'])
        if unit in ('em', 'ex'):
            return functools.partial(operator.mul, float(match['num']))
        if unit == '%':
            return functools.partial(operator.mul, float(match['num'])/100.)
        keyword = match.get('abs', '')
        if keyword: keyword = keyword.lower()
        if keyword:
            # Every 'x' prefix is one more step of 1.2 away from medium (12pt)
            x = (1.2)**(keyword.count('x') * (-1 if 'small' in keyword else 1))
            return 12 * x
        if match.get('zero', False):
            return 0.
        if 'larger' in fs.lower():
            return functools.partial(operator.mul, 1.2)
        return functools.partial(operator.mul, 0.8)

    @classmethod
    def resolve_rules(cls, stylesheets):
        '''
        Attach ``fs_rules`` and ``lh_rules`` lists of ``[selector, value]``
        pairs (font sizes and absolute line heights in pts) to every sheet in
        `stylesheets`. Sheets that already have ``fs_rules`` are skipped.
        '''
        for sheet in stylesheets:
            if hasattr(sheet, 'fs_rules'):
                continue
            sheet.fs_rules = []
            sheet.lh_rules = []
            for r in sheet:
                if r.type == r.STYLE_RULE:
                    font_size = cls.calculate_font_size(r.style)
                    if font_size is not None:
                        for s in r.selectorList:
                            sheet.fs_rules.append([CSSSelector(s.selectorText), font_size])
                    orig = line_height_pat.search(r.style.lineHeight)
                    if orig is not None:
                        # Skip units that have no known conversion to pts
                        # instead of failing with a KeyError
                        factor = PTU.get(orig.group(2).lower())
                        if factor is None:
                            continue
                        for s in r.selectorList:
                            sheet.lh_rules.append([CSSSelector(s.selectorText), float(orig.group(1)) * factor])

    @classmethod
    def apply_font_size_rules(cls, stylesheets, root):
        'Add a ``specified_font_size`` attribute to every element that has a specified font size'
        cls.resolve_rules(stylesheets)
        for sheet in stylesheets:
            for selector, font_size in sheet.fs_rules:
                elems = selector(root)
                for elem in elems:
                    elem.specified_font_size = font_size

    @classmethod
    def remove_font_size_information(cls, stylesheets):
        '''
        Strip font-size, the size part of the font shorthand and absolute
        line-height declarations from every rule of `stylesheets`.
        '''
        for r in rules(stylesheets):
            r.style.removeProperty('font-size')
            try:
                new = font_size_pat.sub('', r.style.font).strip()
                if new:
                    r.style.font = new
                else:
                    r.style.removeProperty('font')
            except SyntaxErr:
                # What is left is not a valid font shorthand, drop it entirely
                r.style.removeProperty('font')
            if line_height_pat.search(r.style.lineHeight) is not None:
                r.style.removeProperty('line-height')

    @classmethod
    def compute_font_sizes(cls, root, stylesheets, base=12):
        '''
        Compute the font size of every element under `root` and return a new
        stylesheet of ``#id`` rules that expresses them as percentages of the
        parent font size.

        :param base: Size in pts that the most common font size of the body
                     text is rescaled to. Rescaling is skipped if this is not
                     greater than zero.
        '''
        stylesheets = [s for s in stylesheets if hasattr(s, 'cssText')]
        cls.apply_font_size_rules(stylesheets, root)

        # Compute the effective font size of all tags
        root.computed_font_size = DEFAULT_FONT_SIZE
        for elem in root.iter(etree.Element):
            cls.compute_font_size(elem)

        extra_css = {}
        # Needed by the final pass below even when rescaling is disabled
        body = root.xpath('//body')[0]
        counter = 0
        if base > 0:
            # Calculate the "base" (i.e. most common) font size
            font_sizes = collections.defaultdict(lambda : 0)
            IGNORE = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
            for elem in body.iter(etree.Element):
                if elem.tag not in IGNORE:
                    t = getattr(elem, 'text', '')
                    if t: t = t.strip()
                    if t:
                        font_sizes[elem.computed_font_size] += len(t)
                # Tail text is rendered with the font size of the parent
                t = getattr(elem, 'tail', '')
                if t: t = t.strip()
                if t:
                    parent = elem.getparent()
                    if parent.tag not in IGNORE:
                        font_sizes[parent.computed_font_size] += len(t)

            try:
                most_common = max(font_sizes.items(), key=operator.itemgetter(1))[0]
                # float() prevents integer division from truncating the scale
                scale = float(base)/most_common if most_common > 0 else 1.
            except ValueError:
                # No text at all
                scale = 1.

            # rescale absolute line-heights
            for sheet in stylesheets:
                for selector, lh in sheet.lh_rules:
                    for elem in selector(root):
                        elem.set('id', elem.get('id', 'cfs_%d'%counter))
                        counter += 1
                        extra_css.setdefault(elem.get('id'), []).append('line-height:%fpt'%(lh*scale))

            # Rescale all computed font sizes
            for elem in body.iter(etree.Element):
                if isinstance(elem, HtmlElement):
                    elem.computed_font_size *= scale

        # Remove all font size specifications from the last stylesheet
        cls.remove_font_size_information(stylesheets[-1:])

        # Create the CSS to implement the rescaled font sizes
        for elem in body.iter(etree.Element):
            cfs, pcfs = map(operator.attrgetter('computed_font_size'), (elem, elem.getparent()))
            # Ignore differences of less than a twelfth of a point
            if abs(cfs-pcfs) > 1/12. and abs(pcfs) > 1/12.:
                elem.set('id', elem.get('id', 'cfs_%d'%counter))
                counter += 1
                extra_css.setdefault(elem.get('id'), []).append('font-size: %f%%'%(100*(cfs/pcfs)))

        css = CSSParser(loglevel=logging.ERROR).parseString('')
        for elem_id, declarations in extra_css.items():
            css.add('#%s {%s}'%(elem_id, ';'.join(declarations)))
        return css

    @classmethod
    def rationalize(cls, stylesheets, root, opts):
        '''
        Rationalize the font sizes of `root` if ``opts.base_font_size2`` is
        positive. Failures are logged and do not propagate.

        :return: The stylesheet created by :method:`compute_font_sizes` or None
        '''
        logger     = logging.getLogger('html2epub')
        logger.info('\t\tRationalizing fonts...')
        extra_css = None
        if opts.base_font_size2 > 0:
            try:
                extra_css = cls.compute_font_sizes(root, stylesheets, base=opts.base_font_size2)
            except Exception:
                # Best effort: a failure here must not abort the conversion
                logger.warning('Failed to rationalize font sizes.')
                if opts.verbose > 1:
                    logger.exception('')
            finally:
                root.remove_font_size_information()
        logger.debug('\t\tDone rationalizing')
        return extra_css
 ################################################################################
 ############## Testing
 ################################################################################
 class FontTest(unittest.TestCase):
    '''
    Round trip tests: the stylesheet produced by the Rationalizer must yield
    the same computed font sizes as the stylesheet it was derived from.
    '''

    def setUp(self):
        from calibre.ebooks.epub import config
        self.opts = config(defaults='').parse()
        self.html = '''
        <html>
            <head>
                <title>Test document</title>
            </head>
            <body>
                <div id="div1">
                <!-- A comment -->
                    <p id="p1">Some <b>text</b></p>
                </div>
                <p id="p2">Some other <span class="it">text</span>.</p>
                <p id="longest">The longest piece of single font size text in this entire file. Used to test resizing.</p>
            </body>
        </html>
        '''
        self.root = fromstring(self.html)

    def do_test(self, css, base=DEFAULT_FONT_SIZE, scale=1):
        '''
        Rationalize a copy of the test document against `css`, re-apply the
        generated stylesheet to a second copy and check that both copies end
        up with the same computed font sizes. Returns the generated CSS text.
        '''
        parser = CSSParser(loglevel=logging.ERROR)
        original = copy.deepcopy(self.root)
        original.computed_font_size = DEFAULT_FONT_SIZE
        source_sheet = parser.parseString(css)
        generated = Rationalizer.compute_font_sizes(original, [source_sheet], base)

        processed = copy.deepcopy(original)
        processed.remove_font_size_information()
        processed.computed_font_size = DEFAULT_FONT_SIZE
        Rationalizer.apply_font_size_rules([generated], processed)
        for node in processed.iter(etree.Element):
            Rationalizer.compute_font_size(node)

        original_body = original.xpath('//body')[0]
        processed_body = processed.xpath('//body')[0]
        pairs = zip(original_body.iter(etree.Element),
                    processed_body.iter(etree.Element))
        for before, after in pairs:
            location = original.getroottree().getpath(before)
            self.assertAlmostEqual(before.computed_font_size, after.computed_font_size,
                msg='Computed font sizes for %s not equal. Original: %f Processed: %f'%\
                (location, before.computed_font_size, after.computed_font_size))
        return generated.cssText

    def testStripping(self):
        'Test that any original entries are removed from the CSS'
        raw = 'p { font: bold 10px italic smaller; font-size: x-large} \na { font-size: 0 }'
        sheet = CSSParser(loglevel=logging.ERROR).parseString(raw)
        Rationalizer.compute_font_sizes(copy.deepcopy(self.root), [sheet])
        compact = sheet.cssText.replace(' ', '').replace('\n', '')
        self.assertEqual(compact,
                         'p{font:bolditalic}')

    def testIdentity(self):
        'Test that no unnecessary font size changes are made'
        generated = self.do_test('div {font-size:12pt} \nspan {font-size:100%}')
        self.assertEqual(generated.strip(), '')

    def testRelativization(self):
        'Test conversion of absolute to relative sizes'
        self.do_test('#p1 {font: 24pt} b {font: 12pt} .it {font: 48pt} #p2 {font: 100%}')

    def testResizing(self):
        'Test resizing of fonts'
        self.do_test('#longest {font: 24pt} .it {font:20pt; line-height:22pt}')
 def suite():
    '''Return a test suite made of all the tests defined by FontTest.'''
    loader = unittest.TestLoader()
    return loader.loadTestsFromTestCase(FontTest)
 def test():
    '''
    Run the font tests with a verbose text runner.

    :return: 0 if all tests passed, 1 otherwise. The value is suitable for
             passing to sys.exit(), so failures produce a non zero exit code.
    '''
    result = unittest.TextTestRunner(verbosity=2).run(suite())
    return 0 if result.wasSuccessful() else 1
 if __name__ == '__main__':
    sys.exit(test())
--- a/src/calibre/ebooks/epub/from_any.py
+++ b/src/calibre/ebooks/epub/from_any.py
@ -1,93 +0,0 @@
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 '''
 Convert any ebook format to epub.
 '''
 import sys, os, re
 from contextlib import nested
 from calibre import extract, walk
 from calibre.ebooks import DRMError
 from calibre.ebooks.epub import config as common_config
 from calibre.ebooks.epub.from_html import convert as html2epub, find_html_index
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.zipfile import ZipFile
 from calibre.customize.ui import run_plugins_on_preprocess
 SOURCE_FORMATS = ['lit', 'mobi', 'prc', 'azw', 'fb2', 'odt', 'rtf',
                  'txt', 'pdf', 'rar', 'zip', 'oebzip', 'htm', 'html', 'epub']
 def unarchive(path, tdir):
    '''
    Extract the archive at `path` into `tdir` and pick the file to convert.

    An OPF file is preferred, followed by the extensions in MAP. txt and rtf
    files smaller than 2048 bytes are passed over. If nothing matches, the
    result of find_html_index() on the extracted files is returned.

    :return: The tuple ``(path to file, extension)`` for a matched file.
    '''
    extract(path, tdir)
    extracted = list(walk(tdir))
    candidates = ['opf']
    candidates.extend(MAP.keys())
    for ext in candidates:
        suffix = '.'+ext
        size_matters = ext in ['txt', 'rtf']
        for f in extracted:
            if not f.lower().endswith(suffix):
                continue
            if size_matters and os.stat(f).st_size < 2048:
                continue
            return f, ext
    return find_html_index(extracted)
 def any2epub(opts, path, notification=None, create_epub=True,
             oeb_cover=False, extract_to=None):
    path = run_plugins_on_preprocess(path)
    ext = os.path.splitext(path)[1]
    if not ext:
        raise ValueError('Unknown file type: '+path)
    ext = ext.lower()[1:]
    if opts.output is None:
        opts.output = os.path.splitext(os.path.basename(path))[0]+'.epub'
    with nested(TemporaryDirectory('_any2epub1'), TemporaryDirectory('_any2epub2')) as (tdir1, tdir2):
        if ext in ['rar', 'zip', 'oebzip']:
            path, ext = unarchive(path, tdir1)
            print 'Found %s file in archive'%(ext.upper())
        if ext in MAP.keys():
            path = MAP[ext](path, tdir2, opts)
            ext = 'opf'
        if re.match(r'((x){0,1}htm(l){0,1})|opf', ext) is None:
            raise ValueError('Conversion from %s is not supported'%ext.upper())
        print 'Creating EPUB file...'
        html2epub(path, opts, notification=notification,
                  create_epub=create_epub, oeb_cover=oeb_cover,
                  extract_to=extract_to)
 def config(defaults=None):
    '''Return the common EPUB configuration, built from `defaults`.'''
    epub_config = common_config(defaults=defaults)
    return epub_config
 def formats():
    '''Return the list of input formats: the built in ones followed by the keys of MAP.'''
    supported = ['html', 'rar', 'zip', 'oebzip']
    supported.extend(MAP.keys())
    return supported
 USAGE = _('''\
 %%prog [options] filename
 Convert any of a large number of ebook formats to a %s file. Supported formats are: %s
 ''')
 def option_parser(usage=USAGE):
    '''
    Return an option parser for the EPUB configuration. `usage` is a template
    into which the output format name and the list of formats are substituted.
    '''
    message = usage%('EPUB', formats())
    return config().option_parser(usage=message)
 def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) < 2:
        parser.print_help()
        print 'No input file specified.'
        return 1
    any2epub(opts, args[1])
    return 0
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/epub/from_feeds.py
+++ b/src/calibre/ebooks/epub/from_feeds.py
@ -1,71 +0,0 @@
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 '''
 Convert periodical content into EPUB ebooks.
 '''
 import sys, glob, os
 from calibre.web.feeds.main import config as feeds2disk_config, USAGE, run_recipe
 from calibre.ebooks.epub.from_html import config as html2epub_config
 from calibre.ptempfile import TemporaryDirectory
 from calibre.ebooks.epub.from_html import convert as html2epub
 from calibre import strftime, sanitize_file_name
 def config(defaults=None):
    '''
    Build the feeds-to-EPUB configuration: start from the feeds2disk
    configuration, drop the options that are forced by this module, merge
    in the html2epub configuration and finally drop `chapter_mark`.
    '''
    conf = feeds2disk_config(defaults=defaults)
    for forced in ('lrf', 'epub', 'output_dir'):
        conf.remove(forced)
    conf.update(html2epub_config(defaults=defaults))
    conf.remove('chapter_mark')
    return conf
 def option_parser():
    '''
    Return a command line parser built from config(), using the USAGE
    text imported from calibre.web.feeds.main.
    '''
    return config().option_parser(usage=USAGE)
 def convert(opts, recipe_arg, notification=None):
    opts.lrf  = False
    opts.epub = True
    if opts.debug:
        opts.verbose = 2
    parser = option_parser()
    with TemporaryDirectory('_feeds2epub') as tdir:
        opts.output_dir = tdir
        recipe = run_recipe(opts, recipe_arg, parser, notification=notification)
        c = config()
        recipe_opts = c.parse_string(recipe.html2epub_options)
        c.smart_update(recipe_opts, opts)
        opts = recipe_opts
        opts.chapter_mark = 'none'
        opts.dont_split_on_page_breaks = True
        opf = glob.glob(os.path.join(tdir, '*.opf'))
        if not opf:
            raise Exception('Downloading of recipe: %s failed'%recipe_arg)
        opf = opf[0]
        if opts.output is None:
            fname = recipe.title + strftime(recipe.timefmt) + '.epub'
            opts.output = os.path.join(os.getcwd(), sanitize_file_name(fname))
        print 'Generating epub...'
        opts.encoding = 'utf-8'
        opts.remove_paragraph_spacing = True
        html2epub(opf, opts, notification=notification)
 def main(args=sys.argv, notification=None, handler=None):
    '''
    Command line entry point. Prints the help text and returns 1 unless
    exactly one recipe argument was given or `opts.feeds` is set;
    otherwise runs convert() and returns 0. `handler` is accepted but not
    used here.
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) != 2 and opts.feeds is None:
        parser.print_help()
        return 1
    if len(args) > 1:
        recipe_arg = args[1]
    else:
        recipe_arg = None
    convert(opts, recipe_arg, notification=notification)
    return 0
 # When run as a script, use main()'s return value as the exit status.
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/epub/from_html.py
+++ b/src/calibre/ebooks/epub/from_html.py
@ -1,547 +0,0 @@
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 '''
 Conversion of HTML/OPF files follows several stages:
    * All links in the HTML files or in the OPF manifest are
    followed to build up a list of HTML files to be converted.
    This stage is implemented by
    :function:`calibre.ebooks.html.traverse` and
    :class:`calibre.ebooks.html.HTMLFile`.
    * The HTML is pre-processed to make it more semantic.
    All links in the HTML files to other resources like images,
    stylesheets, etc. are relativized. The resources are copied
    into the `resources` sub directory. This is accomplished by
    :class:`calibre.ebooks.html.PreProcessor` and
    :class:`calibre.ebooks.html.Parser`.
    * The HTML is processed. Various operations are performed.
    All style declarations are extracted and consolidated into
    a single style sheet. Chapters are auto-detected and marked.
    Various font related manipulations are performed. See
    :class:`HTMLProcessor`.
    * The processed HTML is saved and the
    :module:`calibre.ebooks.epub.split` module is used to split up
    large HTML files into smaller chunks.
    * The EPUB container is created.
 '''
 import os, sys, cStringIO, logging, re, functools, shutil
 from lxml.etree import XPath
 from lxml import html, etree
 from PyQt4.Qt import QApplication, QPixmap, Qt
 from calibre.ebooks.html_old import Processor, merge_metadata, get_filelist,\
    opf_traverse, create_metadata, rebase_toc, Link, parser
 from calibre.ebooks.epub import config as common_config, tostring
 from calibre.ptempfile import TemporaryDirectory
 from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ebooks.epub import initialize_container, PROFILES
 from calibre.ebooks.epub.split import split
 from calibre.ebooks.epub.pages import add_page_map
 from calibre.ebooks.epub.fonts import Rationalizer
 from calibre.constants import preferred_encoding
 from calibre.customize.ui import run_plugins_on_postprocess
 from calibre import walk, CurrentDir, to_unicode, fit_image
 # content(*parts) is os.path.join(u'content', *parts): it builds paths inside
 # the `content` directory (an absolute last part is returned unchanged).
 content = functools.partial(os.path.join, u'content')
 def remove_bad_link(element, attribute, link, pos):
    '''
    Neutralise a broken link. `<link>` elements are removed from their
    parent; for any other element only the offending `attribute` is
    dropped. Nothing happens when `attribute` is None. `link` and `pos`
    are accepted for symmetry with iterlinks() but are not used.
    '''
    if attribute is None:
        return
    if element.tag == 'link':
        element.getparent().remove(element)
        return
    element.set(attribute, '')
    del element.attrib[attribute]
 def check_links(opf_path, pretty_print):
    '''
    Find and remove all invalid links in the HTML files

    Every manifest item of the OPF at `opf_path` whose media type contains
    'html' is looked up inside the `content` directory next to the OPF.
    Each readable file is parsed, links whose target path does not exist
    are removed via remove_bad_link(), and the file is rewritten in place
    using `tostring(root, pretty_print)`.
    '''
    logger = logging.getLogger('html2epub')
    logger.info('\tChecking files for bad links...')
    pathtoopf = os.path.abspath(opf_path)
    with CurrentDir(os.path.dirname(pathtoopf)):
        # NOTE(review): the stream handed to OPF is left open as before;
        # whether OPF reads it eagerly is not visible from this module.
        opf = OPF(open(pathtoopf, 'rb'), os.path.dirname(pathtoopf))
        html_files = []
        for item in opf.itermanifest():
            if 'html' in item.get('media-type', '').lower():
                # Only the file name is kept; files live flat in content/
                f = item.get('href').split('/')[-1]
                if isinstance(f, str):
                    f = f.decode('utf-8')
                html_files.append(os.path.abspath(content(f)))
        for path in html_files:
            if not os.access(path, os.R_OK):
                continue
            base = os.path.dirname(path)
            # Read and write through context managers so the handles are
            # closed deterministically instead of being leaked.
            with open(content(path), 'rb') as stream:
                raw = stream.read()
            root = html.fromstring(raw, parser=parser)
            # Snapshot the links: removing elements while iterating the
            # live generator would be unsafe.
            for element, attribute, link, pos in list(root.iterlinks()):
                link = to_unicode(link)
                plink = Link(link, base)
                if plink.path is not None and not os.path.exists(plink.path):
                    remove_bad_link(element, attribute, link, pos)
            with open(content(path), 'wb') as stream:
                stream.write(tostring(root, pretty_print))
 def find_html_index(files):
    '''
    Given a list of files, find the most likely root HTML file in the
    list.

    Only files ending in .htm, .html, .xhtm or .xhtml (case insensitive)
    are considered. A file whose base name is `toc` is preferred, then one
    named `index`; among several matches the smallest wins. Otherwise the
    largest HTML file is chosen.

    Returns a `(path, extension)` tuple, the extension being lower case and
    without the leading dot. Raises ValueError when no HTML file is present.
    '''
    html_pat = re.compile(r'\.(x){0,1}htm(l){0,1}$', re.IGNORECASE)
    html_files = [f for f in files if html_pat.search(f) is not None]
    if not html_files:
        raise ValueError(_('Could not find an ebook inside the archive'))
    # Sort ascending by size. A key function replaces the cmp= callback: the
    # ordering is the same (both sorts are stable) and it is the supported
    # form in current Python versions.
    sized = [(f, os.stat(f).st_size) for f in html_files]
    sized.sort(key=lambda entry: entry[1])
    html_files = [entry[0] for entry in sized]
    for q in ('toc', 'index'):
        for f in html_files:
            if os.path.splitext(os.path.basename(f))[0].lower() == q:
                return f, os.path.splitext(f)[1].lower()[1:]
    largest = html_files[-1]
    return largest, os.path.splitext(largest)[1].lower()[1:]
 def rescale_images(imgdir, screen_size, log):
    '''
    Shrink every image in `imgdir` for which fit_image() reports that
    scaling is needed for `screen_size` (a `(width, height)` pair).

    Files ending in .css or .js, and files that QPixmap cannot load, are
    skipped. Rescaled images overwrite the original file and are saved in
    JPEG format. A QApplication is created if none exists yet.
    '''
    max_width, max_height = screen_size
    if QApplication.instance() is None:
        QApplication([])
    for name in os.listdir(imgdir):
        image_path = os.path.join(imgdir, name)
        suffix = os.path.splitext(name)[1]
        if suffix == '.css' or suffix == '.js':
            continue
        pixmap = QPixmap()
        pixmap.load(image_path)
        if not pixmap.isNull():
            fitted = fit_image(pixmap.width(), pixmap.height(),
                               max_width, max_height)
            scaled, new_width, new_height = fitted
            if scaled:
                log.info('Rescaling image: '+name)
                resized = pixmap.scaled(new_width, new_height,
                        Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
                resized.save(image_path, 'JPEG')
 class HTMLProcessor(Processor, Rationalizer):
    '''
    Processor used by html2epub for a single HTML file. On construction it
    detects chapters, extracts CSS, optionally rationalizes font sizes,
    strips <script> tags from the body and applies the markup fixes in
    :meth:`fix_markup`.
    '''
    def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, stylesheets):
        '''
        Parse `htmlfile` through Processor.__init__ and run the processing
        steps in order. Debug trees are dumped when `opts.verbose` > 2.
        '''
        Processor.__init__(self, htmlfile, opts, tdir, resource_map, htmlfiles,
                           name='html2epub')
        if opts.verbose > 2:
            self.debug_tree('parsed')
        self.detect_chapters()
        self.extract_css(stylesheets)
        # font_css is only assigned here when font rescaling is requested
        if self.opts.base_font_size2 > 0:
            self.font_css = self.rationalize(self.external_stylesheets+[self.stylesheet],
                                             self.root, self.opts)
        if opts.verbose > 2:
            self.debug_tree('nocss')
        # Guard against a body object that is not an element
        if hasattr(self.body, 'xpath'):
            for script in list(self.body.xpath('descendant::script')):
                script.getparent().remove(script)
        self.fix_markup()
    def convert_image(self, img):
        '''
        Re-save the image referenced by `img` under its original path plus
        the suffix `_calibre_converted.jpg`, delete the original file and
        update matching `resource_map` values. The `src` attribute is
        pointed at the new name unconditionally, even when the file was
        missing or could not be loaded.
        '''
        rpath = img.get('src', '')
        path = os.path.join(os.path.dirname(self.save_path()), *rpath.split('/'))
        if os.path.exists(path) and os.path.isfile(path):
            if QApplication.instance() is None:
                app = QApplication([])
                app
            p = QPixmap()
            p.load(path)
            if not p.isNull():
                p.save(path + '_calibre_converted.jpg')
                os.remove(path)
                for key, val in self.resource_map.items():
                    if val == rpath:
                        self.resource_map[key] = rpath+'_calibre_converted.jpg'
        img.set('src', rpath+'_calibre_converted.jpg')
    def fix_markup(self):
        '''
        Perform various markup transforms to get the output to render correctly
        in the quirky ADE.
        '''
        # Replace <br> that are children of <body> as ADE doesn't handle them
        if hasattr(self.body, 'xpath'):
            for br in self.body.xpath('./br'):
                # Skip a <br> that was already removed as the sibling of an
                # earlier one (see the else branch below)
                if br.getparent() is None:
                    continue
                try:
                    sibling = br.itersiblings().next()
                except:
                    sibling = None
                br.tag = 'p'
                br.text = u'\u00a0'
                if (br.tail and br.tail.strip()) or sibling is None or \
                   getattr(sibling, 'tag', '') != 'br':
                    # Single <br>: collapse the replacement paragraph so it
                    # only acts as a line break
                    style = br.get('style', '').split(';')
                    style = filter(None, map(lambda x: x.strip(), style))
                    style.append('margin: 0pt; border:0pt; height:0pt')
                    br.set('style', '; '.join(style))
                else:
                    # Two consecutive <br>: drop the second one and keep its
                    # trailing text on the replacement paragraph
                    sibling.getparent().remove(sibling)
                    if sibling.tail:
                        if not br.tail:
                            br.tail = ''
                        br.tail += sibling.tail
        if self.opts.profile.remove_object_tags:
            for tag in self.root.xpath('//embed'):
                tag.getparent().remove(tag)
            for tag in self.root.xpath('//object'):
                # Embedded SVG objects are kept
                if tag.get('type', '').lower().strip() in ('image/svg+xml',):
                    continue
                tag.getparent().remove(tag)
        # Drop empty <title>/<style> and empty, source-less <script> tags
        for tag in self.root.xpath('//title|//style'):
            if not tag.text:
                tag.getparent().remove(tag)
        for tag in self.root.xpath('//script'):
            if not tag.text and not tag.get('src', False):
                tag.getparent().remove(tag)
        for tag in self.root.xpath('//form'):
            tag.getparent().remove(tag)
        # <center> becomes a centered <div>
        for tag in self.root.xpath('//center'):
            tag.tag = 'div'
            tag.set('style', 'text-align:center')
        if self.opts.linearize_tables:
            for tag in self.root.xpath('//table | //tr | //th | //td'):
                tag.tag = 'div'
        # ADE can't handle &amp; in an img url
        for tag in self.root.xpath('//img[@src]'):
            tag.set('src', tag.get('src', '').replace('&', ''))
    def save(self):
        '''
        Remove all <meta> tags and save through Processor.save with
        comments stripped.
        '''
        for meta in list(self.root.xpath('//meta')):
            meta.getparent().remove(meta)
        # Strip all comments since Adobe DE is petrified of them
        Processor.save(self, strip_comments=True)
    def remove_first_image(self):
        '''
        Remove the first <img> in the document. Returns True if an image
        was removed, False when the document contains none.
        '''
        images = self.root.xpath('//img')
        if images:
            images[0].getparent().remove(images[0])
            return True
        return False
 def config(defaults=None):
    '''
    Return the configuration object produced by common_config for the
    given `defaults`.
    '''
    conf = common_config(defaults=defaults)
    return conf
 def option_parser():
    '''
    Return the html2epub command line parser, built from config() with a
    translated usage message.
    '''
    conf = config()
    usage_text = _('''\
 %prog [options] file.html|opf
 Convert a HTML file to an EPUB ebook. Recursively follows links in the HTML file.
 If you specify an OPF file instead of an HTML file, the list of links is takes from
 the <spine> element of the OPF file.
 ''')
    return conf.option_parser(usage=usage_text)
 def parse_content(filelist, opts, tdir):
    '''
    Run HTMLProcessor over every file in `filelist`, saving the results
    below `tdir/content`, and build the table of contents.

    Returns `(resource_map, htmlfile_map, toc, stylesheet_map)` where
    `stylesheet_map` maps each saved file name to the list of its
    non-None stylesheets and `htmlfile_map` is taken from the processor of
    the last file. NOTE(review): with an empty `filelist` that processor
    is never created and the return statement raises NameError.
    '''
    os.makedirs(os.path.join(tdir, 'content', 'resources'))
    resource_map, stylesheets = {}, {}
    toc = TOC(base_path=tdir, type='root')
    stylesheet_map = {}
    first_image_removed = False
    logger = logging.getLogger('html2epub')
    for htmlfile in filelist:
        logger.debug('Processing %s...'%htmlfile)
        hp = HTMLProcessor(htmlfile, opts, os.path.join(tdir, 'content'),
                           resource_map, filelist, stylesheets)
        # Only the first image found across the whole book is removed
        if not first_image_removed and opts.remove_first_image:
            first_image_removed = hp.remove_first_image()
        hp.populate_toc(toc)
        hp.save()
        sheets = hp.external_stylesheets + [hp.stylesheet, hp.font_css,
                                            hp.override_css]
        stylesheet_map[os.path.basename(hp.save_path())] = \
            [s for s in sheets if s is not None]
    logger.debug('Saving stylesheets...')
    if opts.base_font_size2 > 0:
        Rationalizer.remove_font_size_information(stylesheets.values())
        for path, css in stylesheets.items():
            # Entries may be parsed sheets (with cssText) or raw text
            raw = getattr(css, 'cssText', css)
            if isinstance(raw, unicode):
                raw = raw.encode('utf-8')
            # Close the handle explicitly instead of leaking it
            with open(path, 'wb') as out:
                out.write(raw)
    # Progressively drop the less important TOC entry types while the TOC
    # is above the configured threshold
    if toc.count('chapter') > opts.toc_threshold:
        toc.purge(['file', 'link', 'unknown'])
    if toc.count('chapter') + toc.count('file') > opts.toc_threshold:
        toc.purge(['link', 'unknown'])
    toc.purge(['link'], max=opts.max_toc_links)
    return resource_map, hp.htmlfile_map, toc, stylesheet_map
 # XHTML template for the generated cover page. The single %s is replaced
 # with the cover image path by process_title_page(); literal percent signs
 # are therefore doubled.
 TITLEPAGE = '''\
 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
    <head>
        <title>Cover</title>
        <style type="text/css" title="override_css">
            @page {padding: 0pt; margin:0pt}
            body { text-align: center; padding:0pt; margin: 0pt; }
            div { margin: 0pt; padding: 0pt; }
        </style>
    </head>
    <body>
        <div>
            <img src="%s" alt="cover" style="height: 100%%" />
        </div>
    </body>
 </html>
 '''
 def create_cover_image(src, dest, screen_size, rescale_cover=True):
    '''
    Load the image at `src` and save it to `dest`.

    When `rescale_cover` is true and `screen_size` is not None, an image
    smaller than the screen in both dimensions is enlarged by the smaller
    of the two relative growth factors before saving.

    Returns True on success. Any failure is printed as a traceback and
    reported by returning False.
    '''
    try:
        from PyQt4.Qt import QImage, Qt
        if QApplication.instance() is None:
            QApplication([])
        image = QImage()
        image.load(src)
        if image.isNull():
            raise ValueError('Invalid cover image')
        if rescale_cover and screen_size is not None:
            width, height = image.width(), image.height()
            grow_w = (screen_size[0]-width)/float(width)
            grow_h = (screen_size[1]-height)/float(height)
            delta = min(grow_w, grow_h)
            if delta > 0:
                # Same factor for both axes, so the aspect ratio is kept
                # up to integer truncation
                nwidth = int(width + delta*(width))
                nheight = int(height + delta*(height))
                image = image.scaled(nwidth, nheight, Qt.IgnoreAspectRatio,
                                     Qt.SmoothTransformation)
        image.save(dest)
    except:
        import traceback
        traceback.print_exc()
        return False
    return True
 def process_title_page(mi, filelist, htmlfilemap, opts, tdir):
    '''
    Create the cover image and title page inside `tdir/content`.

    The cover is taken from the metadata (`mi.cover`) and/or from
    `opts.cover`; both are written to `resources/_cover_.jpg`, the
    specified one last. If a cover is available, a title page built from
    TITLEPAGE is written, replacing the first HTML file when that file is
    itself the metadata cover.

    Returns a `(title_page, has_title_page)` tuple. `title_page` is the
    name of a newly created title page file, or None when none was created
    or an existing page was overwritten.
    '''
    old_title_page = None
    # Normalise paths so they can be compared reliably
    f = lambda x : os.path.normcase(os.path.normpath(x))
    if not isinstance(mi.cover, basestring):
        mi.cover = None
    if mi.cover:
        # The first spine file is the cover itself: reuse its slot
        if f(filelist[0].path) == f(mi.cover):
            old_title_page = htmlfilemap[filelist[0].path]
    #logger = logging.getLogger('html2epub')
    metadata_cover = mi.cover
    if metadata_cover and not os.path.exists(metadata_cover):
        metadata_cover = None
    cpath = '/'.join(('resources', '_cover_.jpg'))
    cover_dest = os.path.join(tdir, 'content', *cpath.split('/'))
    if metadata_cover is not None:
        if not create_cover_image(metadata_cover, cover_dest,
                                  opts.profile.screen_size):
            metadata_cover = None
    specified_cover = opts.cover
    if specified_cover and not os.path.exists(specified_cover):
        specified_cover = None
    if specified_cover is not None:
        # Written to the same destination, overwriting the metadata cover
        if not create_cover_image(specified_cover, cover_dest,
                                  opts.profile.screen_size):
            specified_cover = None
    # The specified cover wins unless it is missing or the metadata cover is
    # explicitly preferred and available
    cover = metadata_cover if specified_cover is None or (opts.prefer_metadata_cover and metadata_cover is not None) else specified_cover
    if cover is not None:
        titlepage = TITLEPAGE%cpath
        tp = 'calibre_title_page.html' if old_title_page is None else old_title_page
        tppath = os.path.join(tdir, 'content', tp)
        # Note: f is rebound here from the path normaliser to the file object
        with open(tppath, 'wb') as f:
            f.write(titlepage)
        return tp if old_title_page is None else None, True
    elif os.path.exists(cover_dest):
        # No usable cover: remove any image left behind at the destination
        os.remove(cover_dest)
    return None, old_title_page is not None
 def find_oeb_cover(htmlfile):
    '''
    Return the `src` of the first <img> tag in `htmlfile`, or None.

    Only files of at most 2048 bytes are inspected; larger files are
    assumed not to be a simple cover page and yield None.
    '''
    if os.stat(htmlfile).st_size > 2048:
        return None
    # Read through a context manager so the handle is always closed
    with open(htmlfile, 'rb') as stream:
        raw = stream.read()
    match = re.search(r'(?i)<img[^<>]+src\s*=\s*[\'"](.+?)[\'"]', raw)
    if match:
        return match.group(1)
    return None
 def condense_ncx(ncx_path):
    '''
    Shrink the NCX file at `ncx_path` in place by stripping leading and
    trailing whitespace from the text and tail of every element, then
    re-serialising it as UTF-8.
    '''
    tree = etree.parse(ncx_path)
    # tag=etree.Element restricts iteration to real elements
    for tag in tree.getroot().iter(tag=etree.Element):
        if tag.text:
            tag.text = tag.text.strip()
        if tag.tail:
            tag.tail = tag.tail.strip()
    compressed = etree.tostring(tree.getroot(), encoding='utf-8')
    # Close the output handle explicitly instead of leaking it
    with open(ncx_path, 'wb') as out:
        out.write(compressed)
 def convert(htmlfile, opts, notification=None, create_epub=True,
            oeb_cover=False, extract_to=None):
    '''
    Convert the HTML or OPF file `htmlfile` into an EPUB.

    :param opts: Parsed conversion options. Several attributes are
                 normalised in place (output path, profile, override CSS,
                 XPath expressions).
    :param notification: Accepted but not used in this function.
    :param create_epub: If False, no EPUB container is written.
    :param oeb_cover: If True, no generated title page is put in the spine;
                      the guide cover is looked up in the title page instead.
    :param extract_to: Optional directory that receives a copy of the
                       intermediate files (replacing any existing directory).
    '''
    htmlfile = os.path.abspath(htmlfile)
    if opts.output is None:
        opts.output = os.path.splitext(os.path.basename(htmlfile))[0] + '.epub'
    # Replace the profile name by the profile object
    opts.profile = PROFILES[opts.profile]
    opts.output = os.path.abspath(opts.output)
    if opts.override_css is not None:
        # override_css is either a path to a CSS file or literal CSS text
        try:
            opts.override_css = open(opts.override_css, 'rb').read().decode(preferred_encoding, 'replace')
        except:
            opts.override_css = opts.override_css.decode(preferred_encoding, 'replace')
    if opts.from_opf:
        opts.from_opf = os.path.abspath(opts.from_opf)
    if opts.from_ncx:
        opts.from_ncx = os.path.abspath(opts.from_ncx)
    if htmlfile.lower().endswith('.opf'):
        opf = OPF(htmlfile, os.path.dirname(os.path.abspath(htmlfile)))
        filelist = opf_traverse(opf, verbose=opts.verbose, encoding=opts.encoding)
        if not filelist:
            # Bad OPF look for a HTML file instead
            htmlfile = find_html_index(walk(os.path.dirname(htmlfile)))[0]
            if htmlfile is None:
                raise ValueError('Could not find suitable file to convert.')
            filelist = get_filelist(htmlfile, opts)[1]
        mi = merge_metadata(None, opf, opts)
    else:
        opf, filelist = get_filelist(htmlfile, opts)
        mi = merge_metadata(htmlfile, opf, opts)
    # Compile the chapter and TOC level expressions, with EXSLT regular
    # expression support
    opts.chapter = XPath(opts.chapter,
                    namespaces={'re':'http://exslt.org/regular-expressions'})
    for x in (1, 2, 3):
        attr = 'level%d_toc'%x
        if getattr(opts, attr):
            setattr(opts, attr, XPath(getattr(opts, attr),
                      namespaces={'re':'http://exslt.org/regular-expressions'}))
        else:
            setattr(opts, attr, None)
    with TemporaryDirectory(suffix='_html2epub', keep=opts.keep_intermediate) as tdir:
        if opts.keep_intermediate:
            print 'Intermediate files in', tdir
        resource_map, htmlfile_map, generated_toc, stylesheet_map = \
                                        parse_content(filelist, opts, tdir)
        logger = logging.getLogger('html2epub')
        resources = [os.path.join(tdir, 'content', f) for f in resource_map.values()]
        title_page, has_title_page = process_title_page(mi, filelist, htmlfile_map, opts, tdir)
        spine = [htmlfile_map[f.path] for f in filelist]
        if not oeb_cover and title_page is not None:
            spine = [title_page] + spine
        # The cover now lives in the book itself, not in the metadata
        mi.cover = None
        mi.cover_data = (None, None)
        mi = create_metadata(tdir, mi, spine, resources)
        buf = cStringIO.StringIO()
        if mi.toc:
            rebase_toc(mi.toc, htmlfile_map, tdir)
        # Fall back to the auto generated TOC when requested or when the
        # existing one has fewer than two entries
        if opts.use_auto_toc or mi.toc is None or len(list(mi.toc.flat())) < 2:
            mi.toc = generated_toc
        # An explicitly supplied NCX overrides everything else
        if opts.from_ncx:
            toc = TOC()
            toc.read_ncx_toc(opts.from_ncx)
            mi.toc = toc
        for item in mi.manifest:
            if getattr(item, 'mime_type', None) == 'text/html':
                item.mime_type = 'application/xhtml+xml'
        opf_path = os.path.join(tdir, 'metadata.opf')
        with open(opf_path, 'wb') as f:
            # The NCX is rendered into buf alongside the OPF
            mi.render(f, buf, 'toc.ncx')
        toc = buf.getvalue()
        if toc:
            with open(os.path.join(tdir, 'toc.ncx'), 'wb') as f:
                f.write(toc)
            if opts.show_ncx:
                print toc
        split(opf_path, opts, stylesheet_map)
        if opts.page:
            logger.info('\tBuilding page map...')
            add_page_map(opf_path, opts)
        check_links(opf_path, opts.pretty_print)
        # Re-read the OPF and rebuild its guide
        opf = OPF(opf_path, tdir)
        opf.remove_guide()
        oeb_cover_file = None
        if oeb_cover and title_page is not None:
            oeb_cover_file = find_oeb_cover(os.path.join(tdir, 'content', title_page))
        if has_title_page or (oeb_cover and oeb_cover_file):
            opf.create_guide_element()
            if has_title_page and not oeb_cover:
                opf.add_guide_item('cover', 'Cover', 'content/'+spine[0])
            if oeb_cover and oeb_cover_file:
                opf.add_guide_item('cover', 'Cover', 'content/'+oeb_cover_file)
        cpath = os.path.join(tdir, 'content', 'resources', '_cover_.jpg')
        if os.path.exists(cpath):
            opf.add_path_to_manifest(cpath, 'image/jpeg')
        with open(opf_path, 'wb') as f:
            f.write(opf.render())
        ncx_path = os.path.join(os.path.dirname(opf_path), 'toc.ncx')
        # Condense the NCX when it is larger than the profile's flow size
        if os.path.exists(ncx_path) and os.stat(ncx_path).st_size > opts.profile.flow_size:
            logger.info('Condensing NCX from %d bytes...'%os.stat(ncx_path).st_size)
            condense_ncx(ncx_path)
            if os.stat(ncx_path).st_size > opts.profile.flow_size:
                logger.warn('NCX still larger than allowed size at %d bytes. Menu based Table of Contents may not work on device.'%os.stat(ncx_path).st_size)
        if opts.profile.screen_size is not None:
            rescale_images(os.path.join(tdir, 'content', 'resources'),
                    opts.profile.screen_size, logger)
        if create_epub:
            epub = initialize_container(opts.output)
            epub.add_dir(tdir)
            epub.close()
            run_plugins_on_postprocess(opts.output, 'epub')
            logger.info(_('Output written to ')+opts.output)
        if opts.show_opf:
            print open(opf_path, 'rb').read()
        # Both the option and the keyword argument may request a copy of the
        # intermediate tree; each replaces an existing target directory
        if opts.extract_to is not None:
            if os.path.exists(opts.extract_to):
                shutil.rmtree(opts.extract_to)
            shutil.copytree(tdir, opts.extract_to)
        if extract_to is not None:
            if os.path.exists(extract_to):
                shutil.rmtree(extract_to)
            shutil.copytree(tdir, extract_to)
 def main(args=sys.argv):
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) < 2:
        parser.print_help()
        print _('You must specify an input HTML file')
        return 1
    convert(args[1], opts)
    return 0
 # When run as a script, use main()'s return value as the exit status.
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/epub/output.py
+++ b/src/calibre/ebooks/epub/output.py
@ -0,0 +1,239 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import os
 from urllib import unquote
 from calibre.customize.conversion import OutputFormatPlugin
 from calibre.ptempfile import TemporaryDirectory
 from calibre.constants import __appname__, __version__
 from calibre import strftime, guess_type
 from lxml import etree
 class EPUBOutput(OutputFormatPlugin):
    '''
    Output plugin that writes an OEB book as an EPUB file: it applies
    workarounds for Adobe Digital Editions (ADE), rescales images, inserts
    a title page, serialises the book with the OEB output plugin and packs
    the result into an EPUB container.
    '''
    name = 'EPUB Output'
    author = 'Kovid Goyal'
    file_type = 'epub'
    # Title page used when the guide has a cover; %s is the cover href
    TITLEPAGE_COVER = '''\
 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
    <head>
        <title>Cover</title>
        <style type="text/css" title="override_css">
            @page {padding: 0pt; margin:0pt}
            body { text-align: center; padding:0pt; margin: 0pt; }
            div { margin: 0pt; padding: 0pt; }
        </style>
    </head>
    <body>
        <div>
            <img src="%s" alt="cover" style="height: 100%%" />
        </div>
    </body>
 </html>
 '''
    # Generic title page used when the book has no cover at all. It is
    # parsed as XML, so it must stay well formed.
    TITLEPAGE = '''\
 <html  xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
    <head>
        <style type="text/css">
            body {
                background: white no-repeat fixed center center;
                text-align: center;
                vertical-align: center;
                overflow: hidden;
                font-size: 18px;
            }
            h1 { font-family: serif; }
            h2, h4 { font-family: monospace; }
        </style>
    </head>
    <body>
        <h1>%(title)s</h1>
        <br/><br/>
        <div style="position:relative">
            <div style="position: absolute; left: 0; top: 0; width:100%%; height:100%%; vertical-align:center">
                <img src="%(img)s" alt="calibre" style="opacity:0.3"/>
            </div>
            <div style="position: absolute; left: 0; top: 0; width:100%%; height:100%%; vertical-align:center">
                <h2>%(date)s</h2>
                <br/><br/><br/><br/><br/>
                <h3>%(author)s</h3>
                <br/><br/><br/><br/><br/><br/><br/><br/><br/>
                <h4>Produced by %(app)s</h4>
            </div>
        </div>
    </body>
 </html>
 '''
    def convert(self, oeb, output_path, input_plugin, opts, log):
        '''
        Write `oeb` as an EPUB file at `output_path`.

        The book is serialised into a temporary directory by the `oeb`
        output plugin; the NCX (if one was produced) is condensed and the
        directory is added to a new EPUB container whose root file is the
        generated OPF.
        '''
        self.log, self.opts, self.oeb = log, opts, oeb
        self.workaround_ade_quirks()
        from calibre.ebooks.oeb.transforms.rescale import RescaleImages
        RescaleImages()(oeb, opts)
        self.insert_cover()
        with TemporaryDirectory('_epub_output') as tdir:
            from calibre.customize.ui import plugin_for_output_format
            oeb_output = plugin_for_output_format('oeb')
            oeb_output.convert(oeb, tdir, input_plugin, opts, log)
            produced = os.listdir(tdir)
            opf = [x for x in produced if x.endswith('.opf')][0]
            # A book without a table of contents has no NCX to condense
            ncx_files = [x for x in produced if x.endswith('.ncx')]
            if ncx_files:
                self.condense_ncx(os.path.join(tdir, ncx_files[0]))
            # initialize_container lives in calibre.ebooks.epub
            from calibre.ebooks.epub import initialize_container
            epub = initialize_container(output_path, os.path.basename(opf))
            epub.add_dir(tdir)
            epub.close()
    def default_cover(self):
        '''
        Create a generic cover for books that don't have a cover

        Adds the calibre logo and a title page built from TITLEPAGE to the
        manifest and returns the title page item. Returns None when the
        GUI resources or PyQt4 cannot be imported.
        '''
        try:
            from calibre.gui2 import images_rc # Needed for access to logo
            from PyQt4.Qt import QApplication, QFile, QIODevice
        except:
            return None
        from calibre.ebooks.metadata import authors_to_string
        from xml.sax.saxutils import escape
        images_rc
        m = self.oeb.metadata
        title = unicode(m.title[0])
        # Filter on the role of each creator item (dc:creator entries)
        a = [unicode(x) for x in m.creator if x.role == 'aut']
        author = authors_to_string(a)
        if QApplication.instance() is None: QApplication([])
        f = QFile(':/library')
        f.open(QIODevice.ReadOnly)
        img_data = str(f.readAll())
        logo_id, logo_href = self.oeb.manifest.generate('calibre-logo',
                'calibre-logo.png')
        self.oeb.manifest.add(logo_id, logo_href, 'image/png', data=img_data)
        # Title and author are arbitrary text and the template is parsed as
        # XML, so markup characters in them must be escaped
        html = self.TITLEPAGE%dict(title=escape(title), author=escape(author),
                date=strftime('%d %b, %Y'),
                app=__appname__ +' '+__version__,
                img=logo_href)
        page_id, page_href = self.oeb.manifest.generate('calibre-titlepage',
                'calibre-titlepage.xhtml')
        return self.oeb.manifest.add(page_id, page_href,
                guess_type('t.xhtml')[0], data=etree.fromstring(html))
    def insert_cover(self):
        '''
        Make sure the book starts with a title page.

        An existing guide `titlepage` is reused; otherwise a page wrapping
        the guide `cover` image is generated, or the generic default cover
        if there is no cover either. The page is inserted at the start of
        the spine and the guide is pointed at it.
        '''
        from calibre.ebooks.oeb.base import urldefrag
        from calibre import guess_type
        g, m = self.oeb.guide, self.oeb.manifest
        if 'titlepage' not in g:
            if 'cover' in g:
                tp = self.TITLEPAGE_COVER%unquote(g['cover'].href)
                page_id, href = m.generate('titlepage', 'titlepage.xhtml')
                # guess_type returns (type, encoding); only the type is a
                # valid media type
                item = m.add(page_id, href, guess_type('t.xhtml')[0],
                        data=etree.fromstring(tp))
            else:
                item = self.default_cover()
        else:
            item = m.hrefs[urldefrag(g['titlepage'].href)[0]]
        if item is not None:
            self.oeb.spine.insert(0, item, True)
            # Either guide entry may be missing (e.g. for the default
            # cover), so never index refs blindly
            if 'cover' not in g.refs:
                g.add('cover', 'Title Page', item.href)
            g.refs['cover'].href = item.href
            if 'titlepage' in g.refs:
                g.refs['titlepage'].href = item.href
    def condense_ncx(self, ncx_path):
        '''
        Unless pretty printing was requested, shrink the NCX at `ncx_path`
        in place by stripping whitespace from the text and tail of every
        element.
        '''
        if self.opts.pretty_print:
            return
        tree = etree.parse(ncx_path)
        for tag in tree.getroot().iter(tag=etree.Element):
            if tag.text:
                tag.text = tag.text.strip()
            if tag.tail:
                tag.tail = tag.tail.strip()
        compressed = etree.tostring(tree.getroot(), encoding='utf-8')
        with open(ncx_path, 'wb') as out:
            out.write(compressed)
    def workaround_ade_quirks(self):
        '''
        Perform various markup transforms to get the output to render correctly
        in the quirky ADE.
        '''
        from calibre.ebooks.oeb.base import XPNSMAP, XHTML
        from lxml.etree import XPath as _XPath
        from functools import partial
        # All queries use the h: prefix, so they must be compiled with the
        # namespace map; element.xpath() without it cannot resolve h:
        XPath = partial(_XPath, namespaces=XPNSMAP)
        for x in self.oeb.spine:
            root = x.data
            body = XPath('//h:body')(root)
            if body:
                body = body[0]
            # Replace <br> that are children of <body> as ADE doesn't handle them
            if hasattr(body, 'xpath'):
                for br in XPath('./h:br')(body):
                    # Already removed as the sibling of an earlier <br>
                    if br.getparent() is None:
                        continue
                    try:
                        sibling = br.itersiblings().next()
                    except StopIteration:
                        sibling = None
                    br.tag = XHTML('p')
                    br.text = u'\u00a0'
                    if (br.tail and br.tail.strip()) or sibling is None or \
                    getattr(sibling, 'tag', '') != XHTML('br'):
                        # Single <br>: collapse the replacement paragraph
                        style = br.get('style', '').split(';')
                        style = [s.strip() for s in style if s.strip()]
                        style.append('margin: 0pt; border:0pt; height:0pt')
                        br.set('style', '; '.join(style))
                    else:
                        # Double <br>: drop the second, keep its tail text
                        sibling.getparent().remove(sibling)
                        if sibling.tail:
                            if not br.tail:
                                br.tail = ''
                            br.tail += sibling.tail
            if self.opts.output_profile.remove_object_tags:
                for tag in XPath('//h:embed')(root):
                    tag.getparent().remove(tag)
                for tag in XPath('//h:object')(root):
                    # Embedded SVG objects are kept
                    if tag.get('type', '').lower().strip() in ('image/svg+xml',):
                        continue
                    tag.getparent().remove(tag)
            for tag in XPath('//h:title|//h:style')(root):
                if not tag.text:
                    tag.getparent().remove(tag)
            for tag in XPath('//h:script')(root):
                if not tag.text and not tag.get('src', False):
                    tag.getparent().remove(tag)
            for tag in XPath('//h:form')(root):
                tag.getparent().remove(tag)
            for tag in XPath('//h:center')(root):
                tag.tag = XHTML('div')
                tag.set('style', 'text-align:center')
            # ADE can't handle &amp; in an img url
            for tag in XPath('//h:img[@src]')(root):
                tag.set('src', tag.get('src', '').replace('&', ''))
        # The link styling rules belong to the shared stylesheet, so they
        # are added once rather than once per spine item. A book without
        # that stylesheet is simply left alone.
        try:
            stylesheet = self.oeb.manifest.hrefs['stylesheet.css']
        except KeyError:
            stylesheet = None
        if stylesheet is not None:
            stylesheet.data.add('a { color: inherit; text-decoration: inherit; '
                    'cursor: default; }')
            stylesheet.data.add('a[href] { color: blue; '
                    'text-decoration: underline; cursor:pointer; }')
--- a/src/calibre/ebooks/metadata/init.py
+++ b/src/calibre/ebooks/metadata/init.py
@ -260,6 +260,9 @@ class MetaInformation(object):
            x = 1.0
        return '%d'%x if int(x) == x else '%.2f'%x
    def authors_from_string(self, raw):
        'Set ``self.authors`` to whatever ``string_to_authors`` returns for `raw`.'
        parsed = string_to_authors(raw)
        self.authors = parsed
    def __unicode__(self):
        ans = []
        def fmt(x, y):
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -514,7 +514,8 @@ class Metadata(object):
        scheme  = Attribute(lambda term: 'scheme' if \
                                term == OPF('meta') else OPF('scheme'),
                            [DC('identifier'), OPF('meta')])
-        file_as = Attribute(OPF('file-as'), [DC('creator'), DC('contributor')])
+        file_as = Attribute(OPF('file-as'), [DC('creator'), DC('contributor'),
                                             DC('title')])
        role    = Attribute(OPF('role'), [DC('creator'), DC('contributor')])
        event   = Attribute(OPF('event'), [DC('date')])
        id      = Attribute('id')
@ -593,6 +594,19 @@ class Metadata(object):
            yield key
    __iter__ = iterkeys
    def clear(self, key):
        'Remove every entry stored under `key`, emptying the list in place.'
        entries = self.items[key]
        # Walk a snapshot so that removing from the live list is safe.
        for entry in tuple(entries):
            entries.remove(entry)
    def filter(self, key, predicate):
        '''
        Remove, in place, every entry stored under `key` for which
        ``predicate(entry)`` is true.
        '''
        entries = self.items[key]
        # Walk a snapshot so that removing from the live list is safe.
        for entry in tuple(entries):
            if not predicate(entry):
                continue
            entries.remove(entry)
    def __getitem__(self, key):
        return self.items[key]
@ -1011,7 +1025,7 @@ class Manifest(object):
                media_type = OEB_DOC_MIME
            elif media_type in OEB_STYLES:
                media_type = OEB_CSS_MIME
-            attrib = {'id': item.id, 'href': item.href,
+            attrib = {'id': item.id, 'href': urlunquote(item.href),
                      'media-type': media_type}
            if item.fallback:
                attrib['fallback'] = item.fallback
@ -1202,6 +1216,9 @@ class Guide(object):
        self.refs[type] = ref
        return ref
    def remove(self, type):
        '''
        Drop the reference registered under `type` and return it; return
        None if there is no such reference.
        '''
        removed = self.refs.pop(type, None)
        return removed
    def iterkeys(self):
        for type in self.refs:
            yield type
@ -1229,7 +1246,7 @@ class Guide(object):
    def to_opf1(self, parent=None):
        elem = element(parent, 'guide')
        for ref in self.refs.values():
-            attrib = {'type': ref.type, 'href': ref.href}
+            attrib = {'type': ref.type, 'href': urlunquote(ref.href)}
            if ref.title:
                attrib['title'] = ref.title
            element(elem, 'reference', attrib=attrib)
@ -1345,7 +1362,7 @@ class TOC(object):
    def to_opf1(self, tour):
        for node in self.nodes:
            element(tour, 'site', attrib={
-                'title': node.title, 'href': node.href})
+                'title': node.title, 'href': urlunquote(node.href)})
            node.to_opf1(tour)
        return tour
@ -1358,7 +1375,7 @@ class TOC(object):
            point = element(parent, NCX('navPoint'), attrib=attrib)
            label = etree.SubElement(point, NCX('navLabel'))
            element(label, NCX('text')).text = node.title
-            element(point, NCX('content'), src=node.href)
+            element(point, NCX('content'), src=urlunquote(node.href))
            node.to_ncx(point)
        return parent
--- a/src/calibre/ebooks/oeb/iterator.py
+++ b/src/calibre/ebooks/oeb/iterator.py
@ -12,13 +12,15 @@ from cStringIO import StringIO
 from PyQt4.Qt import QFontDatabase
 from calibre.customize.ui import available_input_formats
 from calibre.ebooks.epub.from_html import TITLEPAGE
 from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ptempfile import TemporaryDirectory
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.utils.zipfile import safe_replace, ZipFile
 from calibre.utils.config import DynamicConfig
 from calibre.utils.logging import Log
 from calibre.ebooks.epub.output import EPUBOutput
 TITLEPAGE = EPUBOutput.TITLEPAGE_COVER
 def character_count(html):
    '''
--- a/src/calibre/ebooks/oeb/output.py
+++ b/src/calibre/ebooks/oeb/output.py
@ -9,6 +9,7 @@ from lxml import etree
 from calibre.customize.conversion import OutputFormatPlugin
 from calibre import CurrentDir
 from urllib import unquote
 class OEBOutput(OutputFormatPlugin):
@ -32,7 +33,7 @@ class OEBOutput(OutputFormatPlugin):
                        f.write(raw)
            for item in oeb_book.manifest:
-                path = os.path.abspath(item.href)
+                path = os.path.abspath(unquote(item.href))
                dir = os.path.dirname(path)
                if not os.path.exists(dir):
                    os.makedirs(dir)
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@ -11,6 +11,7 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
 import os
 import itertools
 import re
 import logging
 import copy
 from weakref import WeakKeyDictionary
 from xml.dom import SyntaxErr as CSSSyntaxError
@ -106,7 +107,8 @@ class CSSSelector(etree.XPath):
 class Stylizer(object):
    STYLESHEETS = WeakKeyDictionary()
-    def __init__(self, tree, path, oeb, profile=PROFILES['PRS505'], extra_css=''):
+    def __init__(self, tree, path, oeb, profile=PROFILES['PRS505'],
            extra_css='', user_css=''):
        self.oeb = oeb
        self.profile = profile
        self.logger = oeb.logger
@ -115,7 +117,8 @@ class Stylizer(object):
        cssname = os.path.splitext(basename)[0] + '.css'
        stylesheets = [HTML_CSS_STYLESHEET]
        head = xpath(tree, '/h:html/h:head')[0]
-        parser = cssutils.CSSParser(fetcher=self._fetch_css_file)
+        parser = cssutils.CSSParser(fetcher=self._fetch_css_file,
                log=logging.getLogger('calibre.css'))
        for elem in head:
            if elem.tag == XHTML('style') and elem.text \
               and elem.get('type', CSS_MIME) in OEB_STYLES:
@ -135,11 +138,12 @@ class Stylizer(object):
                        (path, item.href))
                    continue
                stylesheets.append(sitem.data)
-        if extra_css:
+        for x in (extra_css, user_css):
-            text = XHTML_CSS_NAMESPACE + extra_css
+            if x:
-            stylesheet = parser.parseString(text, href=cssname)
+                text = XHTML_CSS_NAMESPACE + x
-            stylesheet.namespaces['h'] = XHTML_NS
+                stylesheet = parser.parseString(text, href=cssname)
-            stylesheets.append(stylesheet)
+                stylesheet.namespaces['h'] = XHTML_NS
                stylesheets.append(stylesheet)
        rules = []
        index = 0
        self.stylesheets = set()
@ -288,6 +292,9 @@ class Style(object):
        self._lineHeight = None
        stylizer._styles[element] = self
    def set(self, prop, val):
        'Store `val` for the property `prop` in this style.'
        declarations = self._style
        declarations[prop] = val
    def _update_cssdict(self, cssdict):
        self._style.update(cssdict)
--- a/src/calibre/ebooks/oeb/transforms/flatcss.py
+++ b/src/calibre/ebooks/oeb/transforms/flatcss.py
@ -114,12 +114,27 @@ class CSSFlattener(object):
    def stylize_spine(self):
        self.stylizers = {}
        profile = self.context.source
        css = ''
        for item in self.oeb.spine:
            html = item.data
            body = html.find(XHTML('body'))
            bs = body.get('style', '').split(';')
            bs.append('margin-top: 0pt')
            bs.append('margin-bottom: 0pt')
            bs.append('margin-left : %fpt'%\
                    float(self.context.margin_left))
            bs.append('margin-right : %fpt'%\
                    float(self.context.margin_right))
            bs.append('text-align: '+ \
                    ('left' if self.context.dont_justify else 'justify'))
            body.set('style', '; '.join(bs))
            stylizer = Stylizer(html, item.href, self.oeb, profile,
-                    extra_css=self.context.extra_css)
+                    user_css=self.context.extra_css,
                    extra_css=css)
            self.stylizers[item] = stylizer
    def baseline_node(self, node, stylizer, sizes, csize):
        csize = stylizer.style(node)['font-size']
        if node.text:
@ -219,6 +234,15 @@ class CSSFlattener(object):
        if self.lineh and 'line-height' not in cssdict:
            lineh = self.lineh / psize
            cssdict['line-height'] = "%0.5fem" % lineh
        if (self.context.remove_paragraph_spacing or
                self.context.insert_blank_line) and tag in ('p', 'div'):
            for prop in ('margin', 'padding', 'border'):
                for edge in ('top', 'bottom'):
                    cssdict['%s-%s'%(prop, edge)] = '0pt'
            if self.context.insert_blank_line:
                cssdict['margin-top'] = cssdict['margin-bottom'] = '0.5em'
            if self.context.remove_paragraph_spacing:
                cssdict['text-indent'] = '1.5em'
        if cssdict:
            items = cssdict.items()
            items.sort()
@ -253,12 +277,16 @@ class CSSFlattener(object):
        href = item.relhref(href)
        etree.SubElement(head, XHTML('link'),
            rel='stylesheet', type=CSS_MIME, href=href)
-        if stylizer.page_rule:
+        stylizer.page_rule['margin-top'] = '%fpt'%\
-            items = stylizer.page_rule.items()
+                float(self.context.margin_top)
-            items.sort()
+        stylizer.page_rule['margin-bottom'] = '%fpt'%\
-            css = '; '.join("%s: %s" % (key, val) for key, val in items)
+                float(self.context.margin_bottom)
-            style = etree.SubElement(head, XHTML('style'), type=CSS_MIME)
+
-            style.text = "@page { %s; }" % css
+        items = stylizer.page_rule.items()
        items.sort()
        css = '; '.join("%s: %s" % (key, val) for key, val in items)
        style = etree.SubElement(head, XHTML('style'), type=CSS_MIME)
        style.text = "@page { %s; }" % css
    def replace_css(self, css):
        manifest = self.oeb.manifest
@ -285,3 +313,4 @@ class CSSFlattener(object):
        for item in self.oeb.spine:
            stylizer = self.stylizers[item]
            self.flatten_head(item, stylizer, href)
--- a/src/calibre/ebooks/oeb/transforms/guide.py
+++ b/src/calibre/ebooks/oeb/transforms/guide.py
@ -0,0 +1,52 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 class Clean(object):
    '''
    Clean up the guide, leaving only the ``cover`` and ``titlepage``
    references.

    If the guide has no ``cover`` reference, the candidate with the largest
    data among the ``other.ms-*image*`` references is promoted to be the
    cover. Every other reference is removed from the guide, and the manifest
    item it points at is removed too, unless the cover or titlepage reference
    points at the same file.
    '''

    def __call__(self, oeb, opts):
        '''
        Run the clean up on `oeb` in place.

        :param oeb: The book whose ``guide`` and ``manifest`` are modified.
        :param opts: Conversion options; only stored on ``self.opts``.
        '''
        from calibre.ebooks.oeb.base import urldefrag
        self.oeb, self.log, self.opts = oeb, oeb.log, opts

        # Files (hrefs without fragment) that must survive in the manifest
        protected_hrefs = set([])
        if 'titlepage' in self.oeb.guide:
            protected_hrefs.add(urldefrag(
                self.oeb.guide['titlepage'].href)[0])
        if 'cover' not in self.oeb.guide:
            covers = []
            for x in ('other.ms-coverimage-standard',
                    'other.ms-titleimage-standard', 'other.ms-titleimage',
                    'other.ms-coverimage', 'other.ms-thumbimage-standard',
                    'other.ms-thumbimage'):
                if x in self.oeb.guide:
                    href = self.oeb.guide[x].href
                    item = self.oeb.manifest.hrefs[href]
                    covers.append([self.oeb.guide[x], len(item.data)])
            # Largest candidate first
            covers.sort(key=lambda x: x[1], reverse=True)
            if covers:
                ref = covers[0][0]
                if len(covers) > 1:
                    self.log('Choosing %s:%s as the cover'%(ref.type, ref.href))
                ref.type = 'cover'
                self.oeb.guide.refs['cover'] = ref
                protected_hrefs.add(urldefrag(ref.href)[0])
        else:
            protected_hrefs.add(urldefrag(self.oeb.guide.refs['cover'].href)[0])

        for x in list(self.oeb.guide):
            # A membership test is required here: comparing the string to
            # the tuple with != is always true and would remove the cover
            # and titlepage references as well.
            if x.lower() in ('cover', 'titlepage'):
                continue
            href = urldefrag(self.oeb.guide[x].href)[0]
            try:
                if href not in protected_hrefs:
                    self.oeb.manifest.remove(self.oeb.manifest.hrefs[href])
            except KeyError:
                # Best effort: the reference may point at something that is
                # not in the manifest
                pass
            self.oeb.guide.remove(x)
--- a/src/calibre/ebooks/oeb/transforms/jacket.py
+++ b/src/calibre/ebooks/oeb/transforms/jacket.py
@ -0,0 +1,66 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import textwrap
 from lxml import etree
 from calibre.ebooks.oeb.base import XPNSMAP
 from calibre import guess_type
 class Jacket(object):
    '''
    Book jacket manipulation. Remove first image and insert comments at start of
    book.
    '''

    # XHTML skeleton of the generated jacket page. The values substituted for
    # %(title)s and %(jacket)s are text, %(comments)s is inserted as markup.
    JACKET_TEMPLATE = textwrap.dedent(u'''\
    <html xmlns="%(xmlns)s">
        <head>
            <title>%(title)s</title>
        </head>
        <body>
            <h1 style="text-align: center">%(title)s</h1>
            <h2 style="text-align: center">%(jacket)s</h2>
            <div>
                %(comments)s
            </div>
        </body>
    </html>
    ''')

    def remove_first_image(self):
        '''
        Remove the first ``<img>`` that refers to a manifest item, looking
        only at the first three spine items. Both the element and the
        manifest item are removed.
        '''
        for i, item in enumerate(self.oeb.spine):
            if i > 2:
                break
            # The keyword must be `namespaces` for the h: prefix to be bound
            for img in item.data.xpath('//h:img[@src]', namespaces=XPNSMAP):
                href = item.abshref(img.get('src'))
                image = self.oeb.manifest.hrefs.get(href, None)
                if image is not None:
                    self.log('Removing first image', img.get('src'))
                    self.oeb.manifest.remove(image)
                    img.getparent().remove(img)
                    return

    def insert_comments(self, comments):
        '''
        Build a jacket page from JACKET_TEMPLATE containing the book title
        and `comments`, add it to the manifest and insert it as the first
        spine item.

        :param comments: The comments to show. Blank lines are turned into
                         ``<br/><br/>``; the rest is inserted as is.
        '''
        from xml.sax.saxutils import escape
        self.log('Inserting metadata comments into book...')
        comments = comments.replace('\r\n', '\n').replace('\n\n', '<br/><br/>')
        # The title is plain text, so characters like & and < have to be
        # escaped or the generated page cannot be parsed
        title = escape(u'%s' % (self.opts.title,))
        html = self.JACKET_TEMPLATE%dict(xmlns=XPNSMAP['h'],
                title=title, comments=comments,
                jacket=_('Book Jacket'))
        id, href = self.oeb.manifest.generate('jacket', 'jacket.xhtml')
        root = etree.fromstring(html)
        item = self.oeb.manifest.add(id, href, guess_type(href)[0], data=root)
        self.oeb.spine.insert(0, item, True)

    def __call__(self, oeb, opts):
        '''
        Apply the jacket transforms selected in `opts` to `oeb`.

        Reads ``opts.remove_first_image``, ``opts.insert_comments`` and
        ``opts.comments`` (and ``opts.title`` when comments are inserted).
        '''
        self.oeb, self.opts, self.log = oeb, opts, oeb.log
        if opts.remove_first_image:
            self.remove_first_image()
        if opts.insert_comments and opts.comments:
            self.insert_comments(opts.comments)
--- a/src/calibre/ebooks/oeb/transforms/metadata.py
+++ b/src/calibre/ebooks/oeb/transforms/metadata.py
@ -0,0 +1,84 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import os
 class MergeMetadata(object):
    'Merge in user metadata, including cover'

    def __call__(self, oeb, mi, prefer_metadata_cover=False):
        '''
        Copy the fields that are set on `mi` into ``oeb.metadata``, then
        set the cover via :meth:`set_cover`.

        :param oeb: The book whose metadata, manifest and guide are updated.
        :param mi: The user specified metadata. Fields that are empty (or,
                   for ``series_index`` and ``rating``, None) are ignored.
        :param prefer_metadata_cover: Passed through to :meth:`set_cover`.
        '''
        from calibre.ebooks.oeb.base import DC
        self.oeb, self.log = oeb, oeb.log
        m = self.oeb.metadata
        self.log('Merging user specified metadata...')
        if mi.title:
            m.clear('title')
            m.add('title', mi.title)
        if mi.title_sort:
            # file_as is set on the first title, so make sure there is one
            if not m.title:
                m.add(DC('title'), mi.title_sort)
            m.title[0].file_as = mi.title_sort
        if mi.authors:
            # Replace only the creators whose role is author
            m.filter('creator', lambda x : x.role.lower() == 'aut')
            for a in mi.authors:
                attrib = {'role':'aut'}
                if mi.author_sort:
                    attrib['file_as'] = mi.author_sort
                m.add('creator', a, attrib=attrib)
        if mi.comments:
            m.clear('description')
            m.add('description', mi.comments)
        if mi.publisher:
            m.clear('publisher')
            m.add('publisher', mi.publisher)
        if mi.series:
            m.clear('series')
            m.add('series', mi.series)
        if mi.isbn:
            # Update existing ISBN identifiers, add one only if none exists
            has = False
            for x in m.identifier:
                if x.scheme.lower() == 'isbn':
                    x.content = mi.isbn
                    has = True
            if not has:
                m.add('identifier', mi.isbn, scheme='ISBN')
        if mi.language:
            m.clear('language')
            m.add('language', mi.language)
        if mi.book_producer:
            m.filter('creator', lambda x : x.role.lower() == 'bkp')
            m.add('creator', mi.book_producer, role='bkp')
        if mi.series_index is not None:
            m.clear('series_index')
            m.add('series_index', '%.2f'%mi.series_index)
        if mi.rating is not None:
            m.clear('rating')
            m.add('rating', '%.2f'%mi.rating)
        if mi.tags:
            m.clear('subject')
            for t in mi.tags:
                m.add('subject', t)
        self.set_cover(mi, prefer_metadata_cover)

    def set_cover(self, mi, prefer_metadata_cover):
        '''
        Set the cover of the book from `mi`.

        The cover data is read from the file ``mi.cover`` if it is readable,
        otherwise it is taken from the last element of ``mi.cover_data``. If
        no data is found, nothing is done.

        If the guide already has a ``cover`` reference, the data of the
        manifest item it points at is replaced, unless
        `prefer_metadata_cover` is true, in which case the existing cover is
        left alone. Otherwise a new ``image/jpeg`` manifest item is created
        and added to the guide as the cover.
        '''
        cdata = ''
        if mi.cover and os.access(mi.cover, os.R_OK):
            # Close the file promptly instead of leaking the handle
            with open(mi.cover, 'rb') as f:
                cdata = f.read()
        elif mi.cover_data and mi.cover_data[-1]:
            # Read the same element that was just tested
            cdata = mi.cover_data[-1]
        if not cdata:
            return
        if 'cover' in self.oeb.guide:
            if not prefer_metadata_cover:
                href = self.oeb.guide['cover'].href
                self.oeb.manifest.hrefs[href]._data = cdata
        else:
            id, href = self.oeb.manifest.generate('cover', 'cover.jpg')
            self.oeb.manifest.add(id, href, 'image/jpeg', data=cdata)
            self.oeb.guide.add('cover', 'Cover', href)
--- a/src/calibre/ebooks/oeb/transforms/rescale.py
+++ b/src/calibre/ebooks/oeb/transforms/rescale.py
@ -0,0 +1,37 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 from calibre import fit_image
 class RescaleImages(object):
    'Rescale all images to fit inside given screen size'

    def __call__(self, oeb, opts):
        '''
        Replace the data of every image in the manifest of `oeb` that
        ``fit_image`` reports as needing scaling to fit in
        ``opts.dest.width`` x ``opts.dest.height``.
        '''
        from PyQt4.Qt import QApplication, QImage, Qt
        from calibre.gui2 import pixmap_to_data
        self.oeb, self.opts, self.log = oeb, opts, oeb.log
        page_width, page_height = opts.dest.width, opts.dest.height
        for entry in oeb.manifest:
            if not entry.media_type.startswith('image'):
                continue
            payload = entry.data
            if not payload:
                continue
            # Presumably QImage needs a QApplication to exist; one is
            # created on demand
            if QApplication.instance() is None:
                QApplication([])
            picture = QImage(10, 10, QImage.Format_ARGB32_Premultiplied)
            if not picture.loadFromData(payload):
                # Data that cannot be loaded as an image is left untouched
                continue
            width, height = picture.width(), picture.height()
            fit = fit_image(width, height, page_width, page_height)
            scaled, new_width, new_height = fit
            if not scaled:
                continue
            self.log('Rescaling image', entry.href)
            # The target size comes from fit_image, so the size is forced
            picture = picture.scaled(new_width, new_height,
                    Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
            entry.data = pixmap_to_data(picture)
--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@ -16,8 +16,8 @@ from lxml import etree
 from lxml.cssselect import CSSSelector
 from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP as NAMESPACES, \
-        urldefrag, rewrite_links
+        urldefrag, rewrite_links, urlunquote
-from calibre.ebooks.epub import tostring, rules
+from calibre.ebooks.epub import rules
 XPath = functools.partial(_XPath, namespaces=NAMESPACES)
@ -25,6 +25,9 @@ XPath = functools.partial(_XPath, namespaces=NAMESPACES)
 SPLIT_ATTR       = 'cs'
 SPLIT_POINT_ATTR = 'csp'
 def tostring(root):
    'Serialize `root` with ``etree.tostring``, encoded as UTF-8.'
    serialized = etree.tostring(root, encoding='utf-8')
    return serialized
 class SplitError(ValueError):
    def __init__(self, path, root):
@ -142,7 +145,7 @@ class Split(object):
            nhref = anchor_map[frag if frag else None]
            nhref = self.current_item.relhref(nhref)
            if frag:
-                nhref = '#'.join((nhref, frag))
+                nhref = '#'.join((urlunquote(nhref), frag))
            return nhref
        return url
--- a/src/calibre/ebooks/oeb/transforms/structure.py
+++ b/src/calibre/ebooks/oeb/transforms/structure.py
@ -11,7 +11,7 @@ import re
 from lxml import etree
 from urlparse import urlparse
-from calibre.ebooks.oeb.base import XPNSMAP, TOC
+from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML
 XPath = lambda x: etree.XPath(x, namespaces=XPNSMAP)
 class DetectStructure(object):
@ -63,11 +63,11 @@ class DetectStructure(object):
                if chapter_mark == 'none':
                    continue
                elif chapter_mark == 'rule':
-                    mark = etree.Element('hr')
+                    mark = etree.Element(XHTML('hr'))
                elif chapter_mark == 'pagebreak':
-                    mark = etree.Element('div', style=page_break_after)
+                    mark = etree.Element(XHTML('div'), style=page_break_after)
                else: # chapter_mark == 'both':
-                    mark = etree.Element('hr', style=page_break_before)
+                    mark = etree.Element(XHTML('hr'), style=page_break_before)
                elem.addprevious(mark)
    def create_level_based_toc(self):
@ -114,12 +114,13 @@ class DetectStructure(object):
    def add_leveled_toc_items(self, item):
        level1 = XPath(self.opts.level1_toc)(item.data)
        level1_order = []
        document = item
        counter = 1
        if level1:
            added = {}
            for elem in level1:
-                text, _href = self.elem_to_link(item, elem, counter)
+                text, _href = self.elem_to_link(document, elem, counter)
                counter += 1
                if text:
                    node = self.oeb.toc.add(text, _href,
@ -132,11 +133,11 @@ class DetectStructure(object):
                level2 = list(XPath(self.opts.level2_toc)(item.data))
                for elem in level2:
                    level1 = None
-                    for item in item.data.iterdescendants():
+                    for item in document.data.iterdescendants():
                        if item in added.keys():
                            level1 = added[item]
                        elif item == elem and level1 is not None:
-                            text, _href = self.elem_to_link(item, elem, counter)
+                            text, _href = self.elem_to_link(document, elem, counter)
                            counter += 1
                            if text:
                                added2[elem] = level1.add(text, _href,
@ -145,12 +146,12 @@ class DetectStructure(object):
                    level3 = list(XPath(self.opts.level3_toc)(item.data))
                    for elem in level3:
                        level2 = None
-                        for item in item.data.iterdescendants():
+                        for item in document.data.iterdescendants():
                            if item in added2.keys():
                                level2 = added2[item]
                            elif item == elem and level2 is not None:
                                text, _href = \
-                                        self.elem_to_link(item, elem, counter)
+                                        self.elem_to_link(document, elem, counter)
                                counter += 1
                                if text:
                                    level2.add(text, _href,
--- a/src/calibre/ebooks/pdb/header.py
+++ b/src/calibre/ebooks/pdb/header.py
@ -34,7 +34,7 @@ class PdbHeaderReader(object):
    def full_section_info(self, number):
        if number not in range(0, self.num_sections):
            raise ValueError('Not a valid section number %i' % number)
-            
+
        self.stream.seek(78+number*8)
        offset, a1, a2, a3, a4 = struct.unpack('>LBBBB', self.stream.read(8))[0]
        flags, val = a1, a2<<16 | a3<<8 | a4
@ -43,14 +43,14 @@ class PdbHeaderReader(object):
    def section_offset(self, number):
        if number not in range(0, self.num_sections):
            raise ValueError('Not a valid section number %i' % number)
-            
+
        self.stream.seek(78+number*8)
        return struct.unpack('>LBBBB', self.stream.read(8))[0]
    def section_data(self, number):
        if number not in range(0, self.num_sections):
            raise ValueError('Not a valid section number %i' % number)
-            
+
        start = self.section_offset(number)
        if number == self.num_sections -1:
            end = os.stat(self.stream.name).st_size
@ -68,10 +68,10 @@ class PdbHeaderWriter(object):
    def build_header(self, offsets):
        '''
-        Sections is a list of section offsets
+        Offsets is a list of section offsets
        '''
-    
+
-        
+
-    
+
-        
+
        return header
--- a/src/calibre/ebooks/pdb/input.py
+++ b/src/calibre/ebooks/pdb/input.py
@ -17,18 +17,18 @@ class PDBInput(InputFormatPlugin):
    author      = 'John Schember'
    description = 'Convert PDB to HTML'
    file_types  = set(['pdb'])
-    
+
    def convert(self, stream, options, file_ext, log,
                accelerators):
        header = PdbHeaderReader(stream)
        Reader = get_reader(header.ident)
-        
+
        if Reader is None:
            raise PDBError('Unknown format in pdb file. Identity is %s' % header.identity)
        log.debug('Detected ebook format as: %s with identity: %s' % (IDENTITY_TO_NAME[header.ident], header.ident))
-            
+
        reader = Reader(header, stream, log, options.input_encoding)
        opf = reader.extract_content(os.getcwd())
-        
+
        return opf
--- a/src/calibre/gui2/dialogs/epub.py
+++ b/src/calibre/gui2/dialogs/epub.py
@ -1,292 +0,0 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 '''
 The GUI for conversion to EPUB.
 '''
 import os, uuid
 from PyQt4.Qt import QDialog, QSpinBox, QDoubleSpinBox, QComboBox, QLineEdit, \
                     QTextEdit, QCheckBox, Qt, QPixmap, QIcon, QListWidgetItem, SIGNAL
 from lxml.etree import XPath
 from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
 from calibre.gui2.dialogs.epub_ui import Ui_Dialog
 from calibre.gui2 import error_dialog, choose_images, pixmap_to_data, ResizableDialog
 from calibre.ebooks.epub.from_any import SOURCE_FORMATS, config as epubconfig
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.ebooks.metadata.opf2 import OPFCreator
 from calibre.ebooks.metadata import authors_to_string, string_to_authors
 class Config(ResizableDialog, Ui_Dialog):
    OUTPUT = 'EPUB'
    def __init__(self, parent, db, row=None, config=epubconfig):
        '''
        Set up the conversion dialog.

        :param parent: parent widget, passed on to ResizableDialog
        :param db: database object used to look up ids, saved conversion
                   options and metadata
        :param row: row of the book to convert, or None for a bulk conversion
        :param config: callable returning the option configuration object
        '''
        ResizableDialog.__init__(self, parent)
        self.hide_controls()
        entered = SIGNAL('itemEntered(QListWidgetItem *)')
        self.connect(self.category_list, entered, self.show_category_help)
        self.connect(self.cover_button, SIGNAL("clicked()"), self.select_cover)
        self.cover_changed = False
        self.db, self.id, self.row = db, None, row
        if row is None:
            self.config = config()
        else:
            self.id = db.id(row)
            stock = config().as_string() + '\n\n'
            saved = self.db.conversion_options(self.id, self.OUTPUT.lower())
            # Options saved for this book are appended after the stock ones.
            self.config = config(defaults=stock + (saved or ''))
        self.initialize()
        self.get_source_format()
        self.category_list.setCurrentRow(0)

        if self.row is None:
            caption = _('Bulk convert to ')
        else:
            caption = _(u'Convert %s to ')%unicode(self.title.text())
        self.setWindowTitle(caption+self.OUTPUT)
    def hide_controls(self):
        '''Make the widgets listed below invisible in this dialog.'''
        hidden = (
            'source_profile_label', 'opt_source_profile',
            'dest_profile_label', 'opt_dest_profile',
            'opt_toc_title', 'toc_title_label',
            'opt_rescale_images', 'opt_ignore_tables',
            'opt_prefer_author_sort',
        )
        for widget_name in hidden:
            getattr(self, widget_name).setVisible(False)
    def initialize(self):
        '''
        Add the four category entries to the category list, then set up the
        tooltips and load the option values into the widgets.
        '''
        # The icons are kept on the instance next to the list items.
        icons = self.__w = []

        icons.append(QIcon(':/images/dialog_information.svg'))
        self.item1 = QListWidgetItem(icons[-1], _('Metadata'), self.category_list)

        icons.append(QIcon(':/images/lookfeel.svg'))
        look_label = _('Look & Feel').replace(' ','\n')
        self.item2 = QListWidgetItem(icons[-1], look_label, self.category_list)

        icons.append(QIcon(':/images/page.svg'))
        page_label = _('Page Setup').replace(' ','\n')
        self.item3 = QListWidgetItem(icons[-1], page_label, self.category_list)

        icons.append(QIcon(':/images/chapters.svg'))
        chapter_label = _('Chapter Detection').replace(' ','\n')
        self.item4 = QListWidgetItem(icons[-1], chapter_label, self.category_list)

        self.setup_tooltips()
        self.initialize_options()
    def set_help(self, msg):
        '''
        Show msg as plain text in the help view.

        Nothing happens when msg is falsy, or when it has a strip() method
        whose result is falsy.
        '''
        if not msg:
            return
        strip = getattr(msg, 'strip', lambda:True)
        if strip():
            self.help_view.setPlainText(msg)
    def setup_tooltips(self):
        '''
        Attach the help text of every preference to its ``opt_<name>`` widget
        as tooltip and "What's This" text, and make entering the widget show
        that help in the help view.
        '''
        def show_widget_help(obj, event):
            self.set_help(getattr(obj, '_help', obj.toolTip()))

        for pref in self.config.option_set.preferences:
            widget = getattr(self, 'opt_'+pref.name, False)
            if not (pref.help and widget):
                continue
            text = pref.help.replace('%default', str(pref.default))
            widget._help = text
            # Angle brackets are escaped for the tooltip/What's This text.
            escaped = text.replace('<', '&lt;').replace('>', '&gt;')
            widget.setToolTip(escaped)
            widget.setWhatsThis(escaped)
            # Note: the handler is installed on the widget's class.
            widget.__class__.enterEvent = show_widget_help
    def show_category_help(self, item):
        '''Show the description of the category list entry ``item``.'''
        label = unicode(item.text())
        descriptions = {
                _('Metadata')          : _('Specify metadata such as title and author for the book.\n\nMetadata will be updated in the database as well as the generated %s file.')%self.OUTPUT,
                _('Look & Feel')       : _('Adjust the look of the generated ebook by specifying things like font sizes.'),
                _('Page Setup')        : _('Specify the page layout settings like margins.'),
                _('Chapter Detection') : _('Fine tune the detection of chapter and section headings.'),
                }
        # Labels may contain newlines in place of spaces; undo that for lookup.
        key = label.replace('\n', ' ')
        self.set_help(descriptions[key])
    def select_cover(self):
        files = choose_images(self, 'change cover dialog',
                             _('Choose cover for ') + unicode(self.title.text()))
        if not files:
            return
        _file = files[0]
        if _file:
            _file = os.path.abspath(_file)
            if not os.access(_file, os.R_OK):
                d = error_dialog(self.window, _('Cannot read'),
                        _('You do not have permission to read the file: ') + _file)
                d.exec_()
                return
            cf, cover = None, None
            try:
                cf = open(_file, "rb")
                cover = cf.read()
            except IOError, e:
                d = error_dialog(self.window, _('Error reading file'),
                        _("<p>There was an error reading from file: <br /><b>") + _file + "</b></p><br />"+str(e))
                d.exec_()
            if cover:
                pix = QPixmap()
                pix.loadFromData(cover)
                if pix.isNull():
                    d = error_dialog(self.window, _('Error reading file'),
                                      _file + _(" is not a valid picture"))
                    d.exec_()
                else:
                    self.cover_path.setText(_file)
                    self.cover.setPixmap(pix)
                    self.cover_changed = True
                    self.cpixmap = pix
    def initialize_metadata_options(self):
        '''
        Fill the series combo box and, when a single book is being converted,
        load that book's metadata and cover into the metadata widgets.
        '''
        known_series = self.db.all_series()
        known_series.sort(cmp=lambda x, y : cmp(x[1], y[1]))
        for entry in known_series:
            self.series.addItem(entry[1])
        self.series.setCurrentIndex(-1)

        if self.row is None:
            return

        mi = self.db.get_metadata(self.id, index_is_id=True)
        self.title.setText(mi.title)
        author_text = authors_to_string(mi.authors) if mi.authors else ''
        self.author.setText(author_text)
        self.publisher.setText(mi.publisher or '')
        self.author_sort.setText(mi.author_sort or '')
        self.tags.setText(', '.join(mi.tags or []))
        self.comment.setText(mi.comments or '')
        if mi.series:
            self.series.setCurrentIndex(self.series.findText(mi.series))
        if mi.series_index is not None:
            self.series_index.setValue(mi.series_index)

        cover_data = self.db.cover(self.id, index_is_id=True)
        if not cover_data:
            return
        pixmap = QPixmap()
        pixmap.loadFromData(cover_data)
        if not pixmap.isNull():
            self.cover.setPixmap(pixmap)
    def get_title_and_authors(self):
        '''
        Return ``(title, authors)`` from the title and author fields.

        An empty title becomes the translated 'Unknown'; an empty author
        field becomes a one element list holding the translated 'Unknown'.
        '''
        title = unicode(self.title.text()).strip() or _('Unknown')
        raw_authors = unicode(self.author.text()).strip()
        if raw_authors:
            authors = string_to_authors(raw_authors)
        else:
            authors = [_('Unknown')]
        return title, authors
    def get_metadata(self):
        '''
        Build a MetaInformation object from the metadata widgets.

        Publisher, author sort, comments, series and tags are only set when
        the corresponding widget holds a value. Tags are split on commas and
        blank entries are dropped, so an empty tags field sets no tags.
        '''
        title, authors = self.get_title_and_authors()
        mi = MetaInformation(title, authors)
        publisher = unicode(self.publisher.text()).strip()
        if publisher:
            mi.publisher = publisher
        author_sort = unicode(self.author_sort.text()).strip()
        if author_sort:
            mi.author_sort = author_sort
        comments = unicode(self.comment.toPlainText()).strip()
        if comments:
            mi.comments = comments
        mi.series_index = int(self.series_index.value())
        if self.series.currentIndex() > -1:
            mi.series = unicode(self.series.currentText()).strip()
        # ''.split(',') is [''], so empty pieces must be filtered out
        # explicitly or an empty field would produce one blank tag.
        tags = [t.strip() for t in unicode(self.tags.text()).split(',')]
        tags = [t for t in tags if t]
        if tags:
            mi.tags = tags

        return mi
    def read_settings(self):
        '''
        Copy the current widget values into ``self.config`` and, when a
        single book is being converted, save them in the database for it.
        '''
        for pref in self.config.option_set.preferences:
            widget = getattr(self, 'opt_'+pref.name, False)
            if not widget:
                continue
            if isinstance(widget, (QSpinBox, QDoubleSpinBox)):
                value = widget.value()
            elif isinstance(widget, (QLineEdit, QTextEdit)):
                reader = getattr(widget, 'toPlainText', getattr(widget, 'text', None))
                # Empty text is stored as None.
                value = unicode(reader()) or None
            elif isinstance(widget, QComboBox):
                value = unicode(widget.currentText())
            elif isinstance(widget, QCheckBox):
                value = bool(widget.isChecked())
            else:
                continue
            self.config.set(pref.name, value)

        if self.row is not None:
            self.db.set_conversion_options(self.id, self.OUTPUT.lower(), self.config.src)
    def initialize_options(self):
        '''
        Load the metadata widgets, then push every parsed option value into
        its ``opt_<name>`` widget. Options whose value is None are skipped.
        '''
        self.initialize_metadata_options()
        parsed = self.config.parse()
        for pref in self.config.option_set.preferences:
            widget = getattr(self, 'opt_'+pref.name, False)
            if not widget:
                continue
            current = getattr(parsed, pref.name)
            if current is None:
                continue
            if isinstance(widget, (QSpinBox, QDoubleSpinBox)):
                widget.setValue(current)
            elif isinstance(widget, (QLineEdit, QTextEdit)):
                getattr(widget, 'setPlainText', widget.setText)(current)
                getattr(widget, 'setCursorPosition', lambda x: x)(0)
            elif isinstance(widget, QComboBox):
                for choice in pref.choices:
                    widget.addItem(choice)
                widget.setCurrentIndex(widget.findText(current))
            elif isinstance(widget, QCheckBox):
                if bool(current):
                    widget.setCheckState(Qt.Checked)
                else:
                    widget.setCheckState(Qt.Unchecked)
    def get_source_format(self):
        '''
        Decide which of the book's formats is used as conversion source and
        store it in ``self.source_format`` (None in bulk mode, when no
        supported format exists, or when the user cancels the choice).
        '''
        self.source_format = None
        if self.row is None:
            return
        temp = self.db.formats(self.id, index_is_id=True)
        if not temp:
            error_dialog(self.parent(), _('Cannot convert'),
                         _('This book has no available formats')).exec_()
            # Nothing to split below; leave source_format as None.
            return
        available_formats = [f.upper().strip() for f in temp.split(',')]
        choices = [fmt.upper() for fmt in SOURCE_FORMATS if fmt.upper() in available_formats]
        if not choices:
            error_dialog(self.parent(), _('No available formats'),
                        _('Cannot convert %s as this book has no supported formats')%(self.title.text())).exec_()
        elif len(choices) == 1:
            self.source_format = choices[0]
        else:
            d = ChooseFormatDialog(self.parent(), _('Choose the format to convert to ')+self.OUTPUT, choices)
            if d.exec_() == QDialog.Accepted:
                self.source_format = d.format()
    def accept(self):
        for opt in ('chapter', 'level1_toc', 'level2_toc', 'level3_toc', 'page',
                    'page_names'):
            text = unicode(getattr(self, 'opt_'+opt).text())
            if text:
                try:
                    XPath(text,namespaces={'re':'http://exslt.org/regular-expressions'})
                except Exception, err:
                    error_dialog(self, _('Invalid XPath expression'),
                        _('The expression %s is invalid. Error: %s')%(text, err)
                                 ).exec_()
                    return
        mi = self.get_metadata()
        self.user_mi = mi
        self.read_settings()
        self.cover_file = None
        if self.row is not None:
            self.db.set_metadata(self.id, mi)
            self.mi = self.db.get_metadata(self.id, index_is_id=True)
            self.mi.application_id = uuid.uuid4()
            opf = OPFCreator(os.getcwdu(), self.mi)
            self.opf_file = PersistentTemporaryFile('.opf')
            opf.render(self.opf_file)
            self.opf_file.close()
            if self.cover_changed:
                self.db.set_cover(self.id, pixmap_to_data(self.cover.pixmap()))
            cover = self.db.cover(self.id, index_is_id=True)
            if cover:
                cf = PersistentTemporaryFile('.jpeg')
                cf.write(cover)
                cf.close()
                self.cover_file = cf
        self.opts = self.config.parse()
        QDialog.accept(self)
--- a/src/calibre/gui2/dialogs/epub.ui
+++ b/src/calibre/gui2/dialogs/epub.ui
--- a/src/calibre/gui2/dialogs/lrf_single.py
+++ b/src/calibre/gui2/dialogs/lrf_single.py
@ -1,425 +0,0 @@
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 import os, codecs
 from PyQt4.QtCore import QObject, SIGNAL, Qt
 from PyQt4.QtGui import QAbstractSpinBox, QLineEdit, QCheckBox, QDialog, \
                        QPixmap, QTextEdit, QListWidgetItem, QIcon
 from calibre.gui2.dialogs.lrf_single_ui import Ui_LRFSingleDialog
 from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
 from calibre.gui2 import qstring_to_unicode, error_dialog, \
                           pixmap_to_data, choose_images, config
 from calibre.gui2.widgets import FontFamilyModel
 from calibre.ebooks.lrf import option_parser
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.constants import __appname__
 from calibre.ebooks.metadata import authors_to_string, string_to_authors, authors_to_sort_string
 font_family_model = None
 class LRFSingleDialog(QDialog, Ui_LRFSingleDialog):
    PARSER = option_parser('')
    PREPROCESS_OPTIONS = [ o for o in PARSER.option_groups if o.title == 'PREPROCESSING OPTIONS'][0].option_list
    @classmethod
    def options(cls):
        '''
        Yield every option of ``cls.PARSER``: first the parser's own options,
        then the options of each option group.
        '''
        # Work on a copy: extending PARSER.option_list itself would make the
        # shared parser's list grow with duplicates on every call.
        options = list(cls.PARSER.option_list)
        for g in cls.PARSER.option_groups:
            options.extend(g.option_list)
        for opt in options:
            yield opt
    @classmethod
    def option_to_name(cls, opt):
        '''
        Return the GUI attribute name for ``opt``: ``gui_`` followed by the
        option string without its first two characters and with dashes
        turned into underscores.
        '''
        flag = opt.get_opt_string()
        stem = flag[2:]
        return 'gui_' + stem.replace('-', '_')
    def initialize_common(self):
        '''
        Initialization shared by the dialogs: tooltips, option defaults, the
        font family model for the three font combo boxes and the saved
        global defaults.
        '''
        global font_family_model
        self.output_format = 'LRF'
        self.setup_tooltips()
        self.initialize_options()
        # The model is created once and kept in the module level global.
        if font_family_model is None:
            font_family_model = FontFamilyModel()
        model = self.font_family_model = font_family_model
        for combo in (self.gui_serif_family, self.gui_sans_family,
                      self.gui_mono_family):
            combo.setModel(model)
        self.load_saved_global_defaults()
    def populate_list(self):
        '''Add the four category entries, with their icons, to categoryList.'''
        # The icons are kept on the instance next to the list items.
        icons = self.__w = []

        icons.append(QIcon(':/images/dialog_information.svg'))
        self.item1 = QListWidgetItem(icons[-1], _("Metadata"), self.categoryList)

        icons.append(QIcon(':/images/lookfeel.svg'))
        self.item2 = QListWidgetItem(icons[-1], _('Look & Feel'), self.categoryList)

        icons.append(QIcon(':/images/page.svg'))
        self.item3 = QListWidgetItem(icons[-1], _('Page Setup'), self.categoryList)

        icons.append(QIcon(':/images/chapters.svg'))
        self.item4 = QListWidgetItem(icons[-1], _('Chapter Detection'), self.categoryList)
    def __init__(self, window, db, row):
        '''
        Build the dialog.

        :param window: parent widget for this dialog and its sub dialogs
        :param db: database object; when falsy the dialog only edits the
                   conversion defaults
        :param row: row of the book to convert
        '''
        QDialog.__init__(self, window)
        Ui_LRFSingleDialog.__init__(self)
        self.setupUi(self)
        self.populate_list()
        self.categoryList.setCurrentRow(0)
        QObject.connect(self.categoryList, SIGNAL('itemEntered(QListWidgetItem *)'),
                        self.show_category_help)
        QObject.connect(self.cover_button, SIGNAL("clicked(bool)"), self.select_cover)
        #self.categoryList.leaveEvent = self.reset_help
        self.reset_help()
        self.selected_format = None
        self.initialize_common()
        self.db, self.row = db, row
        self.cover_changed = False
        self.cpixmap = None
        self.changed = False

        if not db:
            self.setWindowTitle(_('Set conversion defaults'))
            return

        self.id = self.db.id(self.row)
        self.read_saved_options()
        self.initialize_metadata()
        raw_formats = self.db.formats(self.row)
        if raw_formats:
            formats = [i.upper() for i in raw_formats.split(',')]
        else:
            formats = []
        # The output format itself is not offered as a source.
        if self.output_format in formats:
            formats.remove(self.output_format)

        if not formats:
            d = error_dialog(window, _('No available formats'),
                    _('Cannot convert %s as this book has no supported formats')%(self.gui_title.text()))
            d.exec_()
        elif len(formats) == 1:
            self.selected_format = formats[0]
        else:
            d = ChooseFormatDialog(window, _('Choose the format to convert into LRF'), formats)
            d.exec_()
            if d.result() == QDialog.Accepted:
                self.selected_format = d.format()

        if self.selected_format:
            self.setWindowTitle(_('Convert %s to LRF')%(self.selected_format,))
    def load_saved_global_defaults(self):
        '''Apply the 'LRF_conversion_defaults' config entry, if it is set.'''
        saved = config['LRF_conversion_defaults']
        if not saved:
            return
        self.set_options_from_cmdline(saved)
    def set_options_from_cmdline(self, cmdline):
        '''
        Set the option widgets from ``cmdline``, a list of option strings
        and their values.

        Check boxes are checked exactly when their option string occurs in
        ``cmdline``. Spin boxes, line edits and text edits receive the item
        following their option string. The profile, preprocessing and font
        family combo boxes are handled separately at the end.
        '''
        for opt in self.options():
            guiname = self.option_to_name(opt)
            try:
                obj = getattr(self, guiname)
            except AttributeError:
                continue
            if isinstance(obj, QCheckBox):
                if opt.get_opt_string() in cmdline:
                    obj.setCheckState(Qt.Checked)
                else:
                    obj.setCheckState(Qt.Unchecked)
            try:
                i = cmdline.index(opt.get_opt_string())
            except ValueError:
                continue

            if isinstance(obj, QAbstractSpinBox):
                obj.setValue(cmdline[i+1])
            elif isinstance(obj, QLineEdit):
                obj.setText(cmdline[i+1])
            elif isinstance(obj, QTextEdit):
                obj.setPlainText(cmdline[i+1])

        # A command line without a (complete) --profile entry simply leaves
        # the profile combo box untouched instead of raising.
        try:
            profile = cmdline[cmdline.index('--profile')+1]
        except (ValueError, IndexError):
            profile = None
        if profile is not None:
            pindex = self.gui_profile.findText(profile)
            if pindex >= 0:
                self.gui_profile.setCurrentIndex(pindex)

        for prepro in self.PREPROCESS_OPTIONS:
            ops = prepro.get_opt_string()
            if ops in cmdline:
                self.preprocess.setCurrentIndex(self.preprocess.findText(ops[2:]))
                break

        for opt in ('--serif-family', '--sans-family', '--mono-family'):
            if opt in cmdline:
                # Only the last comma separated entry is used as family name.
                family = cmdline[cmdline.index(opt)+1].split(',')[-1].strip()
                obj = getattr(self, 'gui_'+opt[2:].replace('-', '_'))
                try:
                    obj.setCurrentIndex(self.font_family_model.index_of(family))
                except Exception:
                    # Best effort: a family that cannot be selected is skipped.
                    continue
    def read_saved_options(self):
        '''Apply the conversion options saved in the database for this book.'''
        fmt = self.output_format.lower()
        saved = self.db.conversion_options(self.id, fmt)
        if not saved:
            return
        self.set_options_from_cmdline(saved)
    def select_cover(self, checked):
        files = choose_images(self, 'change cover dialog', 
                             _('Choose cover for ') + qstring_to_unicode(self.gui_title.text()))
        if not files:
            return
        _file = files[0]
        if _file:
            _file = os.path.abspath(_file)
            if not os.access(_file, os.R_OK):
                d = error_dialog(self.window, _('Cannot read'), 
                        _('You do not have permission to read the file: ') + _file)
                d.exec_()
                return
            cf, cover = None, None
            try:
                cf = open(_file, "rb")
                cover = cf.read()
            except IOError, e: 
                d = error_dialog(self.window, _('Error reading file'),
                        _("<p>There was an error reading from file: <br /><b>") + _file + "</b></p><br />"+str(e))
                d.exec_()
            if cover:
                pix = QPixmap()
                pix.loadFromData(cover)
                if pix.isNull():
                    d = error_dialog(self.window, _file + _(" is not a valid picture"))
                    d.exec_()
                else:
                    self.cover_path.setText(_file)
                    self.cover.setPixmap(pix)
                    self.cover_changed = True
                    self.cpixmap = pix
    def initialize_metadata(self):
        '''
        Load title, authors, author sort, publisher, tags, comments, series
        and cover of the current row from the database into the widgets.
        '''
        db, row = self.db, self.row
        self.id = self.db.id(row)
        self.gui_title.setText(db.title(row))

        authors = self.db.authors(row)
        if authors:
            # '|' inside a stored author name stands for a comma.
            names = [a.strip().replace('|', ',') for a in authors.split(',')]
            self.gui_author.setText(authors_to_string(names))
        else:
            self.gui_author.setText('')

        self.gui_author_sort.setText(self.db.author_sort(row) or '')
        self.gui_publisher.setText(self.db.publisher(row) or '')
        self.tags.setText(self.db.tags(row) or '')
        self.gui_comment.setPlainText(self.db.comments(row) or '')

        all_series = self.db.all_series()
        all_series.sort(cmp=lambda x, y : cmp(x[1], y[1]))
        series_id = self.db.series_id(row)
        selected = None
        for position, (sid, name) in enumerate(all_series):
            if sid == series_id:
                selected = position
            self.series.addItem(name)

        self.series.lineEdit().setText('')
        if selected is not None:
            self.series.setCurrentIndex(selected)
        self.series_index.setValue(self.db.series_index(row))

        cover_data = self.db.cover(row)
        if cover_data:
            pixmap = QPixmap()
            pixmap.loadFromData(cover_data)
            if not pixmap.isNull():
                self.cover.setPixmap(pixmap)
    def initialize_options(self):
        '''Initialize non metadata options from the defaults.'''
        for name, opt in self.option_map.items():
            default = opt.default
            widget = getattr(self, name)
            if isinstance(widget, QAbstractSpinBox):
                widget.setValue(default)
            elif isinstance(widget, QLineEdit) and default:
                widget.setText(default)
            elif isinstance(widget, QTextEdit) and default:
                widget.setPlainText(default)
            elif isinstance(widget, QCheckBox):
                if default:
                    widget.setCheckState(Qt.Checked)
                else:
                    widget.setCheckState(Qt.Unchecked)
        self.gui_headerformat.setDisabled(True)
        self.gui_header_separation.setDisabled(True)
        self.gui_use_metadata_cover.setCheckState(Qt.Checked)

        # Preprocessing choices: a "none" entry plus one per known option.
        self.preprocess.addItem(_('No preprocessing'))
        for prepro in self.PREPROCESS_OPTIONS:
            self.preprocess.addItem(prepro.get_opt_string()[2:])
        preprocess_help = (
            _('Preprocess the file before converting to LRF. This is useful if you know that the file is from a specific source. Known sources:')
            + _('<ol><li><b>baen</b> - Books from BAEN Publishers</li>')
            + _('<li><b>pdftohtml</b> - HTML files that are the output of the program pdftohtml</li>')
            + _('<li><b>book-designer</b> - HTML0 files from Book Designer</li>'))
        self.preprocess.setToolTip(preprocess_help)
        self.preprocess.setWhatsThis(preprocess_help)

        # Add any profile the combo box does not list yet.
        for profile in self.PARSER.get_option('--profile').choices:
            if self.gui_profile.findText(profile) < 0:
                self.gui_profile.addItem(profile)
    def setup_tooltips(self):
        '''
        Build ``self.option_map`` (GUI attribute name -> option) and give
        every matching widget its option help as tooltip and "What's This"
        text. Entering such a widget shows its tooltip in the help view.
        '''
        def show_item_help(obj, event):
            self.set_help(obj.toolTip())

        self.option_map = {}
        for opt in self.options():
            try:
                text = opt.help.replace('%default', str(opt.default))
            except (ValueError, TypeError):
                text = opt.help
            guiname = self.option_to_name(opt)
            if not hasattr(self, guiname):
                continue
            widget = getattr(self, guiname)
            widget.setToolTip(text)
            widget.setWhatsThis(text)
            self.option_map[guiname] = opt
            # Note: the handler is installed on the widget's class.
            widget.__class__.enterEvent = show_item_help
            #obj.leaveEvent = self.reset_help
        self.preprocess.__class__.enterEvent = show_item_help
        #self.preprocess.leaveEvent = self.reset_help
    def show_category_help(self, item):
        '''Show the description of the category list entry ``item``.'''
        label = qstring_to_unicode(item.text())
        descriptions = {
                _('Metadata')          : _('Specify metadata such as title and author for the book.<p>Metadata will be updated in the database as well as the generated LRF file.'),
                _('Look & Feel')       : _('Adjust the look of the generated LRF file by specifying things like font sizes and the spacing between words.'),
                _('Page Setup')        : _('Specify the page settings like margins and the screen size of the target device.'),
                _('Chapter Detection') : _('Fine tune the detection of chapter and section headings.'),
                }
        self.set_help(descriptions[label])
    def set_help(self, msg):
        '''
        Show msg, wrapped in a minimal HTML document, in the help view.

        Nothing happens when msg is falsy, or when it has a strip() method
        whose result is falsy.
        '''
        if not msg:
            return
        strip = getattr(msg, 'strip', lambda:True)
        if strip():
            self.help_view.setHtml('<html><body>%s</body></html>'%(msg,))
    def reset_help(self, *args):
        '''
        Show the "No help available" placeholder in the help view. When
        called with arguments, accept() is called on the first one.
        '''
        placeholder = _('<font color="gray">No help available</font>')
        self.set_help(placeholder)
        if not args:
            return
        args[0].accept()
    def build_commandline(self):
        '''
        Translate the current widget state into a command line.

        :return: a list starting with the application name followed by
                 option strings and their values, or None when the
                 ``--encoding`` value is not a known codec (an error dialog
                 is shown in that case).
        '''
        cmd = [__appname__]
        for name in self.option_map.keys():
            opt = self.option_map[name].get_opt_string()
            obj = getattr(self, name)
            if isinstance(obj, QAbstractSpinBox):
                cmd.extend([opt, obj.value()])
            elif isinstance(obj, QLineEdit):
                val = qstring_to_unicode(obj.text())
                if val:
                    if opt == '--encoding':
                        try:
                            codecs.getdecoder(val)
                        except Exception:
                            # Any lookup failure means the encoding cannot be
                            # used; interpreter exits are no longer swallowed.
                            d = error_dialog(self, 'Unknown encoding',
                                             '<p>Unknown encoding: %s<br/>For a list of known encodings see http://docs.python.org/lib/standard-encodings.html'%val)
                            d.exec_()
                            return
                    cmd.extend([opt, val])
            elif isinstance(obj, QTextEdit):
                val = qstring_to_unicode(obj.toPlainText())
                if val:
                    cmd.extend([opt, val])
            elif isinstance(obj, QCheckBox):
                if obj.checkState() == Qt.Checked:
                    cmd.append(opt)

        # The preprocess combo box holds option names without the leading --.
        text = qstring_to_unicode(self.preprocess.currentText())
        if text != _('No preprocessing'):
            cmd.append(u'--'+text)
        cmd.extend([u'--profile',  qstring_to_unicode(self.gui_profile.currentText())])

        for opt in ('--serif-family', '--sans-family', '--mono-family'):
            obj = getattr(self, 'gui_'+opt[2:].replace('-', '_'))
            family = qstring_to_unicode(obj.itemText(obj.currentIndex())).strip()
            if family != 'None':
                cmd.extend([opt, family])

        return cmd
    def title(self):
        '''Return the text of the title field as unicode.'''
        raw_title = self.gui_title.text()
        return qstring_to_unicode(raw_title)
    def write_metadata(self):
        '''
        Write the contents of the metadata widgets to the database for the
        current book. A missing author sort value is derived from the
        stored authors.
        '''
        book_id = self.id
        self.db.set_title(book_id, qstring_to_unicode(self.gui_title.text()))

        authors = unicode(self.gui_author.text())
        if authors:
            self.db.set_authors(book_id, string_to_authors(authors))

        sort_value = qstring_to_unicode(self.gui_author_sort.text())
        if not sort_value:
            stored = self.db.authors(book_id, index_is_id=True) or _('Unknown')
            # '|' inside a stored author name stands for a comma.
            names = [a.strip().replace('|', ',') for a in stored.split(',')]
            sort_value = authors_to_sort_string(names)
        self.db.set_author_sort(book_id, sort_value)

        self.db.set_publisher(book_id, qstring_to_unicode(self.gui_publisher.text()))
        tag_list = qstring_to_unicode(self.tags.text()).split(',')
        self.db.set_tags(book_id, tag_list)
        self.db.set_series(book_id, qstring_to_unicode(self.series.currentText()))
        self.db.set_series_index(book_id, self.series_index.value())
        if self.cover_changed:
            self.db.set_cover(book_id, pixmap_to_data(self.cover.pixmap()))
    def accept(self):
        '''
        Build the command line and close the dialog as accepted.

        With a database the metadata is written, the cover is exported to a
        temporary file and the options are saved for the book; without one
        the command line is stored as the global conversion defaults.
        Nothing happens when no command line could be built.
        '''
        cmdline = self.build_commandline()
        if cmdline is None:
            return
        if not self.db:
            config.set('LRF_conversion_defaults', cmdline)
            QDialog.accept(self)
            return

        self.cover_file = None
        self.write_metadata()
        cover_data = self.db.cover(self.row)
        if cover_data:
            self.cover_file = PersistentTemporaryFile(suffix='.jpeg')
            self.cover_file.write(cover_data)
            self.cover_file.close()
        # The options are saved before the temporary cover path is appended.
        self.db.set_conversion_options(self.id, self.output_format.lower(), cmdline)
        if self.cover_file:
            cmdline.extend([u'--cover', self.cover_file.name])
        self.cmdline = [unicode(arg) for arg in cmdline]
        QDialog.accept(self)
 class LRFBulkDialog(LRFSingleDialog):
    '''
    Variant of the LRF conversion dialog for converting several books at
    once: the first category and its page are removed and no per-book
    metadata options are put on the command line.
    '''

    def __init__(self, window):
        '''
        :param window: parent widget of the dialog
        '''
        QDialog.__init__(self, window)
        Ui_LRFSingleDialog.__init__(self)
        self.setupUi(self)
        self.populate_list()
        # Drop the first category entry together with its stacked page.
        self.categoryList.takeItem(0)
        self.stack.removeWidget(self.stack.widget(0))
        self.categoryList.setCurrentRow(0)
        self.initialize_common()
        self.setWindowTitle(_('Bulk convert ebooks to LRF'))

    def accept(self):
        '''
        Store the command line, without the title, author, publisher and
        comment options, in ``self.cmdline`` and close the dialog as
        accepted. Nothing happens when no command line could be built.
        '''
        cmdline = self.build_commandline()
        if cmdline is None:
            # build_commandline() already reported the problem to the user.
            return
        self.cmdline = [unicode(i) for i in cmdline]
        for meta in ('--title', '--author', '--publisher', '--comment'):
            try:
                index = self.cmdline.index(meta)
                # Remove the option together with its value.
                self.cmdline[index:index+2] = []
            except ValueError:
                continue

        self.cover_file = None
        QDialog.accept(self)
--- a/src/calibre/gui2/dialogs/lrf_single.ui
+++ b/src/calibre/gui2/dialogs/lrf_single.ui
--- a/src/calibre/gui2/dialogs/mobi.py
+++ b/src/calibre/gui2/dialogs/mobi.py
@ -1,22 +0,0 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 from calibre.gui2.dialogs.epub import Config as _Config
 from calibre.ebooks.mobi.from_any import config as mobiconfig
 class Config(_Config):
    '''Conversion dialog for MOBI output, built on the EPUB dialog.'''

    OUTPUT = 'MOBI'

    def __init__(self, parent, db, row=None):
        '''
        :param parent: parent widget
        :param db: database object passed on to the base dialog
        :param row: row of the book to convert, or None
        '''
        _Config.__init__(self, parent, db, row=row, config=mobiconfig)

    def hide_controls(self):
        '''Make the widgets listed below invisible in this dialog.'''
        hidden = (
            'profile_label', 'opt_profile',
            'opt_dont_split_on_page_breaks', 'opt_preserve_tag_structure',
            'opt_linearize_tables', 'page_map_box',
        )
        for widget_name in hidden:
            getattr(self, widget_name).setVisible(False)
--- a/src/calibre/gui2/tools.py
+++ b/src/calibre/gui2/tools.py
@ -11,17 +11,8 @@ from PyQt4.Qt import QDialog
 from calibre.customize.ui import available_input_formats
 from calibre.utils.config import prefs
 from calibre.gui2.dialogs.lrf_single import LRFSingleDialog, LRFBulkDialog
 from calibre.gui2.dialogs.epub import Config as EPUBConvert
 from calibre.gui2.dialogs.mobi import Config as MOBIConvert
 import calibre.gui2.dialogs.comicconf as ComicConf
 from calibre.gui2 import warning_dialog
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.ebooks.lrf import preferred_source_formats as LRF_PREFERRED_SOURCE_FORMATS
 from calibre.ebooks.metadata.opf import OPFCreator
 from calibre.ebooks.epub.from_any import SOURCE_FORMATS as EPUB_PREFERRED_SOURCE_FORMATS, config as epubconfig
 from calibre.ebooks.mobi.from_any import config as mobiconfig
 from calibre.ebooks.lrf.comic.convert_from import config as comicconfig
 # Ordered list of source formats. Items closer to the beginning are
 # preferred for conversion over those toward the end.