From 0b6dc7f8ed784e4a9df6bb59a13f5cb331a6c107 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 22 Apr 2009 14:35:32 -0700
Subject: [PATCH] Conversion pipeline is now a superset of any2epub :)

---
 src/calibre/ebooks/conversion/cli.py          |  5 ++
 src/calibre/ebooks/conversion/plumber.py      | 74 +++++++++++++++-
 src/calibre/ebooks/epub/output.py             | 22 +++++
 src/calibre/ebooks/metadata/__init__.py       |  3 +
 src/calibre/ebooks/oeb/base.py                | 27 ++++--
 src/calibre/ebooks/oeb/output.py              |  3 +-
 src/calibre/ebooks/oeb/stylizer.py            | 21 +++--
 src/calibre/ebooks/oeb/transforms/flatcss.py  | 43 ++++++++--
 src/calibre/ebooks/oeb/transforms/guide.py    | 47 +++++++++++
 src/calibre/ebooks/oeb/transforms/jacket.py   | 66 +++++++++++++++
 src/calibre/ebooks/oeb/transforms/metadata.py | 84 +++++++++++++++++++
 src/calibre/ebooks/oeb/transforms/split.py    |  4 +-
 12 files changed, 374 insertions(+), 25 deletions(-)
 create mode 100644 src/calibre/ebooks/epub/output.py
 create mode 100644 src/calibre/ebooks/oeb/transforms/guide.py
 create mode 100644 src/calibre/ebooks/oeb/transforms/jacket.py
 create mode 100644 src/calibre/ebooks/oeb/transforms/metadata.py
diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py
index a30549cbc3..ae0af532ab 100644
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@@ -117,6 +117,9 @@ def add_pipeline_options(parser, plumber):
                       'line_height',
                       'linearize_tables',
                       'extra_css',
+                      'margin_top', 'margin_left', 'margin_right',
+                      'margin_bottom', 'dont_justify',
+                      'insert_blank_line', 'remove_paragraph_spacing',
                   ]
                   ),
 
@@ -124,6 +127,8 @@ def add_pipeline_options(parser, plumber):
                   _('Control auto-detection of document structure.'),
                   [
                       'dont_split_on_page_breaks', 'chapter', 'chapter_mark',
+                      'prefer_metadata_cover', 'remove_first_image',
+                      'insert_comments',
                   ]
                   ),
 
diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index 22c11303ad..f55d677d08 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -195,7 +195,7 @@ OptionRecommendation(name='toc_filter',
 
 OptionRecommendation(name='chapter',
         recommended_value="//*[((name()='h1' or name()='h2') and "
-              "re:test(., 'chapter|book|section|part', 'i')) or @class "
+              r"re:test(., 'chapter|book|section|part\s+', 'i')) or @class "
               "= 'chapter']", level=OptionRecommendation.LOW,
             help=_('An XPath expression to detect chapter titles. The default '
                 'is to consider <h1> or <h2> tags that contain the words '
@@ -227,6 +227,64 @@ OptionRecommendation(name='extra_css',
                 'rules.')
         ),
 
+OptionRecommendation(name='margin_top',
+        recommended_value=5.0, level=OptionRecommendation.LOW,
+        help=_('Set the top margin in pts. Default is %default')),
+
+OptionRecommendation(name='margin_bottom',
+        recommended_value=5.0, level=OptionRecommendation.LOW,
+        help=_('Set the bottom margin in pts. Default is %default')),
+
+OptionRecommendation(name='margin_left',
+        recommended_value=5.0, level=OptionRecommendation.LOW,
+        help=_('Set the left margin in pts. Default is %default')),
+
+OptionRecommendation(name='margin_right',
+        recommended_value=5.0, level=OptionRecommendation.LOW,
+        help=_('Set the right margin in pts. Default is %default')),
+
+OptionRecommendation(name='dont_justify',
+        recommended_value=False, level=OptionRecommendation.LOW,
+        help=_('Do not force text to be justified in output. Whether text '
+            'is actually displayed justified or not depends on whether '
+            'the ebook format and reading device support justification.')
+        ),
+
+OptionRecommendation(name='remove_paragraph_spacing',
+        recommended_value=False, level=OptionRecommendation.LOW,
+        help=_('Remove spacing between paragraphs. Also sets an indent on '
+        'paragraphs of 1.5em. Spacing removal will not work '
+        'if the source file does not use paragraphs (<p> or <div> tags).')
+        ),
+
+OptionRecommendation(name='prefer_metadata_cover',
+        recommended_value=False, level=OptionRecommendation.LOW,
+        help=_('Use the cover detected from the source file in preference '
+        'to the specified cover.')
+        ),
+
+OptionRecommendation(name='insert_blank_line',
+        recommended_value=False, level=OptionRecommendation.LOW,
+        help=_('Insert a blank line between paragraphs. Will not work '
+            'if the source file does not use paragraphs (<p> or <div> tags).'
+            )
+        ),
+
+OptionRecommendation(name='remove_first_image',
+        recommended_value=False, level=OptionRecommendation.LOW,
+        help=_('Remove the first image from the input ebook. Useful if the '
+        'first image in the source file is a cover and you are specifying '
+        'an external cover.'
+            )
+        ),
+
+OptionRecommendation(name='insert_comments',
+        recommended_value=False, level=OptionRecommendation.LOW,
+        help=_('Insert the comments/summary from the book metadata at the start of '
+            'the book. This is useful if your ebook reader does not support '
+            'displaying the comments from the metadata.'
+            )
+        ),
 
 
 OptionRecommendation(name='read_metadata_from_opf',
@@ -244,7 +302,8 @@ OptionRecommendation(name='title',
 
 OptionRecommendation(name='authors',
     recommended_value=None, level=OptionRecommendation.LOW,
-    help=_('Set the authors. Multiple authors should be separated ')),
+    help=_('Set the authors. Multiple authors should be separated by '
+    'ampersands.')),
 
 OptionRecommendation(name='title_sort',
     recommended_value=None, level=OptionRecommendation.LOW,
@@ -428,7 +487,6 @@ OptionRecommendation(name='language',
             mi.cover = None
         self.user_metadata = mi
 
-
     def setup_options(self):
         '''
         Setup the `self.opts` object.
@@ -479,9 +537,16 @@ OptionRecommendation(name='language',
         if not hasattr(self.oeb, 'manifest'):
             self.oeb = create_oebbook(self.log, self.oeb, self.opts)
 
+        from calibre.ebooks.oeb.transforms.guide import Clean
+        Clean()(self.oeb, self.opts)
+
         self.opts.source = self.opts.input_profile
         self.opts.dest = self.opts.output_profile
 
+        from calibre.ebooks.oeb.transforms.metadata import MergeMetadata
+        MergeMetadata()(self.oeb, self.user_metadata,
+                self.opts.prefer_metadata_cover)
+
         from calibre.ebooks.oeb.transforms.structure import DetectStructure
         DetectStructure()(self.oeb, self.opts)
 
@@ -495,6 +560,9 @@ OptionRecommendation(name='language',
         else:
             fkey = map(float, fkey.split(','))
 
+        from calibre.ebooks.oeb.transforms.jacket import Jacket
+        Jacket()(self.oeb, self.opts)
+
         if self.opts.extra_css and os.path.exists(self.opts.extra_css):
             self.opts.extra_css = open(self.opts.extra_css, 'rb').read()
 
diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py
new file mode 100644
index 0000000000..4ce13720e0
--- /dev/null
+++ b/src/calibre/ebooks/epub/output.py
@@ -0,0 +1,22 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+from calibre.customize.conversion import OutputFormatPlugin
+from calibre import CurrentDir
+
+class EPUBOutput(OutputFormatPlugin):
+    # Output plugin declared for the 'epub' file type.
+    name = 'EPUB Output'
+    author = 'Kovid Goyal'
+    file_type = 'epub'
+
+    def convert(self, oeb, output_path, input_plugin, opts, log):
+        # Only remembers the logger and options; nothing is written yet.
+        self.log = log
+        self.opts = opts
diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py
index a14950a064..793c607527 100644
--- a/src/calibre/ebooks/metadata/__init__.py
+++ b/src/calibre/ebooks/metadata/__init__.py
@@ -260,6 +260,9 @@ class MetaInformation(object):
             x = 1.0
         return '%d'%x if int(x) == x else '%.2f'%x
 
+    def authors_from_string(self, raw):  # set self.authors from one raw string
+        self.authors = string_to_authors(raw)
+
     def __unicode__(self):
         ans = []
         def fmt(x, y):
diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index a36ad8f676..81120aaf2e 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -514,7 +514,8 @@ class Metadata(object):
         scheme  = Attribute(lambda term: 'scheme' if \
                                 term == OPF('meta') else OPF('scheme'),
                             [DC('identifier'), OPF('meta')])
-        file_as = Attribute(OPF('file-as'), [DC('creator'), DC('contributor')])
+        file_as = Attribute(OPF('file-as'), [DC('creator'), DC('contributor'),
+                                             DC('title')])
         role    = Attribute(OPF('role'), [DC('creator'), DC('contributor')])
         event   = Attribute(OPF('event'), [DC('date')])
         id      = Attribute('id')
@@ -593,6 +594,19 @@ class Metadata(object):
             yield key
     __iter__ = iterkeys
 
+    def clear(self, key):  # remove each entry currently stored under key
+        entries = self.items[key]
+        for entry in list(entries):
+            entries.remove(entry)
+
+    def filter(self, key, predicate):
+        '''Remove the entries under key for which predicate(entry) is true'''
+        entries = self.items[key]
+        for entry in list(entries):
+            if predicate(entry):
+                entries.remove(entry)
+
+
     def __getitem__(self, key):
         return self.items[key]
 
@@ -1011,7 +1025,7 @@ class Manifest(object):
                 media_type = OEB_DOC_MIME
             elif media_type in OEB_STYLES:
                 media_type = OEB_CSS_MIME
-            attrib = {'id': item.id, 'href': item.href,
+            attrib = {'id': item.id, 'href': urlunquote(item.href),
                       'media-type': media_type}
             if item.fallback:
                 attrib['fallback'] = item.fallback
@@ -1202,6 +1216,9 @@ class Guide(object):
         self.refs[type] = ref
         return ref
 
+    def remove(self, type):  # returns the removed reference, or None if absent
+        return self.refs.pop(type, None)
+
     def iterkeys(self):
         for type in self.refs:
             yield type
@@ -1229,7 +1246,7 @@ class Guide(object):
     def to_opf1(self, parent=None):
         elem = element(parent, 'guide')
         for ref in self.refs.values():
-            attrib = {'type': ref.type, 'href': ref.href}
+            attrib = {'type': ref.type, 'href': urlunquote(ref.href)}
             if ref.title:
                 attrib['title'] = ref.title
             element(elem, 'reference', attrib=attrib)
@@ -1345,7 +1362,7 @@ class TOC(object):
     def to_opf1(self, tour):
         for node in self.nodes:
             element(tour, 'site', attrib={
-                'title': node.title, 'href': node.href})
+                'title': node.title, 'href': urlunquote(node.href)})
             node.to_opf1(tour)
         return tour
 
@@ -1358,7 +1375,7 @@ class TOC(object):
             point = element(parent, NCX('navPoint'), attrib=attrib)
             label = etree.SubElement(point, NCX('navLabel'))
             element(label, NCX('text')).text = node.title
-            element(point, NCX('content'), src=node.href)
+            element(point, NCX('content'), src=urlunquote(node.href))
             node.to_ncx(point)
         return parent
 
diff --git a/src/calibre/ebooks/oeb/output.py b/src/calibre/ebooks/oeb/output.py
index ba62897215..6f141f7e5e 100644
--- a/src/calibre/ebooks/oeb/output.py
+++ b/src/calibre/ebooks/oeb/output.py
@@ -9,6 +9,7 @@ from lxml import etree
 
 from calibre.customize.conversion import OutputFormatPlugin
 from calibre import CurrentDir
+from urllib import unquote
 
 class OEBOutput(OutputFormatPlugin):
 
@@ -32,7 +33,7 @@ class OEBOutput(OutputFormatPlugin):
                         f.write(raw)
 
             for item in oeb_book.manifest:
-                path = os.path.abspath(item.href)
+                path = os.path.abspath(unquote(item.href))
                 dir = os.path.dirname(path)
                 if not os.path.exists(dir):
                     os.makedirs(dir)
diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py
index 34abea32f5..752a135db3 100644
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@@ -11,6 +11,7 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
 import os
 import itertools
 import re
+import logging
 import copy
 from weakref import WeakKeyDictionary
 from xml.dom import SyntaxErr as CSSSyntaxError
@@ -106,7 +107,8 @@ class CSSSelector(etree.XPath):
 class Stylizer(object):
     STYLESHEETS = WeakKeyDictionary()
 
-    def __init__(self, tree, path, oeb, profile=PROFILES['PRS505'], extra_css=''):
+    def __init__(self, tree, path, oeb, profile=PROFILES['PRS505'],
+            extra_css='', user_css=''):
         self.oeb = oeb
         self.profile = profile
         self.logger = oeb.logger
@@ -115,7 +117,8 @@ class Stylizer(object):
         cssname = os.path.splitext(basename)[0] + '.css'
         stylesheets = [HTML_CSS_STYLESHEET]
         head = xpath(tree, '/h:html/h:head')[0]
-        parser = cssutils.CSSParser(fetcher=self._fetch_css_file)
+        parser = cssutils.CSSParser(fetcher=self._fetch_css_file,
+                log=logging.getLogger('calibre.css'))
         for elem in head:
             if elem.tag == XHTML('style') and elem.text \
                and elem.get('type', CSS_MIME) in OEB_STYLES:
@@ -135,11 +138,12 @@ class Stylizer(object):
                         (path, item.href))
                     continue
                 stylesheets.append(sitem.data)
-        if extra_css:
-            text = XHTML_CSS_NAMESPACE + extra_css
-            stylesheet = parser.parseString(text, href=cssname)
-            stylesheet.namespaces['h'] = XHTML_NS
-            stylesheets.append(stylesheet)
+        for x in (extra_css, user_css):
+            if x:
+                text = XHTML_CSS_NAMESPACE + x
+                stylesheet = parser.parseString(text, href=cssname)
+                stylesheet.namespaces['h'] = XHTML_NS
+                stylesheets.append(stylesheet)
         rules = []
         index = 0
         self.stylesheets = set()
@@ -288,6 +292,9 @@ class Style(object):
         self._lineHeight = None
         stylizer._styles[element] = self
 
+    def set(self, prop, val):  # overwrite a single property of this style
+        self._style[prop] = val
+
     def _update_cssdict(self, cssdict):
         self._style.update(cssdict)
 
diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py
index ca96d28a8d..216697ae53 100644
--- a/src/calibre/ebooks/oeb/transforms/flatcss.py
+++ b/src/calibre/ebooks/oeb/transforms/flatcss.py
@@ -114,12 +114,27 @@ class CSSFlattener(object):
     def stylize_spine(self):
         self.stylizers = {}
         profile = self.context.source
+        css = ''  # stays empty here; handed to Stylizer as extra_css below
         for item in self.oeb.spine:
             html = item.data
+            body = html.find(XHTML('body'))
+            bs = body.get('style', '').split(';')  # existing inline declarations
+            bs.append('margin-top: 0pt')  # new declarations go after existing ones
+            bs.append('margin-bottom: 0pt')
+            bs.append('margin-left : %fpt'%\
+                    float(self.context.margin_left))
+            bs.append('margin-right : %fpt'%\
+                    float(self.context.margin_right))
+            bs.append('text-align: '+ \
+                    ('left' if self.context.dont_justify else 'justify'))
+            body.set('style', '; '.join(bs))  # write the merged style back to <body>
+
             stylizer = Stylizer(html, item.href, self.oeb, profile,
-                    extra_css=self.context.extra_css)
+                    user_css=self.context.extra_css,  # the extra_css option value
+                    extra_css=css)
             self.stylizers[item] = stylizer
 
+
     def baseline_node(self, node, stylizer, sizes, csize):
         csize = stylizer.style(node)['font-size']
         if node.text:
@@ -219,6 +234,15 @@ class CSSFlattener(object):
         if self.lineh and 'line-height' not in cssdict:
             lineh = self.lineh / psize
             cssdict['line-height'] = "%0.5fem" % lineh
+        if (self.context.remove_paragraph_spacing or
+                self.context.insert_blank_line) and tag in ('p', 'div'):
+            for prop in ('margin', 'padding', 'border'):
+                for edge in ('top', 'bottom'):
+                    cssdict['%s-%s'%(prop, edge)] = '0pt'
+            if self.context.insert_blank_line:
+                cssdict['margin-top'] = cssdict['margin-bottom'] = '0.5em'
+            if self.context.remove_paragraph_spacing:
+                cssdict['text-indent'] = '1.5em'
         if cssdict:
             items = cssdict.items()
             items.sort()
@@ -253,12 +277,16 @@ class CSSFlattener(object):
         href = item.relhref(href)
         etree.SubElement(head, XHTML('link'),
             rel='stylesheet', type=CSS_MIME, href=href)
-        if stylizer.page_rule:
-            items = stylizer.page_rule.items()
-            items.sort()
-            css = '; '.join("%s: %s" % (key, val) for key, val in items)
-            style = etree.SubElement(head, XHTML('style'), type=CSS_MIME)
-            style.text = "@page { %s; }" % css
+        stylizer.page_rule['margin-top'] = '%fpt'%\
+                float(self.context.margin_top)
+        stylizer.page_rule['margin-bottom'] = '%fpt'%\
+                float(self.context.margin_bottom)
+
+        items = stylizer.page_rule.items()
+        items.sort()
+        css = '; '.join("%s: %s" % (key, val) for key, val in items)
+        style = etree.SubElement(head, XHTML('style'), type=CSS_MIME)
+        style.text = "@page { %s; }" % css
 
     def replace_css(self, css):
         manifest = self.oeb.manifest
@@ -285,3 +313,4 @@ class CSSFlattener(object):
         for item in self.oeb.spine:
             stylizer = self.stylizers[item]
             self.flatten_head(item, stylizer, href)
+
diff --git a/src/calibre/ebooks/oeb/transforms/guide.py b/src/calibre/ebooks/oeb/transforms/guide.py
new file mode 100644
index 0000000000..b20eddc6fe
--- /dev/null
+++ b/src/calibre/ebooks/oeb/transforms/guide.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+class Clean(object):
+    '''Clean up guide, leaving only a pointer to the cover'''
+
+    def __call__(self, oeb, opts):
+        from calibre.ebooks.oeb.base import urldefrag
+        self.oeb, self.log, self.opts = oeb, oeb.log, opts
+        if 'cover' in self.oeb.guide:
+            # Protect the file behind an existing cover from the cleanup below.
+            cover_href = urldefrag(self.oeb.guide['cover'].href)[0]
+        else:
+            cover_href, covers = '', []
+            for x in ('other.ms-coverimage-standard',
+                    'other.ms-titleimage-standard', 'other.ms-titleimage',
+                    'other.ms-coverimage', 'other.ms-thumbimage-standard',
+                    'other.ms-thumbimage'):
+                if x in self.oeb.guide:
+                    href = urldefrag(self.oeb.guide[x].href)[0]  # drop #fragment
+                    item = self.oeb.manifest.hrefs.get(href, None)  # may be missing
+                    if item is not None:
+                        covers.append([self.oeb.guide[x], len(item.data)])
+            covers.sort(cmp=lambda x,y:cmp(x[1], y[1]), reverse=True)
+            if covers:
+                ref = covers[0][0]  # candidate with the largest data
+                if len(covers) > 1:
+                    self.log('Choosing %s:%s as the cover'%(ref.type, ref.href))
+                ref.type = 'cover'
+                self.oeb.guide.refs['cover'] = ref
+                cover_href = urldefrag(ref.href)[0]
+
+        for x in list(self.oeb.guide):
+            href = urldefrag(self.oeb.guide[x].href)[0]
+            if x.lower() != 'cover':
+                try:
+                    if href != cover_href:  # never delete the cover's own file
+                        self.oeb.manifest.remove(self.oeb.manifest.hrefs[href])
+                except KeyError:
+                    pass
+                self.oeb.guide.remove(x)
diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py
new file mode 100644
index 0000000000..c182faedfa
--- /dev/null
+++ b/src/calibre/ebooks/oeb/transforms/jacket.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import textwrap
+
+from lxml import etree
+
+from calibre.ebooks.oeb.base import XPNSMAP
+from calibre import guess_type
+
+class Jacket(object):
+    '''
+    Book jacket manipulation. Remove first image and insert comments at start of
+    book.
+    '''
+
+    JACKET_TEMPLATE = textwrap.dedent(u'''\
+    <html xmlns="%(xmlns)s">
+        <head>
+            <title>%(title)s</title>
+        </head>
+        <body>
+            <h1 style="text-align: center">%(title)s</h1>
+            <h2 style="text-align: center">%(jacket)s</h2>
+            <div>
+                %(comments)s
+            </div>
+        </body>
+    </html>
+    ''')
+
+    def remove_first_image(self):  # drops the first <img> found in the manifest
+        for i, item in enumerate(self.oeb.spine):
+            if i > 2: break  # only the first three spine items are searched
+            for img in item.data.xpath('//h:img[@src]', namespaces=XPNSMAP):
+                href = item.abshref(img.get('src'))
+                image = self.oeb.manifest.hrefs.get(href, None)
+                if image is not None:
+                    self.log('Removing first image', img.get('src'))
+                    self.oeb.manifest.remove(image)
+                    img.getparent().remove(img)
+                    return
+
+    def insert_comments(self, comments):  # adds a jacket page as first spine item
+        self.log('Inserting metadata comments into book...')
+        comments = comments.replace('\r\n', '\n').replace('\n\n', '<br/><br/>')
+        html = self.JACKET_TEMPLATE%dict(xmlns=XPNSMAP['h'],
+                title=self.opts.title, comments=comments,
+                jacket=_('Book Jacket'))
+        id, href = self.oeb.manifest.generate('jacket', 'jacket.xhtml')
+        root = etree.fromstring(html)
+        item = self.oeb.manifest.add(id, href, guess_type(href)[0], data=root)
+        self.oeb.spine.insert(0, item, True)
+
+    def __call__(self, oeb, opts):
+        '''Apply the remove_first_image and insert_comments options to oeb'''
+        self.oeb, self.opts, self.log = oeb, opts, oeb.log
+        if opts.remove_first_image:
+            self.remove_first_image()
+        if opts.insert_comments and opts.comments:
+            self.insert_comments(opts.comments)
diff --git a/src/calibre/ebooks/oeb/transforms/metadata.py b/src/calibre/ebooks/oeb/transforms/metadata.py
new file mode 100644
index 0000000000..d2c4dd6309
--- /dev/null
+++ b/src/calibre/ebooks/oeb/transforms/metadata.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import with_statement
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os
+
+class MergeMetadata(object):
+    'Merge in user metadata, including cover'
+
+    def __call__(self, oeb, mi, prefer_metadata_cover=False):
+        from calibre.ebooks.oeb.base import DC
+        self.oeb, self.log = oeb, oeb.log
+        m = self.oeb.metadata
+        self.log('Merging user specified metadata...')
+        if mi.title:
+            m.clear('title')
+            m.add('title', mi.title)
+        if mi.title_sort:
+            if not m.title:
+                m.add(DC('title'), mi.title_sort)
+            m.title[0].file_as = mi.title_sort
+        if mi.authors:
+            m.filter('creator', lambda x : x.role.lower() == 'aut')
+            for a in mi.authors:
+                attrib = {'role':'aut'}
+                if mi.author_sort:
+                    attrib['file_as'] = mi.author_sort
+                m.add('creator', a, attrib=attrib)
+        if mi.comments:
+            m.clear('description')
+            m.add('description', mi.comments)
+        if mi.publisher:
+            m.clear('publisher')
+            m.add('publisher', mi.publisher)
+        if mi.series:
+            m.clear('series')
+            m.add('series', mi.series)
+        if mi.isbn:
+            has = False
+            for x in m.identifier:
+                if x.scheme.lower() == 'isbn':
+                    x.content = mi.isbn
+                    has = True
+            if not has:
+                m.add('identifier', mi.isbn, scheme='ISBN')
+        if mi.language:
+            m.clear('language')
+            m.add('language', mi.language)
+        if mi.book_producer:
+            m.filter('creator', lambda x : x.role.lower() == 'bkp')
+            m.add('creator', mi.book_producer, role='bkp')
+        if mi.series_index is not None:
+            m.clear('series_index')
+            m.add('series_index', '%.2f'%mi.series_index)
+        if mi.rating is not None:
+            m.clear('rating')
+            m.add('rating', '%.2f'%mi.rating)
+        if mi.tags:
+            m.clear('subject')
+            for t in mi.tags:
+                m.add('subject', t)
+
+        self.set_cover(mi, prefer_metadata_cover)
+
+    def set_cover(self, mi, prefer_metadata_cover):  # mi.cover beats mi.cover_data
+        cdata = ''
+        if mi.cover and os.access(mi.cover, os.R_OK):
+            with open(mi.cover, 'rb') as f:  # close the handle deterministically
+                cdata = f.read()
+        elif mi.cover_data and mi.cover_data[-1]:
+            cdata = mi.cover_data[1]
+        if not cdata: return
+        if 'cover' in self.oeb.guide:
+            if not prefer_metadata_cover:  # else keep the source file's cover
+                href = self.oeb.guide['cover'].href
+                self.oeb.manifest.hrefs[href]._data = cdata
+        else:
+            id, href = self.oeb.manifest.generate('cover', 'cover.jpg')
+            self.oeb.manifest.add(id, href, 'image/jpeg', data=cdata)
+            self.oeb.guide.add('cover', 'Cover', href)
diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py
index bee74c54a9..b54b0ebce0 100644
--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@@ -16,7 +16,7 @@ from lxml import etree
 from lxml.cssselect import CSSSelector
 
 from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP as NAMESPACES, \
-        urldefrag, rewrite_links
+        urldefrag, rewrite_links, urlunquote
 from calibre.ebooks.epub import tostring, rules
 
 
@@ -142,7 +142,7 @@ class Split(object):
             nhref = anchor_map[frag if frag else None]
             nhref = self.current_item.relhref(nhref)
             if frag:
-                nhref = '#'.join((nhref, frag))
+                nhref = '#'.join((urlunquote(nhref), frag))
 
             return nhref
         return url