From 0b6dc7f8ed784e4a9df6bb59a13f5cb331a6c107 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 22 Apr 2009 14:35:32 -0700 Subject: [PATCH] Conversion pipeline is now a superset of any2epub :) --- src/calibre/ebooks/conversion/cli.py | 5 ++ src/calibre/ebooks/conversion/plumber.py | 74 +++++++++++++++- src/calibre/ebooks/epub/output.py | 22 +++++ src/calibre/ebooks/metadata/__init__.py | 3 + src/calibre/ebooks/oeb/base.py | 27 ++++-- src/calibre/ebooks/oeb/output.py | 3 +- src/calibre/ebooks/oeb/stylizer.py | 21 +++-- src/calibre/ebooks/oeb/transforms/flatcss.py | 43 ++++++++-- src/calibre/ebooks/oeb/transforms/guide.py | 47 +++++++++++ src/calibre/ebooks/oeb/transforms/jacket.py | 66 +++++++++++++++ src/calibre/ebooks/oeb/transforms/metadata.py | 84 +++++++++++++++++++ src/calibre/ebooks/oeb/transforms/split.py | 4 +- 12 files changed, 374 insertions(+), 25 deletions(-) create mode 100644 src/calibre/ebooks/epub/output.py create mode 100644 src/calibre/ebooks/oeb/transforms/guide.py create mode 100644 src/calibre/ebooks/oeb/transforms/jacket.py create mode 100644 src/calibre/ebooks/oeb/transforms/metadata.py diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index a30549cbc3..ae0af532ab 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -117,6 +117,9 @@ def add_pipeline_options(parser, plumber): 'line_height', 'linearize_tables', 'extra_css', + 'margin_top', 'margin_left', 'margin_right', + 'margin_bottom', 'dont_justify', + 'insert_blank_line', 'remove_paragraph_spacing', ] ), @@ -124,6 +127,8 @@ def add_pipeline_options(parser, plumber): _('Control auto-detection of document structure.'), [ 'dont_split_on_page_breaks', 'chapter', 'chapter_mark', + 'prefer_metadata_cover', 'remove_first_image', + 'insert_comments', ] ), diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 22c11303ad..f55d677d08 100644 --- 
a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -195,7 +195,7 @@ OptionRecommendation(name='toc_filter', OptionRecommendation(name='chapter', recommended_value="//*[((name()='h1' or name()='h2') and " - "re:test(., 'chapter|book|section|part', 'i')) or @class " + r"re:test(., 'chapter|book|section|part\s+', 'i')) or @class " "= 'chapter']", level=OptionRecommendation.LOW, help=_('An XPath expression to detect chapter titles. The default ' 'is to consider

<h1> or <h2>

tags that contain the words ' @@ -227,6 +227,64 @@ OptionRecommendation(name='extra_css', 'rules.') ), +OptionRecommendation(name='margin_top', + recommended_value=5.0, level=OptionRecommendation.LOW, + help=_('Set the top margin in pts. Default is %default')), + +OptionRecommendation(name='margin_bottom', + recommended_value=5.0, level=OptionRecommendation.LOW, + help=_('Set the bottom margin in pts. Default is %default')), + +OptionRecommendation(name='margin_left', + recommended_value=5.0, level=OptionRecommendation.LOW, + help=_('Set the left margin in pts. Default is %default')), + +OptionRecommendation(name='margin_right', + recommended_value=5.0, level=OptionRecommendation.LOW, + help=_('Set the right margin in pts. Default is %default')), + +OptionRecommendation(name='dont_justify', + recommended_value=False, level=OptionRecommendation.LOW, + help=_('Do not force text to be justified in output. Whether text ' + 'is actually displayed justified or not depends on whether ' + 'the ebook format and reading device support justification.') + ), + +OptionRecommendation(name='remove_paragraph_spacing', + recommended_value=False, level=OptionRecommendation.LOW, + help=_('Remove spacing between paragraphs. Also sets an indent on ' + 'paragraphs of 1.5em. Spacing removal will not work ' + 'if the source file does not use paragraphs (

<p> or <div>

tags).') + ), + +OptionRecommendation(name='prefer_metadata_cover', + recommended_value=False, level=OptionRecommendation.LOW, + help=_('Use the cover detected from the source file in preference ' + 'to the specified cover.') + ), + +OptionRecommendation(name='insert_blank_line', + recommended_value=False, level=OptionRecommendation.LOW, + help=_('Insert a blank line between paragraphs. Will not work ' + 'if the source file does not use paragraphs (

<p> or <div>

tags).' + ) + ), + +OptionRecommendation(name='remove_first_image', + recommended_value=False, level=OptionRecommendation.LOW, + help=_('Remove the first image from the input ebook. Useful if the ' + 'first image in the source file is a cover and you are specifying ' + 'an external cover.' + ) + ), + +OptionRecommendation(name='insert_comments', + recommended_value=False, level=OptionRecommendation.LOW, + help=_('Insert the comments/summary from the book metadata at the start of ' + 'the book. This is useful if your ebook reader does not support ' + 'displaying the comments from the metadata.' + ) + ), OptionRecommendation(name='read_metadata_from_opf', @@ -244,7 +302,8 @@ OptionRecommendation(name='title', OptionRecommendation(name='authors', recommended_value=None, level=OptionRecommendation.LOW, - help=_('Set the authors. Multiple authors should be separated ')), + help=_('Set the authors. Multiple authors should be separated by ' + 'ampersands.')), OptionRecommendation(name='title_sort', recommended_value=None, level=OptionRecommendation.LOW, @@ -428,7 +487,6 @@ OptionRecommendation(name='language', mi.cover = None self.user_metadata = mi - def setup_options(self): ''' Setup the `self.opts` object. 
@@ -479,9 +537,16 @@ OptionRecommendation(name='language', if not hasattr(self.oeb, 'manifest'): self.oeb = create_oebbook(self.log, self.oeb, self.opts) + from calibre.ebooks.oeb.transforms.guide import Clean + Clean()(self.oeb, self.opts) + self.opts.source = self.opts.input_profile self.opts.dest = self.opts.output_profile + from calibre.ebooks.oeb.transforms.metadata import MergeMetadata + MergeMetadata()(self.oeb, self.user_metadata, + self.opts.prefer_metadata_cover) + from calibre.ebooks.oeb.transforms.structure import DetectStructure DetectStructure()(self.oeb, self.opts) @@ -495,6 +560,9 @@ OptionRecommendation(name='language', else: fkey = map(float, fkey.split(',')) + from calibre.ebooks.oeb.transforms.jacket import Jacket + Jacket()(self.oeb, self.opts) + if self.opts.extra_css and os.path.exists(self.opts.extra_css): self.opts.extra_css = open(self.opts.extra_css, 'rb').read() diff --git a/src/calibre/ebooks/epub/output.py b/src/calibre/ebooks/epub/output.py new file mode 100644 index 0000000000..4ce13720e0 --- /dev/null +++ b/src/calibre/ebooks/epub/output.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + + +from calibre.customize.conversion import OutputFormatPlugin +from calibre import CurrentDir + +class EPUBOutput(OutputFormatPlugin): + + name = 'EPUB Output' + author = 'Kovid Goyal' + file_type = 'epub' + + def convert(self, oeb, output_path, input_plugin, opts, log): + self.log, self.opts = log, opts + + diff --git a/src/calibre/ebooks/metadata/__init__.py b/src/calibre/ebooks/metadata/__init__.py index a14950a064..793c607527 100644 --- a/src/calibre/ebooks/metadata/__init__.py +++ b/src/calibre/ebooks/metadata/__init__.py @@ -260,6 +260,9 @@ class MetaInformation(object): x = 1.0 return '%d'%x if int(x) == x else '%.2f'%x + def authors_from_string(self, raw): 
+ self.authors = string_to_authors(raw) + def __unicode__(self): ans = [] def fmt(x, y): diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index a36ad8f676..81120aaf2e 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -514,7 +514,8 @@ class Metadata(object): scheme = Attribute(lambda term: 'scheme' if \ term == OPF('meta') else OPF('scheme'), [DC('identifier'), OPF('meta')]) - file_as = Attribute(OPF('file-as'), [DC('creator'), DC('contributor')]) + file_as = Attribute(OPF('file-as'), [DC('creator'), DC('contributor'), + DC('title')]) role = Attribute(OPF('role'), [DC('creator'), DC('contributor')]) event = Attribute(OPF('event'), [DC('date')]) id = Attribute('id') @@ -593,6 +594,19 @@ class Metadata(object): yield key __iter__ = iterkeys + def clear(self, key): + l = self.items[key] + for x in list(l): + l.remove(x) + + def filter(self, key, predicate): + l = self.items[key] + for x in list(l): + if predicate(x): + l.remove(x) + + + def __getitem__(self, key): return self.items[key] @@ -1011,7 +1025,7 @@ class Manifest(object): media_type = OEB_DOC_MIME elif media_type in OEB_STYLES: media_type = OEB_CSS_MIME - attrib = {'id': item.id, 'href': item.href, + attrib = {'id': item.id, 'href': urlunquote(item.href), 'media-type': media_type} if item.fallback: attrib['fallback'] = item.fallback @@ -1202,6 +1216,9 @@ class Guide(object): self.refs[type] = ref return ref + def remove(self, type): + return self.refs.pop(type, None) + def iterkeys(self): for type in self.refs: yield type @@ -1229,7 +1246,7 @@ class Guide(object): def to_opf1(self, parent=None): elem = element(parent, 'guide') for ref in self.refs.values(): - attrib = {'type': ref.type, 'href': ref.href} + attrib = {'type': ref.type, 'href': urlunquote(ref.href)} if ref.title: attrib['title'] = ref.title element(elem, 'reference', attrib=attrib) @@ -1345,7 +1362,7 @@ class TOC(object): def to_opf1(self, tour): for node in self.nodes: element(tour, 
'site', attrib={ - 'title': node.title, 'href': node.href}) + 'title': node.title, 'href': urlunquote(node.href)}) node.to_opf1(tour) return tour @@ -1358,7 +1375,7 @@ class TOC(object): point = element(parent, NCX('navPoint'), attrib=attrib) label = etree.SubElement(point, NCX('navLabel')) element(label, NCX('text')).text = node.title - element(point, NCX('content'), src=node.href) + element(point, NCX('content'), src=urlunquote(node.href)) node.to_ncx(point) return parent diff --git a/src/calibre/ebooks/oeb/output.py b/src/calibre/ebooks/oeb/output.py index ba62897215..6f141f7e5e 100644 --- a/src/calibre/ebooks/oeb/output.py +++ b/src/calibre/ebooks/oeb/output.py @@ -9,6 +9,7 @@ from lxml import etree from calibre.customize.conversion import OutputFormatPlugin from calibre import CurrentDir +from urllib import unquote class OEBOutput(OutputFormatPlugin): @@ -32,7 +33,7 @@ class OEBOutput(OutputFormatPlugin): f.write(raw) for item in oeb_book.manifest: - path = os.path.abspath(item.href) + path = os.path.abspath(unquote(item.href)) dir = os.path.dirname(path) if not os.path.exists(dir): os.makedirs(dir) diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py index 34abea32f5..752a135db3 100644 --- a/src/calibre/ebooks/oeb/stylizer.py +++ b/src/calibre/ebooks/oeb/stylizer.py @@ -11,6 +11,7 @@ __copyright__ = '2008, Marshall T. 
Vandegrift ' import os import itertools import re +import logging import copy from weakref import WeakKeyDictionary from xml.dom import SyntaxErr as CSSSyntaxError @@ -106,7 +107,8 @@ class CSSSelector(etree.XPath): class Stylizer(object): STYLESHEETS = WeakKeyDictionary() - def __init__(self, tree, path, oeb, profile=PROFILES['PRS505'], extra_css=''): + def __init__(self, tree, path, oeb, profile=PROFILES['PRS505'], + extra_css='', user_css=''): self.oeb = oeb self.profile = profile self.logger = oeb.logger @@ -115,7 +117,8 @@ class Stylizer(object): cssname = os.path.splitext(basename)[0] + '.css' stylesheets = [HTML_CSS_STYLESHEET] head = xpath(tree, '/h:html/h:head')[0] - parser = cssutils.CSSParser(fetcher=self._fetch_css_file) + parser = cssutils.CSSParser(fetcher=self._fetch_css_file, + log=logging.getLogger('calibre.css')) for elem in head: if elem.tag == XHTML('style') and elem.text \ and elem.get('type', CSS_MIME) in OEB_STYLES: @@ -135,11 +138,12 @@ class Stylizer(object): (path, item.href)) continue stylesheets.append(sitem.data) - if extra_css: - text = XHTML_CSS_NAMESPACE + extra_css - stylesheet = parser.parseString(text, href=cssname) - stylesheet.namespaces['h'] = XHTML_NS - stylesheets.append(stylesheet) + for x in (extra_css, user_css): + if x: + text = XHTML_CSS_NAMESPACE + x + stylesheet = parser.parseString(text, href=cssname) + stylesheet.namespaces['h'] = XHTML_NS + stylesheets.append(stylesheet) rules = [] index = 0 self.stylesheets = set() @@ -288,6 +292,9 @@ class Style(object): self._lineHeight = None stylizer._styles[element] = self + def set(self, prop, val): + self._style[prop] = val + def _update_cssdict(self, cssdict): self._style.update(cssdict) diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index ca96d28a8d..216697ae53 100644 --- a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -114,12 +114,27 @@ class 
CSSFlattener(object): def stylize_spine(self): self.stylizers = {} profile = self.context.source + css = '' for item in self.oeb.spine: html = item.data + body = html.find(XHTML('body')) + bs = body.get('style', '').split(';') + bs.append('margin-top: 0pt') + bs.append('margin-bottom: 0pt') + bs.append('margin-left : %fpt'%\ + float(self.context.margin_left)) + bs.append('margin-right : %fpt'%\ + float(self.context.margin_right)) + bs.append('text-align: '+ \ + ('left' if self.context.dont_justify else 'justify')) + body.set('style', '; '.join(bs)) + stylizer = Stylizer(html, item.href, self.oeb, profile, - extra_css=self.context.extra_css) + user_css=self.context.extra_css, + extra_css=css) self.stylizers[item] = stylizer + def baseline_node(self, node, stylizer, sizes, csize): csize = stylizer.style(node)['font-size'] if node.text: @@ -219,6 +234,15 @@ class CSSFlattener(object): if self.lineh and 'line-height' not in cssdict: lineh = self.lineh / psize cssdict['line-height'] = "%0.5fem" % lineh + if (self.context.remove_paragraph_spacing or + self.context.insert_blank_line) and tag in ('p', 'div'): + for prop in ('margin', 'padding', 'border'): + for edge in ('top', 'bottom'): + cssdict['%s-%s'%(prop, edge)] = '0pt' + if self.context.insert_blank_line: + cssdict['margin-top'] = cssdict['margin-bottom'] = '0.5em' + if self.context.remove_paragraph_spacing: + cssdict['text-indent'] = '1.5em' if cssdict: items = cssdict.items() items.sort() @@ -253,12 +277,16 @@ class CSSFlattener(object): href = item.relhref(href) etree.SubElement(head, XHTML('link'), rel='stylesheet', type=CSS_MIME, href=href) - if stylizer.page_rule: - items = stylizer.page_rule.items() - items.sort() - css = '; '.join("%s: %s" % (key, val) for key, val in items) - style = etree.SubElement(head, XHTML('style'), type=CSS_MIME) - style.text = "@page { %s; }" % css + stylizer.page_rule['margin-top'] = '%fpt'%\ + float(self.context.margin_top) + stylizer.page_rule['margin-bottom'] = '%fpt'%\ + 
float(self.context.margin_bottom) + + items = stylizer.page_rule.items() + items.sort() + css = '; '.join("%s: %s" % (key, val) for key, val in items) + style = etree.SubElement(head, XHTML('style'), type=CSS_MIME) + style.text = "@page { %s; }" % css def replace_css(self, css): manifest = self.oeb.manifest @@ -285,3 +313,4 @@ class CSSFlattener(object): for item in self.oeb.spine: stylizer = self.stylizers[item] self.flatten_head(item, stylizer, href) + diff --git a/src/calibre/ebooks/oeb/transforms/guide.py b/src/calibre/ebooks/oeb/transforms/guide.py new file mode 100644 index 0000000000..b20eddc6fe --- /dev/null +++ b/src/calibre/ebooks/oeb/transforms/guide.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + + +class Clean(object): + '''Clean up guide, leaving only a pointer to the cover''' + + def __call__(self, oeb, opts): + from calibre.ebooks.oeb.base import urldefrag + self.oeb, self.log, self.opts = oeb, oeb.log, opts + + cover_href = '' + if 'cover' not in self.oeb.guide: + covers = [] + for x in ('other.ms-coverimage-standard', + 'other.ms-titleimage-standard', 'other.ms-titleimage', + 'other.ms-coverimage', 'other.ms-thumbimage-standard', + 'other.ms-thumbimage'): + if x in self.oeb.guide: + href = self.oeb.guide[x].href + item = self.oeb.manifest.hrefs[href] + covers.append([self.oeb.guide[x], len(item.data)]) + covers.sort(cmp=lambda x,y:cmp(x[1], y[1]), reverse=True) + if covers: + ref = covers[0][0] + if len(covers) > 1: + self.log('Choosing %s:%s as the cover'%(ref.type, ref.href)) + ref.type = 'cover' + self.oeb.guide.refs['cover'] = ref + cover_href = urldefrag(ref.href)[0] + + for x in list(self.oeb.guide): + href = urldefrag(self.oeb.guide[x].href)[0] + if x.lower() != 'cover': + try: + if href != cover_href: + 
self.oeb.manifest.remove(self.oeb.manifest.hrefs[href]) + except KeyError: + pass + self.oeb.guide.remove(x) + + diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py new file mode 100644 index 0000000000..c182faedfa --- /dev/null +++ b/src/calibre/ebooks/oeb/transforms/jacket.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import textwrap + +from lxml import etree + +from calibre.ebooks.oeb.base import XPNSMAP +from calibre import guess_type + +class Jacket(object): + ''' + Book jacket manipulation. Remove first image and insert comments at start of + book. + ''' + + JACKET_TEMPLATE = textwrap.dedent(u'''\ + + + %(title)s + + +

%(title)s

+

%(jacket)s

+
+ %(comments)s +
+ + + ''') + + def remove_first_image(self): + for i, item in enumerate(self.oeb.spine): + if i > 2: break + for img in item.data.xpath('//h:img[@src]', namespace=XPNSMAP): + href = item.abshref(img.get('src')) + image = self.oeb.manifest.hrefs.get(href, None) + if image is not None: + self.log('Removing first image', img.get('src')) + self.oeb.manifest.remove(image) + img.getparent().remove(img) + return + + def insert_comments(self, comments): + self.log('Inserting metadata comments into book...') + comments = comments.replace('\r\n', '\n').replace('\n\n', '

') + html = self.JACKET_TEMPLATE%dict(xmlns=XPNSMAP['h'], + title=self.opts.title, comments=comments, + jacket=_('Book Jacket')) + id, href = self.oeb.manifest.generate('jacket', 'jacket.xhtml') + root = etree.fromstring(html) + item = self.oeb.manifest.add(id, href, guess_type(href)[0], data=root) + self.oeb.spine.insert(0, item, True) + + + def __call__(self, oeb, opts): + self.oeb, self.opts, self.log = oeb, opts, oeb.log + if opts.remove_first_image: + self.remove_fisrt_image() + if opts.insert_comments and opts.comments: + self.insert_comments(opts.comments) diff --git a/src/calibre/ebooks/oeb/transforms/metadata.py b/src/calibre/ebooks/oeb/transforms/metadata.py new file mode 100644 index 0000000000..d2c4dd6309 --- /dev/null +++ b/src/calibre/ebooks/oeb/transforms/metadata.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2009, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +import os + +class MergeMetadata(object): + 'Merge in user metadata, including cover' + + def __call__(self, oeb, mi, prefer_metadata_cover=False): + from calibre.ebooks.oeb.base import DC + self.oeb, self.log = oeb, oeb.log + m = self.oeb.metadata + self.log('Merging user specified metadata...') + if mi.title: + m.clear('title') + m.add('title', mi.title) + if mi.title_sort: + if not m.title: + m.add(DC('title'), mi.title_sort) + m.title[0].file_as = mi.title_sort + if mi.authors: + m.filter('creator', lambda x : x.role.lower() == 'aut') + for a in mi.authors: + attrib = {'role':'aut'} + if mi.author_sort: + attrib['file_as'] = mi.author_sort + m.add('creator', a, attrib=attrib) + if mi.comments: + m.clear('description') + m.add('description', mi.comments) + if mi.publisher: + m.clear('publisher') + m.add('publisher', mi.publisher) + if mi.series: + m.clear('series') + m.add('series', mi.series) + if mi.isbn: + has = False + for x in m.identifier: + if 
x.scheme.lower() == 'isbn': + x.content = mi.isbn + has = True + if not has: + m.add('identifier', mi.isbn, scheme='ISBN') + if mi.language: + m.clear('language') + m.add('language', mi.language) + if mi.book_producer: + m.filter('creator', lambda x : x.role.lower() == 'bkp') + m.add('creator', mi.book_producer, role='bkp') + if mi.series_index is not None: + m.clear('series_index') + m.add('series_index', '%.2f'%mi.series_index) + if mi.rating is not None: + m.clear('rating') + m.add('rating', '%.2f'%mi.rating) + if mi.tags: + m.clear('subject') + for t in mi.tags: + m.add('subject', t) + + self.set_cover(mi, prefer_metadata_cover) + + def set_cover(self, mi, prefer_metadata_cover): + cdata = '' + if mi.cover and os.access(mi.cover, os.R_OK): + cdata = open(mi.cover, 'rb').read() + elif mi.cover_data and mi.cover_data[-1]: + cdata = mi.cover_data[1] + if not cdata: return + if 'cover' in self.oeb.guide: + if not prefer_metadata_cover: + href = self.oeb.guide['cover'].href + self.oeb.manifest.hrefs[href]._data = cdata + else: + id, href = self.oeb.manifest.generate('cover', 'cover.jpg') + self.oeb.manifest.add(id, href, 'image/jpeg', data=cdata) + self.oeb.guide.add('cover', 'Cover', href) + diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py index bee74c54a9..b54b0ebce0 100644 --- a/src/calibre/ebooks/oeb/transforms/split.py +++ b/src/calibre/ebooks/oeb/transforms/split.py @@ -16,7 +16,7 @@ from lxml import etree from lxml.cssselect import CSSSelector from calibre.ebooks.oeb.base import OEB_STYLES, XPNSMAP as NAMESPACES, \ - urldefrag, rewrite_links + urldefrag, rewrite_links, urlunquote from calibre.ebooks.epub import tostring, rules @@ -142,7 +142,7 @@ class Split(object): nhref = anchor_map[frag if frag else None] nhref = self.current_item.relhref(nhref) if frag: - nhref = '#'.join((nhref, frag)) + nhref = '#'.join((urlunquote(nhref), frag)) return nhref return url