EPUB Output: Mangle filenames to ensure they are unique, to support broken EPUB Readers like Aldiko and Stanza. Currently disabled pending further testing.

This commit is contained in:
Kovid Goyal 2010-12-04 20:20:20 -07:00
parent 63727bc608
commit cafe81f2e4
2 changed files with 133 additions and 0 deletions

View File

@ -142,6 +142,9 @@ class EPUBOutput(OutputFormatPlugin):
def convert(self, oeb, output_path, input_plugin, opts, log):
self.log, self.opts, self.oeb = log, opts, oeb
#from calibre.ebooks.oeb.transforms.filenames import UniqueFilenames
#UniqueFilenames()(oeb, opts)
self.workaround_ade_quirks()
self.workaround_webkit_quirks()
self.upshift_markup()

View File

@ -0,0 +1,130 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import posixpath
from urlparse import urldefrag
from lxml import etree
import cssutils
from calibre.ebooks.oeb.base import rewrite_links, urlnormalize
class RenameFiles(object):
'''
Rename files and adjust all links pointing to them. Note that the spine
and manifest are not touched by this transform.
'''
def __init__(self, rename_map):
self.rename_map = rename_map
def __call__(self, oeb, opts):
self.log = oeb.logger
self.opts = opts
self.oeb = oeb
for item in oeb.manifest.items:
self.current_item = item
if etree.iselement(item.data):
rewrite_links(self.current_item.data, self.url_replacer)
elif hasattr(item.data, 'cssText'):
cssutils.replaceUrls(item.data, self.url_replacer)
if self.oeb.guide:
for ref in self.oeb.guide.values():
href = urlnormalize(ref.href)
href, frag = urldefrag(href)
replacement = self.rename_map.get(href, None)
if replacement is not None:
nhref = replacement
if frag:
nhref += '#' + frag
ref.href = nhref
if self.oeb.toc:
self.fix_toc_entry(self.oeb.toc)
def fix_toc_entry(self, toc):
if toc.href:
href = urlnormalize(toc.href)
href, frag = urldefrag(href)
replacement = self.rename_map.get(href, None)
if replacement is not None:
nhref = replacement
if frag:
nhref = '#'.join((nhref, frag))
toc.href = nhref
for x in toc:
self.fix_toc_entry(x)
def url_replacer(self, orig_url):
url = urlnormalize(orig_url)
path, frag = urldefrag(url)
href = self.current_item.abshref(path)
replacement = self.rename_map.get(href, None)
if replacement is None:
return orig_url
replacement = self.current_item.relhref(replacement)
if frag:
replacement += '#' + frag
return replacement
class UniqueFilenames(object):
'Ensure that every item in the manifest has a unique filename'
def __call__(self, oeb, opts):
self.log = oeb.logger
self.opts = opts
self.oeb = oeb
self.seen_filenames = set([])
self.rename_map = {}
for item in list(oeb.manifest.items):
fname = posixpath.basename(item.href)
if fname in self.seen_filenames:
suffix = self.unique_suffix(fname)
data = item.data
base, ext = posixpath.splitext(item.href)
nhref = base + suffix + ext
nhref = oeb.manifest.generate(href=nhref)[1]
nitem = oeb.manifest.add(item.id, nhref, item.media_type, data=data,
fallback=item.fallback)
self.seen_filenames.add(posixpath.basename(nhref))
self.rename_map[item.href] = nhref
if item.spine_position is not None:
oeb.spine.insert(item.spine_position, nitem, item.linear)
oeb.spine.remove(item)
oeb.manifest.remove(item)
else:
self.seen_filenames.add(fname)
if self.rename_map:
self.log('Found non-unique filenames, renaming to support broken'
' EPUB readers like FBReader, Aldiko and Stanza...')
from pprint import pformat
self.log.debug(pformat(self.rename_map))
renamer = RenameFiles(self.rename_map)
renamer(oeb, opts)
def unique_suffix(self, fname):
base, ext = posixpath.splitext(fname)
c = 0
while True:
c += 1
suffix = '_u%d'%c
candidate = base + suffix + ext
if candidate not in self.seen_filenames:
return suffix