AZW3 Output: Remove duplicate anchors to work around some Kindle renderers using the last occurrence of an anchor as the target instead of the first. Fixes #1454199 [hyperlink conversion to azw3 and display on kindle](https://bugs.launchpad.net/calibre/+bug/1454199)

This commit is contained in:
Kovid Goyal 2015-05-12 17:35:12 +05:30
parent 87cbf338c1
commit 34a4c13284
2 changed files with 22 additions and 1 deletions

View File

@ -191,6 +191,8 @@ class MOBIOutput(OutputFormatPlugin):
self.check_for_periodical()
if create_kf8:
from calibre.ebooks.mobi.writer8.cleanup import remove_duplicate_anchors
remove_duplicate_anchors(self.oeb)
# Split on pagebreaks so that the resulting KF8 is faster to load
from calibre.ebooks.oeb.transforms.split import Split
Split()(self.oeb, self.opts)
@ -301,10 +303,12 @@ class AZW3Output(OutputFormatPlugin):
def convert(self, oeb, output_path, input_plugin, opts, log):
from calibre.ebooks.mobi.writer2.resources import Resources
from calibre.ebooks.mobi.writer8.main import create_kf8_book
from calibre.ebooks.mobi.writer8.cleanup import remove_duplicate_anchors
self.oeb, self.opts, self.log = oeb, opts, log
opts.mobi_periodical = self.is_periodical
passthrough = getattr(opts, 'mobi_passthrough', False)
remove_duplicate_anchors(oeb)
resources = Resources(self.oeb, self.opts, self.is_periodical,
add_fonts=True, process_images=False)

View File

@ -15,7 +15,8 @@ class CSSCleanup(object):
self.log, self.opts = log, opts
def __call__(self, item, stylizer):
if not hasattr(item.data, 'xpath'): return
if not hasattr(item.data, 'xpath'):
return
# The Kindle touch displays all black pages if the height is set on
# body
@ -23,3 +24,19 @@ class CSSCleanup(object):
style = stylizer.style(body)
style.drop('height')
def remove_duplicate_anchors(oeb):
    """Strip repeated ``id``/``name`` anchors from every spine document.

    The Kindle apparently has incorrect behavior for duplicate anchors, see
    https://bugs.launchpad.net/calibre/+bug/1454199
    Only the first occurrence of each anchor value is kept; the attribute
    carrying any later occurrence is removed from its element.

    ``id`` and ``name`` values share a single pool of seen anchors, and that
    pool is reset for every spine item, so duplicates are only detected
    within one document. Spine items whose ``data`` has no ``xpath``
    attribute are skipped.

    :param oeb: book container providing ``spine`` (iterable of items with a
        ``data`` attribute) and ``log`` (object with a ``debug`` method).
    :return: ``None``; the documents are modified in place.
    """
    anchor_attrs = ('id', 'name')
    for item in oeb.spine:
        root = item.data
        if not hasattr(root, 'xpath'):
            # No queryable tree on this item, nothing to clean up
            continue
        seen = set()
        for tag in root.xpath('//*[@id or @name]'):
            # 'id' is checked before 'name', so when both carry the same
            # value on one element, it is the name attribute that is dropped
            for attr in anchor_attrs:
                anchor = tag.get(attr)
                if anchor is None:
                    continue
                if anchor not in seen:
                    seen.add(anchor)
                    continue
                oeb.log.debug('Removing duplicate anchor:', anchor)
                tag.attrib.pop(attr)