diff --git a/src/calibre/ebooks/conversion/plugins/mobi_output.py b/src/calibre/ebooks/conversion/plugins/mobi_output.py
index c005ad4dd2..c32e64f388 100644
--- a/src/calibre/ebooks/conversion/plugins/mobi_output.py
+++ b/src/calibre/ebooks/conversion/plugins/mobi_output.py
@@ -191,6 +191,8 @@ class MOBIOutput(OutputFormatPlugin):
         self.check_for_periodical()
 
         if create_kf8:
+            from calibre.ebooks.mobi.writer8.cleanup import remove_duplicate_anchors
+            remove_duplicate_anchors(self.oeb)
             # Split on pagebreaks so that the resulting KF8 is faster to load
             from calibre.ebooks.oeb.transforms.split import Split
             Split()(self.oeb, self.opts)
@@ -301,10 +303,12 @@ class AZW3Output(OutputFormatPlugin):
     def convert(self, oeb, output_path, input_plugin, opts, log):
         from calibre.ebooks.mobi.writer2.resources import Resources
         from calibre.ebooks.mobi.writer8.main import create_kf8_book
+        from calibre.ebooks.mobi.writer8.cleanup import remove_duplicate_anchors
 
         self.oeb, self.opts, self.log = oeb, opts, log
         opts.mobi_periodical = self.is_periodical
         passthrough = getattr(opts, 'mobi_passthrough', False)
+        remove_duplicate_anchors(oeb)
 
         resources = Resources(self.oeb, self.opts, self.is_periodical,
                 add_fonts=True, process_images=False)
diff --git a/src/calibre/ebooks/mobi/writer8/cleanup.py b/src/calibre/ebooks/mobi/writer8/cleanup.py
index 56792be187..e40faa20ba 100644
--- a/src/calibre/ebooks/mobi/writer8/cleanup.py
+++ b/src/calibre/ebooks/mobi/writer8/cleanup.py
@@ -15,7 +15,8 @@ class CSSCleanup(object):
         self.log, self.opts = log, opts
 
     def __call__(self, item, stylizer):
-        if not hasattr(item.data, 'xpath'): return
+        if not hasattr(item.data, 'xpath'):
+            return
 
         # The Kindle touch displays all black pages if the height is set on
         # body
@@ -23,3 +24,34 @@ class CSSCleanup(object):
             style = stylizer.style(body)
             style.drop('height')
 
+
+def remove_duplicate_anchors(oeb):
+    '''
+    Strip repeated anchor values from every document in the spine.
+
+    For each spine item, elements carrying an ``id`` or ``name`` attribute are
+    visited in the order the XPath query returns them, checking ``id`` before
+    ``name`` on each element. The first attribute seen with a given value is
+    kept; any later ``id`` or ``name`` attribute with that same value is
+    removed and reported through ``oeb.log.debug``. Values are tracked
+    separately for each spine item, and items whose ``data`` has no ``xpath``
+    attribute are skipped.
+
+    The Kindle apparently has incorrect behavior for duplicate anchors, see
+    https://bugs.launchpad.net/calibre/+bug/1454199
+    '''
+    for spine_item in oeb.spine:
+        if not hasattr(spine_item.data, 'xpath'):
+            continue
+        known = set()
+        for elem in spine_item.data.xpath('//*[@id or @name]'):
+            for attr_name in ('id', 'name'):
+                value = elem.get(attr_name)
+                if value is None:
+                    continue
+                if value not in known:
+                    # First sighting of this value in the item: keep it
+                    known.add(value)
+                    continue
+                oeb.log.debug('Removing duplicate anchor:', value)
+                elem.attrib.pop(attr_name)