From c6a957ee93c5e68035944fbccaba8787f6ea4f8e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 2 Sep 2011 14:54:01 -0600 Subject: [PATCH] MOBI Input: When links in a MOBI file point to just before block elements, and there is a page break on the block element, the links can end up pointing to the wrong place on conversion. Adjust the location in such cases to point to the block element directly. --- src/calibre/ebooks/mobi/reader.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/calibre/ebooks/mobi/reader.py b/src/calibre/ebooks/mobi/reader.py index dff09bc862..6f1c8d8cbc 100644 --- a/src/calibre/ebooks/mobi/reader.py +++ b/src/calibre/ebooks/mobi/reader.py @@ -523,6 +523,7 @@ class MobiReader(object): for x in root.xpath('//ncx'): x.getparent().remove(x) svg_tags = [] + forwardable_anchors = [] for i, tag in enumerate(root.iter(etree.Element)): tag.attrib.pop('xmlns', '') for x in tag.attrib: @@ -651,6 +652,14 @@ class MobiReader(object): attrib['href'] = "#filepos%d" % int(filepos) except ValueError: pass + if (tag.tag == 'a' and attrib.get('id', '').startswith('filepos') + and not tag.text and (tag.tail is None or not + tag.tail.strip()) and tag.getnext().tag in ('h1', 'h2', + 'h3', 'h4', 'h5', 'h6', 'div', 'p')): + # This is an empty anchor immediately before a block tag, move + # the id onto the block tag instead + forwardable_anchors.append(tag) + if styles: ncls = None rule = '; '.join(styles) @@ -679,6 +688,17 @@ class MobiReader(object): if hasattr(parent, 'remove'): parent.remove(tag) + for tag in forwardable_anchors: + block = tag.getnext() + tag.getparent().remove(tag) + + if 'id' in block.attrib: + tag.tail = block.text + block.text = None + block.insert(0, tag) + else: + block.attrib['id'] = tag.attrib['id'] + def get_left_whitespace(self, tag): def whitespace(tag):