MOBI Input: When a link in a MOBI file points to a location just before a block element, and that block element has a page break on it, the link can end up pointing to the wrong place after conversion. In such cases, adjust the link target to point directly at the block element.

This commit is contained in:
Kovid Goyal 2011-09-02 14:54:01 -06:00
parent 6678badce1
commit c6a957ee93

View File

@ -523,6 +523,7 @@ class MobiReader(object):
for x in root.xpath('//ncx'): for x in root.xpath('//ncx'):
x.getparent().remove(x) x.getparent().remove(x)
svg_tags = [] svg_tags = []
forwardable_anchors = []
for i, tag in enumerate(root.iter(etree.Element)): for i, tag in enumerate(root.iter(etree.Element)):
tag.attrib.pop('xmlns', '') tag.attrib.pop('xmlns', '')
for x in tag.attrib: for x in tag.attrib:
@ -651,6 +652,14 @@ class MobiReader(object):
attrib['href'] = "#filepos%d" % int(filepos) attrib['href'] = "#filepos%d" % int(filepos)
except ValueError: except ValueError:
pass pass
if (tag.tag == 'a' and attrib.get('id', '').startswith('filepos')
and not tag.text and (tag.tail is None or not
tag.tail.strip()) and tag.getnext().tag in ('h1', 'h2',
'h3', 'h4', 'h5', 'h6', 'div', 'p')):
# This is an empty anchor immediately before a block tag; move
# its id onto the block tag instead
forwardable_anchors.append(tag)
if styles: if styles:
ncls = None ncls = None
rule = '; '.join(styles) rule = '; '.join(styles)
@ -679,6 +688,17 @@ class MobiReader(object):
if hasattr(parent, 'remove'): if hasattr(parent, 'remove'):
parent.remove(tag) parent.remove(tag)
for tag in forwardable_anchors:
block = tag.getnext()
tag.getparent().remove(tag)
if 'id' in block.attrib:
tag.tail = block.text
block.text = None
block.insert(0, tag)
else:
block.attrib['id'] = tag.attrib['id']
def get_left_whitespace(self, tag): def get_left_whitespace(self, tag):
def whitespace(tag): def whitespace(tag):