From eba6551580c328e987c091612d2b52f5d541d94c Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Thu, 27 Aug 2015 22:17:18 +0530
Subject: [PATCH] AZW3 Output: Fix large tables (with many columns) not rendering correctly on Kindles. Fixes #1489495 [Private bug](https://bugs.launchpad.net/calibre/+bug/1489495)

---
 src/calibre/ebooks/mobi/reader/markup.py    |  7 ++--
 src/calibre/ebooks/mobi/writer8/main.py     | 40 ++++++++++++++++-----
 src/calibre/ebooks/mobi/writer8/skeleton.py |  2 +-
 3 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/src/calibre/ebooks/mobi/reader/markup.py b/src/calibre/ebooks/mobi/reader/markup.py
index 69c70addb7..71b7fe549b 100644
--- a/src/calibre/ebooks/mobi/reader/markup.py
+++ b/src/calibre/ebooks/mobi/reader/markup.py
@@ -60,10 +60,11 @@ def update_internal_links(mobi8_reader, log):
 
 def remove_kindlegen_markup(parts, aid_anchor_suffix, linked_aids):
 
-    # we can safely remove all of the Kindlegen generated aid tags
-    find_tag_with_aid_pattern = re.compile(r'''(<[^>]*\said\s*=[^>]*>)''',
+    # we can safely remove all of the Kindlegen generated aid attributes and
+    # calibre generated cid attributes
+    find_tag_with_aid_pattern = re.compile(r'''(<[^>]*\s[ac]id\s*=[^>]*>)''',
             re.IGNORECASE)
-    within_tag_aid_position_pattern = re.compile(r'''\said\s*=['"]([^'"]*)['"]''')
+    within_tag_aid_position_pattern = re.compile(r'''\s[ac]id\s*=['"]([^'"]*)['"]''')
 
     for i in xrange(len(parts)):
         part = parts[i]
diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py
index 94377b9b40..720d5b0d28 100644
--- a/src/calibre/ebooks/mobi/writer8/main.py
+++ b/src/calibre/ebooks/mobi/writer8/main.py
@@ -101,6 +101,11 @@ class KF8Writer(object):
                 if not tag.text and not tag.get('src', False):
                     tag.getparent().remove(tag)
 
+            # Remove [ac]id attributes as they are used by this code for anchor
+            # to offset mapping
+            for tag in XPath('//*[@aid or @cid]')(root):
+                tag.attrib.pop('aid', None), tag.attrib.pop('cid', None)
+
     def replace_resource_links(self):
         ''' Replace links to resources (raster images/fonts) with pointers to
         the MOBI record containing the resource. The pointers are of the form:
@@ -269,10 +274,18 @@ class KF8Writer(object):
 
     def insert_aid_attributes(self):
         self.id_map = {}
+        cid = 0
         for i, item in enumerate(self.oeb.spine):
             root = self.data(item)
             aidbase = i * int(1e6)
             j = 0
+            def in_table(elem):
+                p = elem.getparent()
+                if p is None:
+                    return False
+                if barename(p.tag).lower() == 'table':
+                    return True
+                return in_table(p)
             for tag in root.iterdescendants(etree.Element):
                 id_ = tag.attrib.get('id', None)
                 if id_ is None and tag.tag == XHTML('a'):
@@ -280,15 +293,26 @@ class KF8Writer(object):
                     id_ = tag.attrib.get('name', None)
                     if id_ is not None:
                         tag.attrib['id'] = id_
-                if id_ is not None or barename(tag.tag).lower() in aid_able_tags:
-                    aid = to_base(aidbase + j, base=32)
-                    tag.set('aid', aid)
-                    if tag.tag == XHTML('body'):
-                        self.id_map[(item.href, '')] = aid
-                    if id_ is not None:
-                        self.id_map[(item.href, id_)] = aid
+                tagname = barename(tag.tag).lower()
+                if id_ is not None or tagname in aid_able_tags:
+                    if tagname == 'table' or in_table(tag):
+                        # The Kindle renderer barfs on large tables that have
+                        # aid on any of their tags. See
+                        # https://bugs.launchpad.net/bugs/1489495
+                        if id_:
+                            cid += 1
+                            val = 'c%d' % cid
+                            self.id_map[(item.href, id_)] = val
+                            tag.set('cid', val)
+                    else:
+                        aid = to_base(aidbase + j, base=32)
+                        tag.set('aid', aid)
+                        if tag.tag == XHTML('body'):
+                            self.id_map[(item.href, '')] = aid
+                        if id_ is not None:
+                            self.id_map[(item.href, id_)] = aid
 
-                    j += 1
+                        j += 1
 
     def chunk_it_up(self):
         placeholder_map = {}
diff --git a/src/calibre/ebooks/mobi/writer8/skeleton.py b/src/calibre/ebooks/mobi/writer8/skeleton.py
index a34ed36270..6e0c7fa36a 100644
--- a/src/calibre/ebooks/mobi/writer8/skeleton.py
+++ b/src/calibre/ebooks/mobi/writer8/skeleton.py
@@ -360,7 +360,7 @@ class Chunker(object):
         # an offset from the start of the chunk to the start of the tag pointed
         # to by the link.
         aid_map = {}  # Map of aid to (fid, offset_from_start_of_chunk, offset_from_start_of_text)
-        for match in re.finditer(br'<[^>]+? aid=[\'"]([A-Z0-9]+)[\'"]', rebuilt_text):
+        for match in re.finditer(br'<[^>]+? [ac]id=[\'"]([cA-Z0-9]+)[\'"]', rebuilt_text):
             offset = match.start()
             pos_fid = None
             for chunk in self.chunk_table: