From eba6551580c328e987c091612d2b52f5d541d94c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 27 Aug 2015 22:17:18 +0530
Subject: [PATCH] AZW3 Output: Fix large tables (with many columns) not
 rendering correctly on Kindles. Fixes #1489495 [Private
 bug](https://bugs.launchpad.net/calibre/+bug/1489495)

---
 src/calibre/ebooks/mobi/reader/markup.py    |  7 ++--
 src/calibre/ebooks/mobi/writer8/main.py     | 40 ++++++++++++++++-----
 src/calibre/ebooks/mobi/writer8/skeleton.py |  2 +-
 3 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/src/calibre/ebooks/mobi/reader/markup.py b/src/calibre/ebooks/mobi/reader/markup.py
index 69c70addb7..71b7fe549b 100644
--- a/src/calibre/ebooks/mobi/reader/markup.py
+++ b/src/calibre/ebooks/mobi/reader/markup.py
@@ -60,10 +60,11 @@ def update_internal_links(mobi8_reader, log):
 
 def remove_kindlegen_markup(parts, aid_anchor_suffix, linked_aids):
 
-    # we can safely remove all of the Kindlegen generated aid tags
-    find_tag_with_aid_pattern = re.compile(r'''(<[^>]*\said\s*=[^>]*>)''',
+    # we can safely remove all of the Kindlegen generated aid attributes and
+    # calibre generated cid attributes
+    find_tag_with_aid_pattern = re.compile(r'''(<[^>]*\s[ac]id\s*=[^>]*>)''',
             re.IGNORECASE)
-    within_tag_aid_position_pattern = re.compile(r'''\said\s*=['"]([^'"]*)['"]''')
+    within_tag_aid_position_pattern = re.compile(r'''\s[ac]id\s*=['"]([^'"]*)['"]''')
 
     for i in xrange(len(parts)):
         part = parts[i]
diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py
index 94377b9b40..720d5b0d28 100644
--- a/src/calibre/ebooks/mobi/writer8/main.py
+++ b/src/calibre/ebooks/mobi/writer8/main.py
@@ -101,6 +101,11 @@ class KF8Writer(object):
                 if not tag.text and not tag.get('src', False):
                     tag.getparent().remove(tag)
 
+            # Strip any aid/cid attributes already present in the markup, as
+            # this writer reserves them for its anchor to offset mapping
+            for tag in XPath('//*[@aid or @cid]')(root):
+                tag.attrib.pop('aid', None), tag.attrib.pop('cid', None)
+
     def replace_resource_links(self):
         ''' Replace links to resources (raster images/fonts) with pointers to
         the MOBI record containing the resource. The pointers are of the form:
@@ -269,10 +274,18 @@ class KF8Writer(object):
 
     def insert_aid_attributes(self):
         self.id_map = {}
+        cid = 0
         for i, item in enumerate(self.oeb.spine):
             root = self.data(item)
             aidbase = i * int(1e6)
             j = 0
+            def in_table(elem):
+                p = elem.getparent()
+                if p is None:
+                    return False
+                if barename(p.tag).lower() == 'table':
+                    return True
+                return in_table(p)
             for tag in root.iterdescendants(etree.Element):
                 id_ = tag.attrib.get('id', None)
                 if id_ is None and tag.tag == XHTML('a'):
@@ -280,15 +293,26 @@ class KF8Writer(object):
                     id_ = tag.attrib.get('name', None)
                     if id_ is not None:
                         tag.attrib['id'] = id_
-                if id_ is not None or barename(tag.tag).lower() in aid_able_tags:
-                    aid = to_base(aidbase + j, base=32)
-                    tag.set('aid', aid)
-                    if tag.tag == XHTML('body'):
-                        self.id_map[(item.href, '')] = aid
-                    if id_ is not None:
-                        self.id_map[(item.href, id_)] = aid
+                tagname = barename(tag.tag).lower()
+                if id_ is not None or tagname in aid_able_tags:
+                    if tagname == 'table' or in_table(tag):
+                        # The Kindle renderer fails on large tables when any
+                        # of their tags has an aid; tag ids with cid. See
+                        # https://bugs.launchpad.net/bugs/1489495
+                        if id_:
+                            cid += 1
+                            val = 'c%d' % cid
+                            self.id_map[(item.href, id_)] = val
+                            tag.set('cid', val)
+                    else:
+                        aid = to_base(aidbase + j, base=32)
+                        tag.set('aid', aid)
+                        if tag.tag == XHTML('body'):
+                            self.id_map[(item.href, '')] = aid
+                        if id_ is not None:
+                            self.id_map[(item.href, id_)] = aid
 
-                    j += 1
+                        j += 1
 
     def chunk_it_up(self):
         placeholder_map = {}
diff --git a/src/calibre/ebooks/mobi/writer8/skeleton.py b/src/calibre/ebooks/mobi/writer8/skeleton.py
index a34ed36270..6e0c7fa36a 100644
--- a/src/calibre/ebooks/mobi/writer8/skeleton.py
+++ b/src/calibre/ebooks/mobi/writer8/skeleton.py
@@ -360,7 +360,7 @@ class Chunker(object):
         # an offset from the start of the chunk to the start of the tag pointed
         # to by the link.
         aid_map = {}  # Map of aid to (fid, offset_from_start_of_chunk, offset_from_start_of_text)
-        for match in re.finditer(br'<[^>]+? aid=[\'"]([A-Z0-9]+)[\'"]', rebuilt_text):
+        for match in re.finditer(br'<[^>]+? [ac]id=[\'"]([cA-Z0-9]+)[\'"]', rebuilt_text):
             offset = match.start()
             pos_fid = None
             for chunk in self.chunk_table: