AZW3 Output: Fix large tables (with many columns) not rendering correctly on Kindles. Fixes #1489495 [Private bug](https://bugs.launchpad.net/calibre/+bug/1489495)

This commit is contained in:
Kovid Goyal 2015-08-27 22:17:18 +05:30
parent 77c7a231d6
commit eba6551580
3 changed files with 37 additions and 12 deletions

View File

@@ -60,10 +60,11 @@ def update_internal_links(mobi8_reader, log):
def remove_kindlegen_markup(parts, aid_anchor_suffix, linked_aids): def remove_kindlegen_markup(parts, aid_anchor_suffix, linked_aids):
# we can safely remove all of the Kindlegen generated aid tags # we can safely remove all of the Kindlegen generated aid attributes and
find_tag_with_aid_pattern = re.compile(r'''(<[^>]*\said\s*=[^>]*>)''', # calibre generated cid attributes
find_tag_with_aid_pattern = re.compile(r'''(<[^>]*\s[ac]id\s*=[^>]*>)''',
re.IGNORECASE) re.IGNORECASE)
within_tag_aid_position_pattern = re.compile(r'''\said\s*=['"]([^'"]*)['"]''') within_tag_aid_position_pattern = re.compile(r'''\s[ac]id\s*=['"]([^'"]*)['"]''')
for i in xrange(len(parts)): for i in xrange(len(parts)):
part = parts[i] part = parts[i]

View File

@@ -101,6 +101,11 @@ class KF8Writer(object):
if not tag.text and not tag.get('src', False): if not tag.text and not tag.get('src', False):
tag.getparent().remove(tag) tag.getparent().remove(tag)
# Remove [ac]id attributes as they are used by this code for anchor
# to offset mapping
for tag in XPath('//*[@aid or @cid]')(root):
tag.attrib.pop('aid', None), tag.attrib.pop('cid', None)
def replace_resource_links(self): def replace_resource_links(self):
''' Replace links to resources (raster images/fonts) with pointers to ''' Replace links to resources (raster images/fonts) with pointers to
the MOBI record containing the resource. The pointers are of the form: the MOBI record containing the resource. The pointers are of the form:
@@ -269,10 +274,18 @@ class KF8Writer(object):
def insert_aid_attributes(self): def insert_aid_attributes(self):
self.id_map = {} self.id_map = {}
cid = 0
for i, item in enumerate(self.oeb.spine): for i, item in enumerate(self.oeb.spine):
root = self.data(item) root = self.data(item)
aidbase = i * int(1e6) aidbase = i * int(1e6)
j = 0 j = 0
def in_table(elem):
p = elem.getparent()
if p is None:
return False
if barename(p.tag).lower() == 'table':
return True
return in_table(p)
for tag in root.iterdescendants(etree.Element): for tag in root.iterdescendants(etree.Element):
id_ = tag.attrib.get('id', None) id_ = tag.attrib.get('id', None)
if id_ is None and tag.tag == XHTML('a'): if id_ is None and tag.tag == XHTML('a'):
@@ -280,15 +293,26 @@ class KF8Writer(object):
id_ = tag.attrib.get('name', None) id_ = tag.attrib.get('name', None)
if id_ is not None: if id_ is not None:
tag.attrib['id'] = id_ tag.attrib['id'] = id_
if id_ is not None or barename(tag.tag).lower() in aid_able_tags: tagname = barename(tag.tag).lower()
aid = to_base(aidbase + j, base=32) if id_ is not None or tagname in aid_able_tags:
tag.set('aid', aid) if tagname == 'table' or in_table(tag):
if tag.tag == XHTML('body'): # The Kindle renderer barfs on large tables that have
self.id_map[(item.href, '')] = aid # aid on any of their tags. See
if id_ is not None: # https://bugs.launchpad.net/bugs/1489495
self.id_map[(item.href, id_)] = aid if id_:
cid += 1
val = 'c%d' % cid
self.id_map[(item.href, id_)] = val
tag.set('cid', val)
else:
aid = to_base(aidbase + j, base=32)
tag.set('aid', aid)
if tag.tag == XHTML('body'):
self.id_map[(item.href, '')] = aid
if id_ is not None:
self.id_map[(item.href, id_)] = aid
j += 1 j += 1
def chunk_it_up(self): def chunk_it_up(self):
placeholder_map = {} placeholder_map = {}

View File

@@ -360,7 +360,7 @@ class Chunker(object):
# an offset from the start of the chunk to the start of the tag pointed # an offset from the start of the chunk to the start of the tag pointed
# to by the link. # to by the link.
aid_map = {} # Map of aid to (fid, offset_from_start_of_chunk, offset_from_start_of_text) aid_map = {} # Map of aid to (fid, offset_from_start_of_chunk, offset_from_start_of_text)
for match in re.finditer(br'<[^>]+? aid=[\'"]([A-Z0-9]+)[\'"]', rebuilt_text): for match in re.finditer(br'<[^>]+? [ac]id=[\'"]([cA-Z0-9]+)[\'"]', rebuilt_text):
offset = match.start() offset = match.start()
pos_fid = None pos_fid = None
for chunk in self.chunk_table: for chunk in self.chunk_table: