AZW3 Input: Fix a regression in calibre 5 that broke processing of files with inline flow replacements. Fixes #1906459 [Private bug](https://bugs.launchpad.net/calibre/+bug/1906459)

This commit is contained in:
Kovid Goyal 2020-12-02 06:09:43 +05:30
parent 7c5abc873e
commit a67fb90335
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -9,7 +9,6 @@ __docformat__ = 'restructuredtext en'
import re, os import re, os
from calibre.ebooks.chardet import strip_encoding_declarations from calibre.ebooks.chardet import strip_encoding_declarations
from polyglot.builtins import unicode_type, range
def update_internal_links(mobi8_reader, log): def update_internal_links(mobi8_reader, log):
@@ -125,21 +124,25 @@ def update_flow_links(mobi8_reader, resource_map, log):
font_index_pattern = re.compile(r'''kindle:embed:([0-9|A-V]+)''', re.IGNORECASE) font_index_pattern = re.compile(r'''kindle:embed:([0-9|A-V]+)''', re.IGNORECASE)
url_css_index_pattern = re.compile(r'''kindle:flow:([0-9|A-V]+)\?mime=text/css[^\)]*''', re.IGNORECASE) url_css_index_pattern = re.compile(r'''kindle:flow:([0-9|A-V]+)\?mime=text/css[^\)]*''', re.IGNORECASE)
for flow in mr.flows: def flow_as_unicode(flow):
if flow is None: # 0th flow is None if isinstance(flow, bytes):
flows.append(flow)
continue
if not isinstance(flow, unicode_type):
try: try:
flow = flow.decode(mr.header.codec) flow = flow.decode(mr.header.codec)
except UnicodeDecodeError: except UnicodeDecodeError:
log.error('Flow part has invalid %s encoded bytes'%mr.header.codec) log.error('Flow part has invalid %s encoded bytes'%mr.header.codec)
flow = flow.decode(mr.header.codec, 'replace') flow = flow.decode(mr.header.codec, 'replace')
return flow
for flow in mr.flows:
if flow is None: # 0th flow is None
flows.append(flow)
continue
flow = flow_as_unicode(flow)
# links to raster image files from image tags # links to raster image files from image tags
# image_pattern # image_pattern
srcpieces = img_pattern.split(flow) srcpieces = img_pattern.split(flow)
for j in range(1, len(srcpieces), 2): for j in range(1, len(srcpieces), 2):
tag = srcpieces[j] tag = srcpieces[j]
if tag.startswith('<im') or tag.startswith('<svg:image'): if tag.startswith('<im') or tag.startswith('<svg:image'):
@@ -208,7 +211,7 @@ def update_flow_links(mobi8_reader, resource_map, log):
tag = '' tag = ''
else: else:
if fi.format == 'inline': if fi.format == 'inline':
flowtext = mr.flows[num] flowtext = flow_as_unicode(mr.flows[num])
tag = flowtext tag = flowtext
else: else:
replacement = '"../' + fi.dir + '/' + fi.fname + '"' replacement = '"../' + fi.dir + '/' + fi.fname + '"'