diff --git a/src/calibre/ebooks/mobi/utils.py b/src/calibre/ebooks/mobi/utils.py index 0d9d9ce80f..0ae992f438 100644 --- a/src/calibre/ebooks/mobi/utils.py +++ b/src/calibre/ebooks/mobi/utils.py @@ -364,7 +364,7 @@ def count_set_bits(num): num >>= 1 return ans -def to_base(num, base=32): +def to_base(num, base=32, min_num_digits=None): digits = string.digits + string.ascii_uppercase sign = 1 if num >= 0 else -1 if num == 0: return '0' @@ -373,6 +373,8 @@ def to_base(num, base=32): while num: ans.append(digits[(num % base)]) num //= base + if min_num_digits is not None and len(ans) < min_num_digits: + ans.extend('0'*(min_num_digits - len(ans))) if sign < 0: ans.append('-') ans.reverse() diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py index cbf2b8376c..e0447da16e 100644 --- a/src/calibre/ebooks/mobi/writer8/main.py +++ b/src/calibre/ebooks/mobi/writer8/main.py @@ -9,14 +9,22 @@ __docformat__ = 'restructuredtext en' import copy from functools import partial +from collections import defaultdict import cssutils +from lxml import etree -from calibre import isbytestring -from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath) +from calibre import isbytestring, force_unicode +from calibre.ebooks.mobi.utils import to_base +from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath, + extract, XHTML) XML_DOCS = OEB_DOCS | {SVG_MIME} +# References to record numbers in KF8 are stored as base-32 encoded integers, +# with 4 digits +to_ref = partial(to_base, base=32, min_num_digits=4) + class KF8Writer(object): def __init__(self, oeb, opts, resources): @@ -24,10 +32,10 @@ class KF8Writer(object): self.used_images = set() self.resources = resources self.dup_data() + self.flows = [None] # First flow item is reserved for the text self.replace_resource_links() - - self.create_pieces() + self.extract_css_into_flows() def dup_data(self): ''' Duplicate data so that any changes we make to markup/CSS only @@ -57,12 +65,13 @@ class KF8Writer(object): idx = self.resources.item_map.get(ref, None) if idx is not None: is_image = self.resources.records[idx-1][:4] not in {b'FONT'} + idx = to_ref(idx) if is_image: self.used_images.add(ref) - return 'kindle:embed:%04d?mime=%s'%(idx, + return 'kindle:embed:%s?mime=%s'%(idx, self.resources.mime_map[ref]) else: - return 'kindle:embed:%04d'%idx + return 'kindle:embed:%s'%idx return oref for item in self.oeb.manifest: @@ -90,11 +99,44 @@ class KF8Writer(object): replacer = partial(pointer, item) cssutils.replaceUrls(sheet, replacer, ignoreImportRules=True) + def extract_css_into_flows(self): + inlines = defaultdict(list) # Ensure identical