KF8 Output: Handle all CSS correctly

2025-07-09 03:04:10 -04:00 · 2012-04-17 22:20:50 +05:30 · 2012-04-17 22:20:50 +05:30 · 38340559ac
commit 38340559ac
parent 2b4f74b86d
3 changed files with 69 additions and 11 deletions
--- a/src/calibre/ebooks/mobi/utils.py
+++ b/src/calibre/ebooks/mobi/utils.py
@@ -364,7 +364,7 @@ def count_set_bits(num):
        num >>= 1
    return ans
-def to_base(num, base=32):
+def to_base(num, base=32, min_num_digits=None):
    digits = string.digits + string.ascii_uppercase
    sign = 1 if num >= 0 else -1
    if num == 0: return '0'
@@ -373,6 +373,8 @@ def to_base(num, base=32):
    while num:
        ans.append(digits[(num % base)])
        num //= base
    if min_num_digits is not None and len(ans) < min_num_digits:
        ans.extend('0'*(min_num_digits - len(ans)))
    if sign < 0:
        ans.append('-')
    ans.reverse()
--- a/src/calibre/ebooks/mobi/writer8/main.py
+++ b/src/calibre/ebooks/mobi/writer8/main.py
@@ -9,14 +9,22 @@ __docformat__ = 'restructuredtext en'
 import copy
 from functools import partial
 from collections import defaultdict
 import cssutils
 from lxml import etree
-from calibre import isbytestring
+from calibre import isbytestring, force_unicode
-from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath)
+from calibre.ebooks.mobi.utils import to_base
 from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath,
        extract, XHTML)
 XML_DOCS = OEB_DOCS | {SVG_MIME}
 # References to record numbers in KF8 are stored as base-32 encoded integers,
 # with 4 digits
 to_ref = partial(to_base, base=32, min_num_digits=4)
 class KF8Writer(object):
    def __init__(self, oeb, opts, resources):
@@ -24,10 +32,10 @@ class KF8Writer(object):
        self.used_images = set()
        self.resources = resources
        self.dup_data()
        self.flows = [None] # First flow item is reserved for the text
        self.replace_resource_links()
-
+        self.extract_css_into_flows()
        self.create_pieces()
    def dup_data(self):
        ''' Duplicate data so that any changes we make to markup/CSS only
@@ -57,12 +65,13 @@ class KF8Writer(object):
            idx = self.resources.item_map.get(ref, None)
            if idx is not None:
                is_image = self.resources.records[idx-1][:4] not in {b'FONT'}
                idx = to_ref(idx)
                if is_image:
                    self.used_images.add(ref)
-                    return 'kindle:embed:%04d?mime=%s'%(idx,
+                    return 'kindle:embed:%s?mime=%s'%(idx,
                            self.resources.mime_map[ref])
                else:
-                    return 'kindle:embed:%04d'%idx
+                    return 'kindle:embed:%s'%idx
            return oref
        for item in self.oeb.manifest:
@@ -90,11 +99,44 @@ class KF8Writer(object):
                replacer = partial(pointer, item)
                cssutils.replaceUrls(sheet, replacer, ignoreImportRules=True)
    def extract_css_into_flows(self):
        '''
        Move CSS out of the documents into ``self.flows`` and rewrite the
        markup so it refers to the flows via ``kindle:flow:`` URLs.

        Three passes are made:

        1. Every manifest item whose media type is in ``OEB_STYLES`` has its
           ``cssText`` passed through ``force_unicode`` and appended to
           ``self.flows``; the number recorded for it is stored in ``sheets``
           keyed by the item's href.
        2. For every spine item whose data has an ``xpath`` attribute,
           ``h:link`` elements whose resolved href is in ``sheets`` get their
           href rewritten, and every ``h:style`` element is either dropped
           (empty/whitespace-only text) or replaced in place by a new
           ``link`` element.
        3. Each distinct inline CSS text is appended to ``self.flows`` once
           and all the replacement ``link`` elements created for it are
           pointed at that flow.

        Returns nothing; it mutates ``self.flows`` and the spine documents.
        '''
        inlines = defaultdict(list) # Ensure identical <style>s not repeated
        sheets = {}
-    def create_pieces(self):
+        for item in self.oeb.manifest:
-        self.flows = [None] # First flow item is reserved for the text
+            if item.media_type in OEB_STYLES:
                data = self.data(item).cssText
                self.flows.append(force_unicode(data, 'utf-8'))
                # NOTE(review): taken after the append, so this is the new
                # entry's list index + 1 -- confirm that is the flow number
                # the kindle:flow: reference is supposed to carry.
                sheets[item.href] = len(self.flows)
        for item in self.oeb.spine:
            root = self.data(item)
-            root
+            if not hasattr(root, 'xpath'): continue
            # Re-point <link> elements that resolve to a known stylesheet
            for link in XPath('//h:link[@href]')(root):
                href = item.abshref(link.get('href'))
                idx = sheets.get(href, None)
                if idx is not None:
                    idx = to_ref(idx)
                    link.set('href', 'kindle:flow:%s?mime=text/css'%idx)
            # Swap each non-empty <style> for a <link> placeholder at the
            # same position; the href is filled in once the flow exists
            for tag in XPath('//h:style')(root):
                p = tag.getparent()
                idx = p.index(tag)
                raw = tag.text
                if not raw or not raw.strip():
                    # No CSS to preserve, just drop the element
                    extract(tag)
                    continue
                repl = etree.Element(XHTML('link'), type='text/css',
                        rel='stylesheet')
                p.insert(idx, repl)
                extract(tag)
                # Group placeholders by CSS text so identical <style>
                # blocks end up sharing a single flow
                inlines[raw].append(repl)
        for raw, elems in inlines.iteritems():
            self.flows.append(raw)
            # NOTE(review): as above, computed after the append (list
            # index + 1) -- confirm against the expected flow numbering.
            idx = to_ref(len(self.flows))
            for link in elems:
                link.set('href', 'kindle:flow:%s?mime=text/css'%idx)
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -357,7 +357,21 @@ def urlnormalize(href):
    parts = (urlquote(part) for part in parts)
    return urlunparse(parts)
-
+def extract(elem):
    """
    Remove ``elem`` from its tree, together with its children and text.

    The element's tail text is not lost: it is appended to the tail of the
    preceding sibling or, when there is no preceding sibling, to the
    parent's text. If ``elem`` has no parent nothing is done.

    :param elem: the element to detach. The code uses ``getparent()``,
        ``getprevious()`` and ``tail`` on it, and ``remove()`` on its parent.
    :return: None
    """
    parent = elem.getparent()
    if parent is not None:
        if elem.tail:
            # Re-home the text that followed the element before removing it
            previous = elem.getprevious()
            if previous is None:
                # No preceding sibling: the tail continues the parent's text
                parent.text = (parent.text or '') + elem.tail
            else:
                previous.tail = (previous.tail or '') + elem.tail
        parent.remove(elem)
 class DummyHandler(logging.Handler):