KF8 Output: Handle all CSS correctly

This commit is contained in:
Kovid Goyal 2012-04-17 22:20:50 +05:30
parent 2b4f74b86d
commit 38340559ac
3 changed files with 69 additions and 11 deletions

View File

@ -364,7 +364,7 @@ def count_set_bits(num):
num >>= 1
return ans
def to_base(num, base=32):
def to_base(num, base=32, min_num_digits=None):
digits = string.digits + string.ascii_uppercase
sign = 1 if num >= 0 else -1
if num == 0: return '0'
@ -373,6 +373,8 @@ def to_base(num, base=32):
while num:
ans.append(digits[(num % base)])
num //= base
if min_num_digits is not None and len(ans) < min_num_digits:
ans.extend('0'*(min_num_digits - len(ans)))
if sign < 0:
ans.append('-')
ans.reverse()

View File

@ -9,14 +9,22 @@ __docformat__ = 'restructuredtext en'
import copy
from functools import partial
from collections import defaultdict
import cssutils
from lxml import etree
from calibre import isbytestring
from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath)
from calibre import isbytestring, force_unicode
from calibre.ebooks.mobi.utils import to_base
from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath,
extract, XHTML)
XML_DOCS = OEB_DOCS | {SVG_MIME}
# References to record numbers in KF8 are stored as base-32 encoded integers,
# with 4 digits
to_ref = partial(to_base, base=32, min_num_digits=4)
class KF8Writer(object):
def __init__(self, oeb, opts, resources):
@ -24,10 +32,10 @@ class KF8Writer(object):
self.used_images = set()
self.resources = resources
self.dup_data()
self.flows = [None] # First flow item is reserved for the text
self.replace_resource_links()
self.create_pieces()
self.extract_css_into_flows()
def dup_data(self):
''' Duplicate data so that any changes we make to markup/CSS only
@ -57,12 +65,13 @@ class KF8Writer(object):
idx = self.resources.item_map.get(ref, None)
if idx is not None:
is_image = self.resources.records[idx-1][:4] not in {b'FONT'}
idx = to_ref(idx)
if is_image:
self.used_images.add(ref)
return 'kindle:embed:%04d?mime=%s'%(idx,
return 'kindle:embed:%s?mime=%s'%(idx,
self.resources.mime_map[ref])
else:
return 'kindle:embed:%04d'%idx
return 'kindle:embed:%s'%idx
return oref
for item in self.oeb.manifest:
@ -90,11 +99,44 @@ class KF8Writer(object):
replacer = partial(pointer, item)
cssutils.replaceUrls(sheet, replacer, ignoreImportRules=True)
def extract_css_into_flows(self):
inlines = defaultdict(list) # Ensure identical <style>s not repeated
sheets = {}
def create_pieces(self):
self.flows = [None] # First flow item is reserved for the text
for item in self.oeb.manifest:
if item.media_type in OEB_STYLES:
data = self.data(item).cssText
self.flows.append(force_unicode(data, 'utf-8'))
sheets[item.href] = len(self.flows)
for item in self.oeb.spine:
root = self.data(item)
root
if not hasattr(root, 'xpath'): continue
for link in XPath('//h:link[@href]')(root):
href = item.abshref(link.get('href'))
idx = sheets.get(href, None)
if idx is not None:
idx = to_ref(idx)
link.set('href', 'kindle:flow:%s?mime=text/css'%idx)
for tag in XPath('//h:style')(root):
p = tag.getparent()
idx = p.index(tag)
raw = tag.text
if not raw or not raw.strip():
extract(tag)
continue
repl = etree.Element(XHTML('link'), type='text/css',
rel='stylesheet')
p.insert(idx, repl)
extract(tag)
inlines[raw].append(repl)
for raw, elems in inlines.iteritems():
self.flows.append(raw)
idx = to_ref(len(self.flows))
for link in elems:
link.set('href', 'kindle:flow:%s?mime=text/css'%idx)

View File

@ -357,7 +357,21 @@ def urlnormalize(href):
parts = (urlquote(part) for part in parts)
return urlunparse(parts)
def extract(elem):
"""
Removes this element from the tree, including its children and
text. The tail text is joined to the previous element or
parent.
"""
parent = elem.getparent()
if parent is not None:
if elem.tail:
previous = elem.getprevious()
if previous is None:
parent.text = (parent.text or '') + elem.tail
else:
previous.tail = (previous.tail or '') + elem.tail
parent.remove(elem)
class DummyHandler(logging.Handler):