KF8 Output: Handle all CSS correctly

This commit is contained in:
Kovid Goyal 2012-04-17 22:20:50 +05:30
parent 2b4f74b86d
commit 38340559ac
3 changed files with 69 additions and 11 deletions

View File

@ -364,7 +364,7 @@ def count_set_bits(num):
num >>= 1 num >>= 1
return ans return ans
def to_base(num, base=32): def to_base(num, base=32, min_num_digits=None):
digits = string.digits + string.ascii_uppercase digits = string.digits + string.ascii_uppercase
sign = 1 if num >= 0 else -1 sign = 1 if num >= 0 else -1
if num == 0: return '0' if num == 0: return '0'
@ -373,6 +373,8 @@ def to_base(num, base=32):
while num: while num:
ans.append(digits[(num % base)]) ans.append(digits[(num % base)])
num //= base num //= base
if min_num_digits is not None and len(ans) < min_num_digits:
ans.extend('0'*(min_num_digits - len(ans)))
if sign < 0: if sign < 0:
ans.append('-') ans.append('-')
ans.reverse() ans.reverse()

View File

@ -9,14 +9,22 @@ __docformat__ = 'restructuredtext en'
import copy import copy
from functools import partial from functools import partial
from collections import defaultdict
import cssutils import cssutils
from lxml import etree
from calibre import isbytestring from calibre import isbytestring, force_unicode
from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath) from calibre.ebooks.mobi.utils import to_base
from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath,
extract, XHTML)
XML_DOCS = OEB_DOCS | {SVG_MIME} XML_DOCS = OEB_DOCS | {SVG_MIME}
# References to record numbers in KF8 are stored as base-32 encoded integers,
# left-padded with '0' to at least 4 digits (min_num_digits=4).
# NOTE(review): to_base() returns a bare '0' for num == 0 before any padding
# is applied, so to_ref(0) would not be 4 digits wide -- confirm that 0 is
# never passed as a reference.
to_ref = partial(to_base, base=32, min_num_digits=4)
class KF8Writer(object): class KF8Writer(object):
def __init__(self, oeb, opts, resources): def __init__(self, oeb, opts, resources):
@ -24,10 +32,10 @@ class KF8Writer(object):
self.used_images = set() self.used_images = set()
self.resources = resources self.resources = resources
self.dup_data() self.dup_data()
self.flows = [None] # First flow item is reserved for the text
self.replace_resource_links() self.replace_resource_links()
self.extract_css_into_flows()
self.create_pieces()
def dup_data(self): def dup_data(self):
''' Duplicate data so that any changes we make to markup/CSS only ''' Duplicate data so that any changes we make to markup/CSS only
@ -57,12 +65,13 @@ class KF8Writer(object):
idx = self.resources.item_map.get(ref, None) idx = self.resources.item_map.get(ref, None)
if idx is not None: if idx is not None:
is_image = self.resources.records[idx-1][:4] not in {b'FONT'} is_image = self.resources.records[idx-1][:4] not in {b'FONT'}
idx = to_ref(idx)
if is_image: if is_image:
self.used_images.add(ref) self.used_images.add(ref)
return 'kindle:embed:%04d?mime=%s'%(idx, return 'kindle:embed:%s?mime=%s'%(idx,
self.resources.mime_map[ref]) self.resources.mime_map[ref])
else: else:
return 'kindle:embed:%04d'%idx return 'kindle:embed:%s'%idx
return oref return oref
for item in self.oeb.manifest: for item in self.oeb.manifest:
@ -90,11 +99,44 @@ class KF8Writer(object):
replacer = partial(pointer, item) replacer = partial(pointer, item)
cssutils.replaceUrls(sheet, replacer, ignoreImportRules=True) cssutils.replaceUrls(sheet, replacer, ignoreImportRules=True)
def extract_css_into_flows(self):
    """
    Move every piece of CSS into its own entry of ``self.flows`` and
    rewrite the markup to refer to those entries.

    * Each manifest item whose media type is in ``OEB_STYLES`` is
      serialized and appended to ``self.flows``; ``<link href>``
      elements in spine documents that resolve to such an item are
      pointed at ``kindle:flow:<ref>?mime=text/css``.
    * Each non-empty ``<style>`` element is replaced, in place, by a
      ``<link>`` element referring to a flow holding its text. Identical
      ``<style>`` texts share a single flow. Empty ``<style>`` elements
      are simply removed.

    ``<ref>`` is the index of the flow inside ``self.flows`` encoded with
    ``to_ref``. Index 0 of ``self.flows`` is reserved for the text, so
    CSS flows start at index 1.
    """
    inlines = defaultdict(list)  # Ensure identical <style>s not repeated
    sheets = {}

    for item in self.oeb.manifest:
        if item.media_type in OEB_STYLES:
            data = self.data(item).cssText
            # Record the index *before* appending: that is the position
            # the sheet will occupy in self.flows. Taking len() after the
            # append would reference the following (wrong) flow.
            sheets[item.href] = len(self.flows)
            self.flows.append(force_unicode(data, 'utf-8'))

    for item in self.oeb.spine:
        root = self.data(item)
        # Skip spine items whose data is not a parsed XML tree
        if not hasattr(root, 'xpath'):
            continue

        for link in XPath('//h:link[@href]')(root):
            href = item.abshref(link.get('href'))
            idx = sheets.get(href, None)
            if idx is not None:
                idx = to_ref(idx)
                link.set('href', 'kindle:flow:%s?mime=text/css'%idx)

        for tag in XPath('//h:style')(root):
            p = tag.getparent()
            idx = p.index(tag)
            raw = tag.text
            if not raw or not raw.strip():
                extract(tag)
                continue
            # Put the replacement <link> where the <style> was; its href
            # is filled in below once the flow index is known.
            repl = etree.Element(XHTML('link'), type='text/css',
                    rel='stylesheet')
            p.insert(idx, repl)
            extract(tag)
            inlines[raw].append(repl)

    for raw, elems in inlines.iteritems():
        # As above, compute the reference before appending so that it
        # names the flow that actually holds this CSS.
        idx = to_ref(len(self.flows))
        self.flows.append(raw)
        for link in elems:
            link.set('href', 'kindle:flow:%s?mime=text/css'%idx)

View File

@ -357,7 +357,21 @@ def urlnormalize(href):
parts = (urlquote(part) for part in parts) parts = (urlquote(part) for part in parts)
return urlunparse(parts) return urlunparse(parts)
def extract(elem):
    """
    Detach ``elem`` from its tree together with its children and text.

    Tail text following ``elem`` is kept: it is appended to the tail of
    the preceding sibling or, if there is no preceding sibling, to the
    text of the parent. An element that has no parent is left untouched.
    """
    container = elem.getparent()
    if container is None:
        return

    trailing = elem.tail
    if trailing:
        # Re-home the tail before removal so the text is not lost
        sibling = elem.getprevious()
        if sibling is not None:
            sibling.tail = (sibling.tail or '') + trailing
        else:
            container.text = (container.text or '') + trailing
    container.remove(elem)
class DummyHandler(logging.Handler): class DummyHandler(logging.Handler):