mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
KF8 Output: Handle all CSS correctly
This commit is contained in:
parent
2b4f74b86d
commit
38340559ac
@ -364,7 +364,7 @@ def count_set_bits(num):
|
|||||||
num >>= 1
|
num >>= 1
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def to_base(num, base=32):
|
def to_base(num, base=32, min_num_digits=None):
|
||||||
digits = string.digits + string.ascii_uppercase
|
digits = string.digits + string.ascii_uppercase
|
||||||
sign = 1 if num >= 0 else -1
|
sign = 1 if num >= 0 else -1
|
||||||
if num == 0: return '0'
|
if num == 0: return '0'
|
||||||
@ -373,6 +373,8 @@ def to_base(num, base=32):
|
|||||||
while num:
|
while num:
|
||||||
ans.append(digits[(num % base)])
|
ans.append(digits[(num % base)])
|
||||||
num //= base
|
num //= base
|
||||||
|
if min_num_digits is not None and len(ans) < min_num_digits:
|
||||||
|
ans.extend('0'*(min_num_digits - len(ans)))
|
||||||
if sign < 0:
|
if sign < 0:
|
||||||
ans.append('-')
|
ans.append('-')
|
||||||
ans.reverse()
|
ans.reverse()
|
||||||
|
@ -9,14 +9,22 @@ __docformat__ = 'restructuredtext en'
|
|||||||
|
|
||||||
import copy
|
import copy
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
import cssutils
|
import cssutils
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
from calibre import isbytestring
|
from calibre import isbytestring, force_unicode
|
||||||
from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath)
|
from calibre.ebooks.mobi.utils import to_base
|
||||||
|
from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME, XPath,
|
||||||
|
extract, XHTML)
|
||||||
|
|
||||||
XML_DOCS = OEB_DOCS | {SVG_MIME}
|
XML_DOCS = OEB_DOCS | {SVG_MIME}
|
||||||
|
|
||||||
|
# References to record numbers in KF8 are stored as base-32 encoded integers,
|
||||||
|
# with 4 digits
|
||||||
|
to_ref = partial(to_base, base=32, min_num_digits=4)
|
||||||
|
|
||||||
class KF8Writer(object):
|
class KF8Writer(object):
|
||||||
|
|
||||||
def __init__(self, oeb, opts, resources):
|
def __init__(self, oeb, opts, resources):
|
||||||
@ -24,10 +32,10 @@ class KF8Writer(object):
|
|||||||
self.used_images = set()
|
self.used_images = set()
|
||||||
self.resources = resources
|
self.resources = resources
|
||||||
self.dup_data()
|
self.dup_data()
|
||||||
|
self.flows = [None] # First flow item is reserved for the text
|
||||||
|
|
||||||
self.replace_resource_links()
|
self.replace_resource_links()
|
||||||
|
self.extract_css_into_flows()
|
||||||
self.create_pieces()
|
|
||||||
|
|
||||||
def dup_data(self):
|
def dup_data(self):
|
||||||
''' Duplicate data so that any changes we make to markup/CSS only
|
''' Duplicate data so that any changes we make to markup/CSS only
|
||||||
@ -57,12 +65,13 @@ class KF8Writer(object):
|
|||||||
idx = self.resources.item_map.get(ref, None)
|
idx = self.resources.item_map.get(ref, None)
|
||||||
if idx is not None:
|
if idx is not None:
|
||||||
is_image = self.resources.records[idx-1][:4] not in {b'FONT'}
|
is_image = self.resources.records[idx-1][:4] not in {b'FONT'}
|
||||||
|
idx = to_ref(idx)
|
||||||
if is_image:
|
if is_image:
|
||||||
self.used_images.add(ref)
|
self.used_images.add(ref)
|
||||||
return 'kindle:embed:%04d?mime=%s'%(idx,
|
return 'kindle:embed:%s?mime=%s'%(idx,
|
||||||
self.resources.mime_map[ref])
|
self.resources.mime_map[ref])
|
||||||
else:
|
else:
|
||||||
return 'kindle:embed:%04d'%idx
|
return 'kindle:embed:%s'%idx
|
||||||
return oref
|
return oref
|
||||||
|
|
||||||
for item in self.oeb.manifest:
|
for item in self.oeb.manifest:
|
||||||
@ -90,11 +99,44 @@ class KF8Writer(object):
|
|||||||
replacer = partial(pointer, item)
|
replacer = partial(pointer, item)
|
||||||
cssutils.replaceUrls(sheet, replacer, ignoreImportRules=True)
|
cssutils.replaceUrls(sheet, replacer, ignoreImportRules=True)
|
||||||
|
|
||||||
|
def extract_css_into_flows(self):
    '''
    Move all CSS out of the markup and into ``self.flows``.

    Each manifest item whose media type is in OEB_STYLES is appended to
    ``self.flows`` as unicode text, and every ``<link href>`` in a spine
    document that resolves to one of those items is re-pointed at a
    ``kindle:flow:XXXX?mime=text/css`` URL. Each non-empty inline
    ``<style>`` element is replaced, in place, by a ``<link>`` to a flow
    holding its text; identical ``<style>`` texts share a single flow.
    Empty ``<style>`` elements are simply removed.
    '''
    inlines = defaultdict(list)  # Ensure identical <style>s not repeated
    sheets = {}

    for item in self.oeb.manifest:
        if item.media_type in OEB_STYLES:
            data = self.data(item).cssText
            # Record the position *before* appending: that is the slot the
            # sheet will occupy in self.flows. Taking len() after the
            # append would point one past the sheet.
            sheets[item.href] = len(self.flows)
            self.flows.append(force_unicode(data, 'utf-8'))

    for item in self.oeb.spine:
        root = self.data(item)
        if not hasattr(root, 'xpath'):
            # Not a parsed markup tree, nothing to rewrite
            continue

        for link in XPath('//h:link[@href]')(root):
            href = item.abshref(link.get('href'))
            idx = sheets.get(href, None)
            if idx is not None:
                idx = to_ref(idx)
                link.set('href', 'kindle:flow:%s?mime=text/css'%idx)

        for tag in XPath('//h:style')(root):
            p = tag.getparent()
            idx = p.index(tag)
            raw = tag.text
            if not raw or not raw.strip():
                extract(tag)
                continue
            # Put the replacement <link> where the <style> was; its href is
            # filled in below once the flow number is known.
            repl = etree.Element(XHTML('link'), type='text/css',
                    rel='stylesheet')
            p.insert(idx, repl)
            extract(tag)
            inlines[raw].append(repl)

    for raw, elems in inlines.iteritems():
        # As above, compute the reference before appending so that it names
        # the slot this stylesheet actually occupies in self.flows.
        idx = to_ref(len(self.flows))
        self.flows.append(raw)
        for link in elems:
            link.set('href', 'kindle:flow:%s?mime=text/css'%idx)
|
||||||
|
|
||||||
|
|
||||||
|
@ -357,7 +357,21 @@ def urlnormalize(href):
|
|||||||
parts = (urlquote(part) for part in parts)
|
parts = (urlquote(part) for part in parts)
|
||||||
return urlunparse(parts)
|
return urlunparse(parts)
|
||||||
|
|
||||||
|
def extract(elem):
    """
    Detach ``elem`` from its tree, together with its children and text.

    Tail text following ``elem`` is preserved: it is appended to the tail
    of the preceding sibling, or to the parent's text when ``elem`` has
    no preceding sibling. An element without a parent is left untouched.
    """
    container = elem.getparent()
    if container is None:
        return

    trailing = elem.tail
    if trailing:
        sibling = elem.getprevious()
        if sibling is not None:
            sibling.tail = (sibling.tail or '') + trailing
        else:
            container.text = (container.text or '') + trailing
    container.remove(elem)
|
||||||
|
|
||||||
class DummyHandler(logging.Handler):
|
class DummyHandler(logging.Handler):
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user