mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Initial go at OPS -> HTML 3.2 conversion code.
This commit is contained in:
parent
c2cd135cf8
commit
937cbdb20e
243
src/calibre/ebooks/mobi/mobiml.py
Normal file
243
src/calibre/ebooks/mobi/mobiml.py
Normal file
@ -0,0 +1,243 @@
|
|||||||
|
'''
|
||||||
|
Transform XHTML/OPS-ish content into Mobipocket HTML 3.2.
|
||||||
|
'''
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam>'
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import copy
|
||||||
|
import re
|
||||||
|
from lxml import etree
|
||||||
|
from calibre.ebooks.oeb.base import namespace, barename
|
||||||
|
from calibre.ebooks.oeb.base import XHTML, XHTML_NS
|
||||||
|
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||||
|
from calibre.ebooks.oeb.transforms.flatcss import KeyMapper
|
||||||
|
|
||||||
|
# Namespace URI used to qualify Mobipocket-specific element names.
MBP_NS = 'http://mobipocket.com/ns/mbp'


def MBP(name):
    '''Return `name` qualified with MBP_NS in '{namespace}name' form.'''
    qualified = '{%s}%s' % (MBP_NS, name)
    return qualified
|
||||||
|
|
||||||
|
# Heading tags; content inside one is emitted under the same tag name
# rather than a generic <p>.
HEADER_TAGS = set('h%d' % level for level in range(1, 7))
# List/table tags whose output elements are created inside the most
# recently opened element of this group instead of directly in <body>.
NESTABLE_TAGS = set('ol ul li table tr td'.split())
# Tags that produce an output element even when they carry no text.
CONTENT_TAGS = set('img hr br'.split())

# Matches any run of whitespace; used to collapse it to a single space.
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
|
||||||
|
|
||||||
|
class BlockState(object):
    '''Mutable block-level bookkeeping shared across one document's
    conversion.

    `body` is the output element that receives generated blocks.
    '''

    def __init__(self, body):
        self.body = body
        # Stack of currently open nestable (list/table) output elements.
        self.nested = []
        # Current output paragraph, the element currently receiving inline
        # text, and the inline format state last applied to it.
        self.para = self.inline = self.istate = None
        # Pending vertical padding / margin and accumulated left offset.
        self.vpadding = self.vmargin = self.left = 0.
|
||||||
|
|
||||||
|
class FormatState(object):
    '''Formatting in effect for one element during conversion.

    Two states compare equal when the properties that require inline
    markup (font size, italic, bold, link target, vertical alignment)
    match; alignment, indent, ids, whitespace handling and extra
    attributes are deliberately left out of the comparison.
    '''

    def __init__(self):
        self.halign = 'auto'
        self.indent = 0.
        self.fsize = 3
        self.ids = set()
        self.valign = 'baseline'
        self.italic = False
        self.bold = False
        self.preserve = True
        self.href = None
        self.attrib = {}

    def __eq__(self, other):
        return self.fsize == other.fsize \
               and self.italic == other.italic \
               and self.bold == other.bold \
               and self.href == other.href \
               and self.valign == other.valign

    def __ne__(self, other):
        # Python 2 does not derive `!=` from __eq__; without this method
        # `a != b` falls back to an identity comparison and is true for
        # any two distinct objects, even when they are equal.
        return not self.__eq__(other)
|
||||||
|
|
||||||
|
|
||||||
|
class MobiMLizer(object):
    '''Rewrite the <body> of every spine document as flat Mobipocket-style
    HTML: paragraphs, headings and blockquotes carrying `height`/`width`
    attributes, with <font>/<i>/<b>/<sup>/<sub>/<a> inline markup.
    '''

    def __init__(self):
        pass

    def transform(self, oeb, context):
        '''Convert every spine item of `oeb` in place for the destination
        profile `context.dest`.'''
        self.oeb = oeb
        self.profile = profile = context.dest
        # Invert the profile's {font number: point size} table so a point
        # size can be turned back into a font number.
        self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items())
        self.fmap = KeyMapper(profile.fbase, profile.fbase, fnums.keys())
        self.mobimlize_spine()

    def mobimlize_spine(self):
        '''Replace each spine document's <body> with its converted form
        and drop the document's <head>.'''
        for item in self.oeb.spine:
            # The stylizer is built before <head> is detached.
            stylizer = Stylizer(item.data, item.href, self.oeb, self.profile)
            data = item.data
            head = data.find(XHTML('head'))
            if head is not None:
                # Guard: remove(None) raises for a document without <head>.
                data.remove(head)
            body = data.find(XHTML('body'))
            nbody = etree.Element(XHTML('body'))
            self.mobimlize_elem(body, stylizer, BlockState(nbody),
                                [FormatState()])
            data.replace(body, nbody)

    def mobimlize_font(self, ptsize):
        '''Return the font number whose profile size is the closest match
        for the point size `ptsize`.'''
        return self.fnums[self.fmap[ptsize]]

    def mobimlize_measure(self, ptsize):
        '''Return `ptsize` as a whole number of ems ("<n>em") of the
        profile's base font size; strings are returned unchanged.'''
        # All MobiML measures occur in the default font-space
        if isinstance(ptsize, basestring):
            return ptsize
        return "%dem" % int(round(ptsize / self.profile.fbase))

    def mobimlize_content(self, tag, text, bstate, istates):
        '''Append `text` (and, for CONTENT_TAGS, an empty `tag` element) to
        the output, opening a new block and inline wrappers as needed.

        `bstate` is the shared BlockState; `istates[-1]` is the format
        state of the element the content belongs to.
        '''
        istate = istates[-1]
        if bstate.para is None:
            # No open paragraph: start a new block element.
            bstate.istate = None
            if tag in NESTABLE_TAGS:
                parent = bstate.nested[-1] if bstate.nested else bstate.body
                para = wrapper = etree.SubElement(parent, tag)
                bstate.nested.append(para)
            elif bstate.left > 0:
                # Left offset is expressed as nested blockquotes, one per
                # base-font-size unit of offset.
                para = wrapper = etree.SubElement(bstate.body, 'blockquote')
                left = int(round(bstate.left / self.profile.fbase)) - 1
                while left > 0:
                    para = etree.SubElement(para, 'blockquote')
                    left -= 1
            else:
                ptag = tag if tag in HEADER_TAGS else 'p'
                para = wrapper = etree.SubElement(bstate.body, ptag)
            bstate.inline = bstate.para = para
            # Pending vertical space is consumed by this block.
            vspace = bstate.vpadding + bstate.vmargin
            bstate.vpadding = bstate.vmargin = 0
            wrapper.attrib['height'] = self.mobimlize_measure(vspace)
            para.attrib['width'] = self.mobimlize_measure(istate.indent)
            if istate.halign != 'auto':
                wrapper.attrib['align'] = istate.halign
            if istate.ids:
                wrapper.attrib['id'] = istate.ids.pop()
        pstate = bstate.istate
        para = bstate.para
        if istate.ids:
            for anchor_id in istate.ids:
                etree.SubElement(para, 'a', id=anchor_id)
            # The id set is shared by every (shallow-copied) format state;
            # empty it once the anchors exist so the same ids are not
            # written again for each following piece of content.
            istate.ids.clear()
        if tag in CONTENT_TAGS:
            bstate.inline = para
            pstate = bstate.istate = None
            etree.SubElement(para, tag, attrib=istate.attrib)
        if not text:
            return
        if not pstate or istate != pstate:
            # Formatting changed: build a fresh chain of inline wrappers.
            inline = para
            valign = istate.valign
            fsize = istate.fsize
            href = istate.href
            if valign == 'super':
                inline = etree.SubElement(inline, 'sup')
            elif valign == 'sub':
                inline = etree.SubElement(inline, 'sub')
            elif fsize != 3:
                inline = etree.SubElement(inline, 'font', size=str(fsize))
            if istate.italic:
                inline = etree.SubElement(inline, 'i')
            if istate.bold:
                inline = etree.SubElement(inline, 'b')
            if href:
                inline = etree.SubElement(inline, 'a', href=href)
            bstate.inline = inline
        bstate.istate = istate
        inline = bstate.inline
        if inline is para:
            # Text directly in the paragraph goes after its last child, or
            # into the paragraph's own text when it has no children yet.
            if len(para) == 0:
                para.text = (para.text or '') + text
            else:
                last = para[-1]
                last.tail = (last.tail or '') + text
        else:
            inline.text = (inline.text or '') + text

    def mobimlize_elem(self, elem, stylizer, bstate, istates):
        '''Convert `elem` and its descendants, appending output through
        `bstate`. Nodes whose tag is not a string, or not in the XHTML
        namespace, are skipped.'''
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            return
        # Shallow copy: the `ids` set and `attrib` dict stay shared with
        # the parent state.
        istate = copy.copy(istates[-1])
        istates.append(istate)
        tag = barename(elem.tag)
        style = stylizer.style(elem)
        left = 0
        isblock = style['display'] not in ('inline', 'inline-block')
        isblock = isblock and tag != 'br'
        if isblock:
            bstate.para = None
            margin = style['margin-left']
            if not isinstance(margin, (int, float)):
                margin = 0
            padding = style['padding-left']
            if not isinstance(padding, (int, float)):
                padding = 0
            left = margin + padding
            bstate.left += left
            bstate.vmargin = max((bstate.vmargin, style['margin-top']))
            if style['padding-top']:
                # NOTE(review): the += below is immediately overwritten by
                # the assignment; left unchanged pending confirmation of
                # the intended margin/padding accumulation.
                bstate.vpadding += bstate.vmargin
                bstate.vpadding = style['padding-top']
        istate.fsize = self.mobimlize_font(style['font-size'])
        istate.italic = (style['font-style'] == 'italic')
        weight = style['font-weight']
        if isinstance(weight, (int, float)):
            istate.bold = weight > 400
        else:
            istate.bold = weight in ('bold', 'bolder')
        istate.indent = style['text-indent']
        istate.halign = style['text-align']
        istate.preserve = (style['white-space'] in ('pre', 'pre-wrap'))
        valign = style['vertical-align']
        if valign in ('super', 'sup') \
           or (isinstance(valign, (int, float)) and valign > 0):
            istate.valign = 'super'
        elif valign == 'sub' \
             or (isinstance(valign, (int, float)) and valign < 0):
            istate.valign = 'sub'
        else:
            istate.valign = 'baseline'
        if 'id' in elem.attrib:
            istate.ids.add(elem.attrib['id'])
        if 'name' in elem.attrib:
            istate.ids.add(elem.attrib['name'])
        if tag == 'a' and 'href' in elem.attrib:
            istate.href = elem.attrib['href']
        istate.attrib.clear()
        if tag == 'img' and 'src' in elem.attrib:
            istate.attrib['src'] = elem.attrib['src']
        if tag == 'hr' and 'width' in style.cssdict():
            # Fixed: the original called the bare name `mobimlize_measure`,
            # which is not defined at module level.
            istate.attrib['width'] = self.mobimlize_measure(style['width'])
        text = None
        if elem.text:
            if istate.preserve:
                text = elem.text
            elif len(elem) > 0 and elem.text.isspace():
                # Whitespace-only text before child elements is dropped.
                text = None
            else:
                text = COLLAPSE.sub(' ', elem.text)
        if text or tag in CONTENT_TAGS:
            self.mobimlize_content(tag, text, bstate, istates)
        for child in elem:
            self.mobimlize_elem(child, stylizer, bstate, istates)
            tail = None
            if child.tail:
                if istate.preserve:
                    tail = child.tail
                # Fixed: the original tested child.text here, which can be
                # None and is not the string being emitted.
                elif bstate.para is None and child.tail.isspace():
                    tail = None
                else:
                    tail = COLLAPSE.sub(' ', child.tail)
            if tail:
                self.mobimlize_content(tag, tail, bstate, istates)
        if isblock:
            bstate.para = None
            bstate.left -= left
            bstate.vmargin = max((bstate.vmargin, style['margin-bottom']))
            if style['padding-bottom']:
                # NOTE(review): same overwritten += as for padding-top.
                bstate.vpadding += bstate.vmargin
                bstate.vpadding = style['padding-bottom']
        # NOTE(review): this pops for every element, not only the one
        # that pushed onto `nested` -- confirm that is intended.
        if bstate.nested:
            bstate.nested.pop()
        istates.pop()
|
@ -19,14 +19,14 @@ from collections import defaultdict
|
|||||||
from urlparse import urldefrag
|
from urlparse import urldefrag
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from calibre.ebooks.mobi.palmdoc import compress_doc
|
|
||||||
from calibre.ebooks.mobi.langcodes import iana2mobi
|
|
||||||
from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS
|
from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS
|
||||||
from calibre.ebooks.oeb.base import xpath, barename, namespace, prefixname
|
from calibre.ebooks.oeb.base import xpath, barename, namespace, prefixname
|
||||||
from calibre.ebooks.oeb.base import FauxLogger, OEBBook
|
from calibre.ebooks.oeb.base import FauxLogger, OEBBook
|
||||||
|
from calibre.ebooks.oeb.profile import Context
|
||||||
MBP_NS = 'http://mobipocket.com/ns/mbp'
|
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
|
||||||
def MBP(name): return '{%s}%s' % (MBP_NS, name)
|
from calibre.ebooks.mobi.palmdoc import compress_doc
|
||||||
|
from calibre.ebooks.mobi.langcodes import iana2mobi
|
||||||
|
from calibre.ebooks.mobi.mobiml import MBP_NS, MBP, MobiMLizer
|
||||||
|
|
||||||
EXTH_CODES = {
|
EXTH_CODES = {
|
||||||
'creator': 100,
|
'creator': 100,
|
||||||
@ -138,9 +138,10 @@ class Serializer(object):
|
|||||||
buffer.write(' <mbp:pagebreak/>')
|
buffer.write(' <mbp:pagebreak/>')
|
||||||
|
|
||||||
def serialize_elem(self, elem, item, nsrmap=NSRMAP):
|
def serialize_elem(self, elem, item, nsrmap=NSRMAP):
|
||||||
if namespace(elem.tag) not in nsrmap:
|
|
||||||
return
|
|
||||||
buffer = self.buffer
|
buffer = self.buffer
|
||||||
|
if not isinstance(elem.tag, basestring) \
|
||||||
|
or namespace(elem.tag) not in nsrmap:
|
||||||
|
return
|
||||||
hrefs = self.oeb.manifest.hrefs
|
hrefs = self.oeb.manifest.hrefs
|
||||||
tag = prefixname(elem.tag, nsrmap)
|
tag = prefixname(elem.tag, nsrmap)
|
||||||
for attr in ('name', 'id'):
|
for attr in ('name', 'id'):
|
||||||
@ -170,11 +171,11 @@ class Serializer(object):
|
|||||||
buffer.write(encode(elem.text))
|
buffer.write(encode(elem.text))
|
||||||
for child in elem:
|
for child in elem:
|
||||||
self.serialize_elem(child, item)
|
self.serialize_elem(child, item)
|
||||||
|
if child.tail:
|
||||||
|
buffer.write(encode(child.tail))
|
||||||
buffer.write('</%s>' % tag)
|
buffer.write('</%s>' % tag)
|
||||||
else:
|
else:
|
||||||
buffer.write('/>')
|
buffer.write('/>')
|
||||||
if elem.tail:
|
|
||||||
buffer.write(encode(elem.tail))
|
|
||||||
|
|
||||||
def fixup_links(self):
|
def fixup_links(self):
|
||||||
buffer = self.buffer
|
buffer = self.buffer
|
||||||
@ -380,9 +381,18 @@ class MobiWriter(object):
|
|||||||
|
|
||||||
|
|
||||||
def main(argv=sys.argv):
|
def main(argv=sys.argv):
|
||||||
|
from calibre.ebooks.oeb.base import DirWriter
|
||||||
inpath, outpath = argv[1:]
|
inpath, outpath = argv[1:]
|
||||||
|
context = Context('MSReader', 'Cybook3')
|
||||||
oeb = OEBBook(inpath)
|
oeb = OEBBook(inpath)
|
||||||
writer = MobiWriter()
|
writer = MobiWriter()
|
||||||
|
#writer = DirWriter()
|
||||||
|
fbase = context.dest.fbase
|
||||||
|
fkey = context.dest.fnums.values()
|
||||||
|
flattener = CSSFlattener(unfloat=True, fbase=fbase, fkey=fkey)
|
||||||
|
mobimlizer = MobiMLizer()
|
||||||
|
flattener.transform(oeb, context)
|
||||||
|
mobimlizer.transform(oeb, context)
|
||||||
writer.dump(oeb, outpath)
|
writer.dump(oeb, outpath)
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
60
src/calibre/ebooks/oeb/profile.py
Normal file
60
src/calibre/ebooks/oeb/profile.py
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
'''
|
||||||
|
Device profiles.
|
||||||
|
'''
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||||
|
|
||||||
|
from itertools import izip
|
||||||
|
|
||||||
|
# (size keyword, font number) pairs in ascending size order; None marks a
# step that has no keyword or no font number.
FONT_SIZES = [
    ('xx-small', 1),
    ('x-small', None),
    ('small', 2),
    ('medium', 3),
    ('large', 4),
    ('x-large', 5),
    ('xx-large', 6),
    (None, 7),
]


class Profile(object):
    '''Page and font metrics of one device or reader.

    `width` and `height` are divided by `dpi` and multiplied by 72 before
    being stored. `fsizes` supplies one size per FONT_SIZES entry, in
    order; surplus values are ignored.
    '''

    def __init__(self, width, height, dpi, fbase, fsizes):
        scale = 72.
        self.width = (float(width) / dpi) * scale
        self.height = (float(height) / dpi) * scale
        self.dpi = float(dpi)
        self.fbase = float(fbase)
        self.fsizes = [(name, num, float(size))
                       for (name, num), size in izip(FONT_SIZES, fsizes)]
        # Lookup tables: size keyword -> size and font number -> size.
        self.fnames = {}
        self.fnums = {}
        for name, num, size in self.fsizes:
            if name:
                self.fnames[name] = size
            if num:
                self.fnums[num] = size
|
||||||
|
|
||||||
|
|
||||||
|
# Registry of known profiles, keyed by name.
PROFILES = {}

PROFILES['PRS505'] = Profile(
    width=584, height=754, dpi=168.451, fbase=12,
    fsizes=[7.5, 9, 10, 12, 15.5, 20, 22, 24])

PROFILES['MSReader'] = Profile(
    width=480, height=652, dpi=100.0, fbase=13,
    fsizes=[10, 11, 13, 16, 18, 20, 22, 26])

# No clue on usable screen size and DPI
PROFILES['Cybook3'] = Profile(
    width=584, height=754, dpi=168.451, fbase=12,
    fsizes=[9, 10, 11, 12, 14, 16, 18, 20])

# NOTE(review): nine sizes are listed here while the other profiles
# list eight -- confirm the extra value is intended.
PROFILES['Firefox'] = Profile(
    width=800, height=600, dpi=100.0, fbase=12,
    fsizes=[5, 7, 9, 12, 13.5, 17, 20, 22, 24])
|
||||||
|
|
||||||
|
|
||||||
|
class Context(object):
    '''Pair of source and destination profiles for a conversion.

    An argument that is a key of PROFILES is replaced by the registered
    profile; any other value is stored unchanged.
    '''

    def __init__(self, source, dest):
        self.source = PROFILES.get(source, source)
        self.dest = PROFILES.get(dest, dest)
|
@ -24,6 +24,7 @@ from lxml import etree
|
|||||||
from lxml.cssselect import css_to_xpath, ExpressionError
|
from lxml.cssselect import css_to_xpath, ExpressionError
|
||||||
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
|
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
|
||||||
from calibre.ebooks.oeb.base import barename, urlnormalize
|
from calibre.ebooks.oeb.base import barename, urlnormalize
|
||||||
|
from calibre.ebooks.oeb.profile import PROFILES
|
||||||
from calibre.resources import html_css
|
from calibre.resources import html_css
|
||||||
|
|
||||||
XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS
|
XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS
|
||||||
@ -75,7 +76,7 @@ DEFAULTS = {'azimuth': 'center', 'background-attachment': 'scroll',
|
|||||||
'50', 'right': 'auto', 'speak': 'normal', 'speak-header': 'once',
|
'50', 'right': 'auto', 'speak': 'normal', 'speak-header': 'once',
|
||||||
'speak-numeral': 'continuous', 'speak-punctuation': 'none',
|
'speak-numeral': 'continuous', 'speak-punctuation': 'none',
|
||||||
'speech-rate': 'medium', 'stress': '50', 'table-layout': 'auto',
|
'speech-rate': 'medium', 'stress': '50', 'table-layout': 'auto',
|
||||||
'text-align': 'left', 'text-decoration': 'none', 'text-indent':
|
'text-align': 'auto', 'text-decoration': 'none', 'text-indent':
|
||||||
0, 'text-transform': 'none', 'top': 'auto', 'unicode-bidi':
|
0, 'text-transform': 'none', 'top': 'auto', 'unicode-bidi':
|
||||||
'normal', 'vertical-align': 'baseline', 'visibility': 'visible',
|
'normal', 'vertical-align': 'baseline', 'visibility': 'visible',
|
||||||
'voice-family': 'default', 'volume': 'medium', 'white-space':
|
'voice-family': 'default', 'volume': 'medium', 'white-space':
|
||||||
@ -85,15 +86,6 @@ DEFAULTS = {'azimuth': 'center', 'background-attachment': 'scroll',
|
|||||||
FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large',
|
FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large',
|
||||||
'x-large', 'xx-large'])
|
'x-large', 'xx-large'])
|
||||||
|
|
||||||
FONT_SIZES = [('xx-small', 1),
|
|
||||||
('x-small', None),
|
|
||||||
('small', 2),
|
|
||||||
('medium', 3),
|
|
||||||
('large', 4),
|
|
||||||
('x-large', 5),
|
|
||||||
('xx-large', 6),
|
|
||||||
(None, 7)]
|
|
||||||
|
|
||||||
|
|
||||||
XPNSMAP = {'h': XHTML_NS,}
|
XPNSMAP = {'h': XHTML_NS,}
|
||||||
def xpath(elem, expr):
|
def xpath(elem, expr):
|
||||||
@ -112,28 +104,11 @@ class CSSSelector(etree.XPath):
|
|||||||
self.css)
|
self.css)
|
||||||
|
|
||||||
|
|
||||||
class Page(object):
|
|
||||||
def __init__(self, width, height, dpi, fbase, fsizes):
|
|
||||||
self.width = (float(width) / dpi) * 72.
|
|
||||||
self.height = (float(height) / dpi) * 72.
|
|
||||||
self.dpi = float(dpi)
|
|
||||||
self.fbase = float(fbase)
|
|
||||||
self.fsizes = []
|
|
||||||
for (name, num), size in izip(FONT_SIZES, fsizes):
|
|
||||||
self.fsizes.append((name, num, float(size)))
|
|
||||||
self.fnames = dict((name, sz) for name, _, sz in self.fsizes if name)
|
|
||||||
self.fnums = dict((num, sz) for _, num, sz in self.fsizes if num)
|
|
||||||
|
|
||||||
class Profiles(object):
|
|
||||||
PRS505 = Page(584, 754, 168.451, 12, [7.5, 9, 10, 12, 15.5, 20, 22, 24])
|
|
||||||
MSLIT = Page(652, 480, 100.0, 13, [10, 11, 13, 16, 18, 20, 22, 26])
|
|
||||||
|
|
||||||
|
|
||||||
class Stylizer(object):
|
class Stylizer(object):
|
||||||
STYLESHEETS = {}
|
STYLESHEETS = {}
|
||||||
|
|
||||||
def __init__(self, tree, path, oeb, page=Profiles.PRS505):
|
def __init__(self, tree, path, oeb, profile=PROFILES['PRS505']):
|
||||||
self.page = page
|
self.profile = profile
|
||||||
base = os.path.dirname(path)
|
base = os.path.dirname(path)
|
||||||
basename = os.path.basename(path)
|
basename = os.path.basename(path)
|
||||||
cssname = os.path.splitext(basename)[0] + '.css'
|
cssname = os.path.splitext(basename)[0] + '.css'
|
||||||
@ -215,7 +190,7 @@ class Stylizer(object):
|
|||||||
size = style['font-size']
|
size = style['font-size']
|
||||||
if size == 'normal': size = 'medium'
|
if size == 'normal': size = 'medium'
|
||||||
if size in FONT_SIZE_NAMES:
|
if size in FONT_SIZE_NAMES:
|
||||||
style['font-size'] = "%dpt" % self.page.fnames[size]
|
style['font-size'] = "%dpt" % self.profile.fnames[size]
|
||||||
return style
|
return style
|
||||||
|
|
||||||
def _normalize_edge(self, cssvalue, name):
|
def _normalize_edge(self, cssvalue, name):
|
||||||
@ -284,7 +259,7 @@ class Stylizer(object):
|
|||||||
class Style(object):
|
class Style(object):
|
||||||
def __init__(self, element, stylizer):
|
def __init__(self, element, stylizer):
|
||||||
self._element = element
|
self._element = element
|
||||||
self._page = stylizer.page
|
self._profile = stylizer.profile
|
||||||
self._stylizer = stylizer
|
self._stylizer = stylizer
|
||||||
self._style = {}
|
self._style = {}
|
||||||
stylizer._styles[element] = self
|
stylizer._styles[element] = self
|
||||||
@ -340,7 +315,7 @@ class Style(object):
|
|||||||
base = base or self.width
|
base = base or self.width
|
||||||
result = (value/100.0) * base
|
result = (value/100.0) * base
|
||||||
elif unit == 'px':
|
elif unit == 'px':
|
||||||
result = value * 72.0 / self._page.dpi
|
result = value * 72.0 / self._profile.dpi
|
||||||
elif unit == 'in':
|
elif unit == 'in':
|
||||||
result = value * 72.0
|
result = value * 72.0
|
||||||
elif unit == 'pt':
|
elif unit == 'pt':
|
||||||
@ -363,18 +338,18 @@ class Style(object):
|
|||||||
factor = None
|
factor = None
|
||||||
if value == 'inherit':
|
if value == 'inherit':
|
||||||
# We should only see this if the root element
|
# We should only see this if the root element
|
||||||
value = self._page.fbase
|
value = self._profile.fbase
|
||||||
if value in FONT_SIZE_NAMES:
|
if value in FONT_SIZE_NAMES:
|
||||||
result = self._page.fnames[value]
|
result = self._profile.fnames[value]
|
||||||
elif value == 'smaller':
|
elif value == 'smaller':
|
||||||
factor = 1.0/1.2
|
factor = 1.0/1.2
|
||||||
for _, _, size in self._page.fsizes:
|
for _, _, size in self._profile.fsizes:
|
||||||
if base <= size: break
|
if base <= size: break
|
||||||
factor = None
|
factor = None
|
||||||
result = size
|
result = size
|
||||||
elif value == 'larger':
|
elif value == 'larger':
|
||||||
factor = 1.2
|
factor = 1.2
|
||||||
for _, _, size in reversed(self._page.fsizes):
|
for _, _, size in reversed(self._profile.fsizes):
|
||||||
if base >= size: break
|
if base >= size: break
|
||||||
factor = None
|
factor = None
|
||||||
result = size
|
result = size
|
||||||
@ -390,7 +365,7 @@ class Style(object):
|
|||||||
styles = self._stylizer._styles
|
styles = self._stylizer._styles
|
||||||
base = styles[self._element.getparent()].fontSize
|
base = styles[self._element.getparent()].fontSize
|
||||||
else:
|
else:
|
||||||
base = self._page.fbase
|
base = self._profile.fbase
|
||||||
if 'font-size' in self._style:
|
if 'font-size' in self._style:
|
||||||
size = self._style['font-size']
|
size = self._style['font-size']
|
||||||
result = normalize_fontsize(size, base)
|
result = normalize_fontsize(size, base)
|
||||||
@ -407,7 +382,7 @@ class Style(object):
|
|||||||
styles = self._stylizer._styles
|
styles = self._stylizer._styles
|
||||||
base = styles[self._element.getparent()].width
|
base = styles[self._element.getparent()].width
|
||||||
else:
|
else:
|
||||||
base = self._page.width
|
base = self._profile.width
|
||||||
if 'width' in self._style:
|
if 'width' in self._style:
|
||||||
width = self._style['width']
|
width = self._style['width']
|
||||||
if width == 'auto':
|
if width == 'auto':
|
||||||
|
0
src/calibre/ebooks/oeb/transforms/__init__.py
Normal file
0
src/calibre/ebooks/oeb/transforms/__init__.py
Normal file
239
src/calibre/ebooks/oeb/transforms/flatcss.py
Normal file
239
src/calibre/ebooks/oeb/transforms/flatcss.py
Normal file
@ -0,0 +1,239 @@
|
|||||||
|
'''
|
||||||
|
CSS flattening transform.
|
||||||
|
'''
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import operator
|
||||||
|
import math
|
||||||
|
from itertools import chain
|
||||||
|
from collections import defaultdict
|
||||||
|
from lxml import etree
|
||||||
|
from calibre.ebooks.oeb.base import XHTML, XHTML_NS
|
||||||
|
from calibre.ebooks.oeb.base import CSS_MIME, OEB_STYLES
|
||||||
|
from calibre.ebooks.oeb.base import namespace, barename
|
||||||
|
from calibre.ebooks.oeb.base import OEBBook
|
||||||
|
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||||
|
|
||||||
|
# CSS rule template that sets the <body> font size; takes one number
# (points).
BASEFONT_CSS = 'body { font-size: %0.5fpt; }'

# Matches any run of whitespace.
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
# Matches a trailing run of digits and hyphens.
STRIPNUM = re.compile(r'[-0-9]+$')
|
||||||
|
|
||||||
|
class KeyMapper(object):
    '''Map a source font size to the member of a fixed set of destination
    sizes (`dkey`) whose position relative to the destination base size
    `dbase` best matches the source size's position relative to the
    source base size `sbase`. Results are memoized per source size.
    '''

    def __init__(self, sbase, dbase, dkey):
        self.sbase = float(sbase)
        # (relative position, size) for each candidate destination size.
        self.dprop = []
        for key in dkey:
            self.dprop.append((self.relate(key, dbase), float(key)))
        self.cache = {}

    @staticmethod
    def relate(size, base):
        '''Return a signed measure of how far `size` lies from `base`:
        0 when equal, negative when smaller, positive when larger.'''
        size = float(size)
        base = float(base)
        if size == base:
            return 0
        if size < base:
            sign, endp = -1, 0
        else:
            sign, endp = 1, 36
        diff = (abs(base - size) * 3) + ((36 - size) / 100)
        logb = abs(base - endp)
        return sign * math.log(diff, logb)

    def __getitem__(self, ssize):
        try:
            return self.cache[ssize]
        except KeyError:
            pass
        dsize = self.cache[ssize] = self.map(ssize)
        return dsize

    def map(self, ssize):
        '''Return the destination size closest in relative position to
        `ssize`, without consulting the cache.'''
        prop = self.relate(ssize, self.sbase)
        nearest = min((abs(prop - p), s) for p, s in self.dprop)
        return nearest[1]
|
||||||
|
|
||||||
|
class ScaleMapper(object):
    '''Map font sizes by multiplying with the ratio of the destination
    base size `dbase` to the source base size `sbase`.'''

    def __init__(self, sbase, dbase):
        self.dscale = float(dbase) / float(sbase)

    def __getitem__(self, ssize):
        return ssize * self.dscale
|
||||||
|
|
||||||
|
class NullMapper(object):
    '''Identity font-size mapper: every size maps to itself.'''

    def __init__(self):
        '''No state is needed.'''

    def __getitem__(self, ssize):
        return ssize
|
||||||
|
|
||||||
|
def FontMapper(sbase=None, dbase=None, dkey=None):
    '''Return the font-size mapper suited to the arguments supplied.

    Without both base sizes a NullMapper is returned; with both base
    sizes and a set of destination sizes a KeyMapper; with base sizes
    only a ScaleMapper.
    '''
    if not (sbase and dbase):
        return NullMapper()
    if dkey:
        return KeyMapper(sbase, dbase, dkey)
    return ScaleMapper(sbase, dbase)
|
||||||
|
|
||||||
|
|
||||||
|
class CSSFlattener(object):
    '''Replace a book's stylesheets and inline styles with one generated
    stylesheet of class rules, optionally remapping font sizes.

    unfloat -- drop the `float` property from every element except <img>.
    fbase   -- destination base font size; enables font-size remapping.
    fkey    -- destination sizes to snap to (see FontMapper).
    lineh   -- destination line height; when set, vertical margins and
               padding are rounded to multiples of it and a line-height
               is written out.
    '''

    def __init__(self, unfloat=False, fbase=None, fkey=None, lineh=None):
        self.unfloat = unfloat
        self.fbase = fbase
        self.fkey = fkey
        self.lineh = lineh

    def transform(self, oeb, context):
        '''Flatten the CSS of `oeb` in place using the profiles in
        `context`.'''
        self.oeb = oeb
        self.context = context
        self.premangle_css()
        self.stylize_spine()
        self.sbase = self.baseline_spine() if self.fbase else None
        self.fmap = FontMapper(self.sbase, self.fbase, self.fkey)
        self.flatten_spine()

    def premangle_css(self):
        '''Prefix every stylesheet in the manifest with a body font-size
        rule set to the source profile's base size.'''
        basefont_css = BASEFONT_CSS % (self.context.source.fbase,)
        for item in self.oeb.manifest.values():
            if item.media_type in OEB_STYLES:
                item.data = basefont_css + item.data

    def stylize_spine(self):
        '''Build one Stylizer per spine item, using the source profile.'''
        self.stylizers = {}
        profile = self.context.source
        for item in self.oeb.spine:
            html = item.data
            stylizer = Stylizer(html, item.href, self.oeb, profile)
            self.stylizers[item] = stylizer

    def baseline_node(self, node, stylizer, sizes, csize):
        '''Add the whitespace-collapsed text length found under `node` to
        `sizes`, keyed by font size. `csize` is the font size of the
        node's parent, which is what the node's tail text is counted
        under.'''
        if node.tail:
            sizes[csize] += len(COLLAPSE.sub(' ', node.tail))
        csize = stylizer.style(node)['font-size']
        if node.text:
            sizes[csize] += len(COLLAPSE.sub(' ', node.text))
        for child in node:
            self.baseline_node(child, stylizer, sizes, csize)

    def baseline_spine(self):
        '''Return the font size that covers the most text across the
        spine, or the source profile's base size when there is no text.'''
        sizes = defaultdict(float)
        fsize = self.context.source.fbase
        for item in self.oeb.spine:
            html = item.data
            stylizer = self.stylizers[item]
            body = html.find(XHTML('body'))
            self.baseline_node(body, stylizer, sizes, fsize)
        if not sizes:
            # max() raises ValueError on an empty sequence.
            return fsize
        sbase = max(sizes.items(), key=operator.itemgetter(1))[0]
        return sbase

    def clean_edges(self, cssdict, style, fsize):
        '''Rewrite top/bottom margin and padding in `cssdict` as em values
        that are whole multiples of the destination line height.

        Zero and percentage values are left alone. Values up to one
        source line height (source base size * 1.26) become exactly one
        destination line; larger values are rounded to the nearest number
        of lines.
        '''
        slineh = self.sbase * 1.26
        dlineh = self.lineh
        for kind in ('margin', 'padding'):
            for edge in ('bottom', 'top'):
                prop = "%s-%s" % (kind, edge)
                if prop not in cssdict:
                    continue
                if '%' in cssdict[prop]:
                    continue
                value = style[prop]
                if value == 0:
                    continue
                elif value <= slineh:
                    cssdict[prop] = "%0.5fem" % (dlineh / fsize)
                else:
                    value = round(value / slineh) * dlineh
                    cssdict[prop] = "%0.5fem" % (value / fsize)

    def flatten_node(self, node, stylizer, names, styles, psize, left=0):
        '''Turn the computed style of `node` and its descendants into
        generated classes.

        names  -- {class stem: number of classes generated from it}.
        styles -- {CSS text: generated class name}, shared book-wide.
        psize  -- font size inherited from the parent element.
        left   -- accumulated left margin of the ancestors.
        '''
        if not isinstance(node.tag, basestring) \
           or namespace(node.tag) != XHTML_NS:
            return
        tag = barename(node.tag)
        style = stylizer.style(node)
        cssdict = style.cssdict()
        if cssdict:
            if 'font-size' in cssdict:
                fsize = self.fmap[style['font-size']]
                cssdict['font-size'] = "%0.5fem" % (fsize / psize)
                psize = fsize
            if self.lineh and self.fbase and tag != 'body':
                self.clean_edges(cssdict, style, psize)
            margin = style['margin-left']
            left += margin if isinstance(margin, float) else 0
            if (left + style['text-indent']) < 0:
                # A negative indent would start left of the page edge;
                # widen the left margin to make room for it.
                percent = (margin - style['text-indent']) / style['width']
                cssdict['margin-left'] = "%d%%" % (percent * 100)
                left -= style['text-indent']
            if self.unfloat and 'float' in cssdict and tag != 'img':
                del cssdict['float']
        if self.lineh and 'line-height' not in cssdict:
            lineh = self.lineh / psize
            cssdict['line-height'] = "%0.5fem" % lineh
        if cssdict:
            items = sorted(cssdict.items())
            css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items)
            # Class stem: first existing class name minus trailing digits.
            # An empty or all-digit class attribute falls back to
            # 'calibre' instead of raising IndexError or yielding an
            # empty class name.
            classes = node.get('class', 'calibre').split()
            first = classes[0] if classes else 'calibre'
            klass = STRIPNUM.sub('', first) or 'calibre'
            if css in styles:
                match = styles[css]
            else:
                match = klass + str(names[klass] or '')
                styles[css] = match
                names[klass] += 1
            node.attrib['class'] = match
        elif 'class' in node.attrib:
            del node.attrib['class']
        if 'style' in node.attrib:
            del node.attrib['style']
        for child in node:
            self.flatten_node(child, stylizer, names, styles, psize, left)

    def flatten_head(self, head, stylizer, href):
        '''Remove stylesheet <link> and <style> children from `head`, then
        link the generated stylesheet `href` and re-add any @page rule.'''
        link_tag = XHTML('link')
        style_tag = XHTML('style')
        # Iterate over a snapshot: removing children while iterating the
        # live element makes the iteration miss siblings.
        for node in list(head):
            if node.tag == link_tag \
               and node.get('rel', 'stylesheet') == 'stylesheet' \
               and node.get('type', CSS_MIME) in OEB_STYLES:
                head.remove(node)
            elif node.tag == style_tag \
                 and node.get('type', CSS_MIME) in OEB_STYLES:
                head.remove(node)
        etree.SubElement(head, link_tag,
                         rel='stylesheet', type=CSS_MIME, href=href)
        if stylizer.page_rule:
            items = sorted(stylizer.page_rule.items())
            css = '; '.join("%s: %s" % (key, val) for key, val in items)
            style = etree.SubElement(head, style_tag, type=CSS_MIME)
            style.text = "@page { %s; }" % css

    def replace_css(self, css):
        '''Remove every stylesheet from the manifest, add `css` as the
        single replacement and return its href.'''
        manifest = self.oeb.manifest
        css_id, href = manifest.generate('css', 'stylesheet.css')
        # Snapshot the items: the manifest is modified inside the loop.
        for item in list(manifest.values()):
            if item.media_type in OEB_STYLES:
                manifest.remove(item)
        manifest.add(css_id, href, CSS_MIME, data=css)
        return href

    def flatten_spine(self):
        '''Flatten every spine document's body, write the combined
        stylesheet and point every document's head at it.'''
        names = defaultdict(int)
        styles = {}
        fsize = self.context.dest.fbase
        for item in self.oeb.spine:
            html = item.data
            stylizer = self.stylizers[item]
            body = html.find(XHTML('body'))
            self.flatten_node(body, stylizer, names, styles, fsize)
        # Emit rules ordered by class name.
        items = sorted((key, val) for (val, key) in styles.items())
        css = ''.join(".%s {\n%s;\n}\n\n" % (key, val) for key, val in items)
        href = self.replace_css(css)
        for item in self.oeb.spine:
            html = item.data
            stylizer = self.stylizers[item]
            head = html.find(XHTML('head'))
            self.flatten_head(head, stylizer, href)
|
Loading…
x
Reference in New Issue
Block a user