Various tweaks and improvements to Mobi generation.

This commit is contained in:
Marshall T. Vandegrift 2009-01-04 23:30:47 -05:00
parent 8348264198
commit d3f12fcf36
5 changed files with 168 additions and 97 deletions

View File

@ -29,6 +29,11 @@ PAGE_BREAKS = set(['always', 'odd', 'even'])
COLLAPSE = re.compile(r'[ \t\r\n\v]+') COLLAPSE = re.compile(r'[ \t\r\n\v]+')
def asfloat(value):
if not isinstance(value, (int, long, float)):
return 0.0
return float(value)
class BlockState(object): class BlockState(object):
def __init__(self, body): def __init__(self, body):
self.body = body self.body = body
@ -37,12 +42,12 @@ class BlockState(object):
self.inline = None self.inline = None
self.vpadding = 0. self.vpadding = 0.
self.vmargin = 0. self.vmargin = 0.
self.left = 0.
self.pbreak = False self.pbreak = False
self.istate = None self.istate = None
class FormatState(object): class FormatState(object):
def __init__(self): def __init__(self):
self.left = 0.
self.halign = 'auto' self.halign = 'auto'
self.indent = 0. self.indent = 0.
self.fsize = 3 self.fsize = 3
@ -51,6 +56,7 @@ class FormatState(object):
self.italic = False self.italic = False
self.bold = False self.bold = False
self.preserve = True self.preserve = True
self.family = 'serif'
self.href = None self.href = None
self.list_num = 0 self.list_num = 0
self.attrib = {} self.attrib = {}
@ -60,7 +66,9 @@ class FormatState(object):
and self.italic == other.italic \ and self.italic == other.italic \
and self.bold == other.bold \ and self.bold == other.bold \
and self.href == other.href \ and self.href == other.href \
and self.valign == other.valign and self.valign == other.valign \
and self.preserve == other.preserve \
and self.family == other.family
def __ne__(self, other): def __ne__(self, other):
return not self.__eq__(other) return not self.__eq__(other)
@ -94,7 +102,6 @@ class MobiMLizer(object):
def mobimlize_measure(self, ptsize): def mobimlize_measure(self, ptsize):
if isinstance(ptsize, basestring): if isinstance(ptsize, basestring):
return ptsize return ptsize
# All MobiML measures occur in the default font-space
fbase = self.profile.fbase fbase = self.profile.fbase
if ptsize < fbase: if ptsize < fbase:
return "%dpt" % int(round(ptsize * 2)) return "%dpt" % int(round(ptsize * 2))
@ -116,7 +123,7 @@ class MobiMLizer(object):
istate = istates[-1] istate = istates[-1]
if istate.ids: if istate.ids:
body = bstate.body body = bstate.body
index = max((0, len(body) - 2)) index = max((0, len(body) - 1))
for id in istate.ids: for id in istate.ids:
body.insert(index, etree.Element('a', attrib={'id': id})) body.insert(index, etree.Element('a', attrib={'id': id}))
istate.ids.clear() istate.ids.clear()
@ -126,22 +133,30 @@ class MobiMLizer(object):
elif para is None: elif para is None:
bstate.istate = None bstate.istate = None
parent = bstate.nested[-1] if bstate.nested else bstate.body parent = bstate.nested[-1] if bstate.nested else bstate.body
indent = istate.indent
left = istate.left
if indent < 0 and abs(indent) < left:
left += indent
indent = 0
elif indent != 0 and abs(indent) < self.profile.fbase:
indent = (indent / abs(indent)) * self.profile.fbase
if bstate.pbreak: if bstate.pbreak:
etree.SubElement(parent, MBP('pagebreak')) etree.SubElement(parent, MBP('pagebreak'))
bstate.pbreak = False bstate.pbreak = False
if tag in NESTABLE_TAGS: if tag in NESTABLE_TAGS:
para = wrapper = etree.SubElement(parent, tag) para = wrapper = etree.SubElement(parent, tag)
bstate.nested.append(para) bstate.nested.append(para)
# Should instead support full CSS lists?
if tag == 'li' and len(istates) > 1: if tag == 'li' and len(istates) > 1:
istates[-2].list_num += 1 istates[-2].list_num += 1
para.attrib['value'] = str(istates[-2].list_num) para.attrib['value'] = str(istates[-2].list_num)
elif bstate.left > 0 and istate.indent >= 0: elif left > 0 and indent >= 0:
para = wrapper = etree.SubElement(parent, 'blockquote') para = wrapper = etree.SubElement(parent, 'blockquote')
left = int(round(bstate.left / self.profile.fbase)) - 1 para = wrapper
while left > 0: emleft = int(round(left / self.profile.fbase)) - 1
emleft = min((emleft, 10))
while emleft > 0:
para = etree.SubElement(para, 'blockquote') para = etree.SubElement(para, 'blockquote')
left -= 1 emleft -= 1
else: else:
ptag = tag if tag in HEADER_TAGS else 'p' ptag = tag if tag in HEADER_TAGS else 'p'
para = wrapper = etree.SubElement(parent, ptag) para = wrapper = etree.SubElement(parent, ptag)
@ -150,7 +165,14 @@ class MobiMLizer(object):
bstate.vpadding = bstate.vmargin = 0 bstate.vpadding = bstate.vmargin = 0
if tag not in TABLE_TAGS: if tag not in TABLE_TAGS:
wrapper.attrib['height'] = self.mobimlize_measure(vspace) wrapper.attrib['height'] = self.mobimlize_measure(vspace)
para.attrib['width'] = self.mobimlize_measure(istate.indent) para.attrib['width'] = self.mobimlize_measure(indent)
elif tag == 'table' and vspace > 0:
body = bstate.body
vspace = int(round(vspace / self.profile.fbase))
index = max((0, len(body) - 1))
while vspace > 0:
body.insert(index, etree.Element('br'))
vspace -= 1
if istate.halign != 'auto': if istate.halign != 'auto':
para.attrib['align'] = istate.halign para.attrib['align'] = istate.halign
pstate = bstate.istate pstate = bstate.istate
@ -158,6 +180,8 @@ class MobiMLizer(object):
bstate.inline = para bstate.inline = para
pstate = bstate.istate = None pstate = bstate.istate = None
etree.SubElement(para, tag, attrib=istate.attrib) etree.SubElement(para, tag, attrib=istate.attrib)
elif tag in TABLE_TAGS:
para.attrib['valign'] = 'top'
if not text: if not text:
return return
if not pstate or istate != pstate: if not pstate or istate != pstate:
@ -169,7 +193,7 @@ class MobiMLizer(object):
inline = etree.SubElement(inline, 'sup') inline = etree.SubElement(inline, 'sup')
elif valign == 'sub': elif valign == 'sub':
inline = etree.SubElement(inline, 'sub') inline = etree.SubElement(inline, 'sub')
if istate.preserve: if istate.family == 'monospace':
inline = etree.SubElement(inline, 'tt') inline = etree.SubElement(inline, 'tt')
if fsize != 3: if fsize != 3:
inline = etree.SubElement(inline, 'font', size=str(fsize)) inline = etree.SubElement(inline, 'font', size=str(fsize))
@ -182,8 +206,8 @@ class MobiMLizer(object):
bstate.inline = inline bstate.inline = inline
bstate.istate = istate bstate.istate = istate
inline = bstate.inline inline = bstate.inline
items = self.preize_text(text) if istate.preserve else [text] content = self.preize_text(text) if istate.preserve else [text]
for item in items: for item in content:
if isinstance(item, basestring): if isinstance(item, basestring):
if len(inline) == 0: if len(inline) == 0:
inline.text = (inline.text or '') + item inline.text = (inline.text or '') + item
@ -197,47 +221,67 @@ class MobiMLizer(object):
if not isinstance(elem.tag, basestring) \ if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS: or namespace(elem.tag) != XHTML_NS:
return return
style = stylizer.style(elem)
if style['display'] == 'none' \
or style['visibility'] == 'hidden':
return
tag = barename(elem.tag)
istate = copy.copy(istates[-1]) istate = copy.copy(istates[-1])
istate.list_num = 0 istate.list_num = 0
istates.append(istate) istates.append(istate)
tag = barename(elem.tag)
style = stylizer.style(elem)
left = 0 left = 0
isblock = style['display'] not in ('inline', 'inline-block') display = style['display']
isblock = not display.startswith('inline')
isblock = isblock and tag != 'br' isblock = isblock and tag != 'br'
if isblock: if isblock:
bstate.para = None bstate.para = None
margin = style['margin-left'] istate.halign = style['text-align']
if not isinstance(margin, (int, float)): istate.indent = style['text-indent']
margin = 0 if style['margin-left'] == 'auto' \
padding = style['padding-left'] and style['margin-right'] == 'auto':
if not isinstance(padding, (int, float)): istate.halign = 'center'
padding = 0 margin = asfloat(style['margin-left'])
padding = asfloat(style['padding-left'])
if tag != 'body':
left = margin + padding left = margin + padding
bstate.left += left istate.left += left
bstate.vmargin = max((bstate.vmargin, style['margin-top'])) vmargin = asfloat(style['margin-top'])
padding = style['padding-top'] bstate.vmargin = max((bstate.vmargin, vmargin))
if isinstance(padding, (int, float)) and padding > 0: vpadding = asfloat(style['padding-top'])
if vpadding > 0:
bstate.vpadding += bstate.vmargin bstate.vpadding += bstate.vmargin
bstate.vpadding = padding bstate.vmargin = 0
bstate.vpadding += vpadding
else:
margin = asfloat(style['margin-left'])
padding = asfloat(style['padding-left'])
lspace = margin + padding
if lspace > 0:
spaces = int(round((lspace * 3) / style['font-size']))
elem.text = (u'\xa0' * spaces) + (elem.text or '')
margin = asfloat(style['margin-right'])
padding = asfloat(style['padding-right'])
rspace = margin + padding
if rspace > 0:
spaces = int(round((rspace * 3) / style['font-size']))
if len(elem) == 0:
elem.text = (elem.text or '') + (u'\xa0' * spaces)
else:
last = elem[-1]
last.text = (last.text or '') + (u'\xa0' * spaces)
if style['page-break-before'] in PAGE_BREAKS: if style['page-break-before'] in PAGE_BREAKS:
bstate.pbreak = True bstate.pbreak = True
istate.fsize = self.mobimlize_font(style['font-size']) istate.fsize = self.mobimlize_font(style['font-size'])
istate.italic = True if style['font-style'] == 'italic' else False istate.italic = True if style['font-style'] == 'italic' else False
weight = style['font-weight'] weight = style['font-weight']
if isinstance(weight, (int, float)): istate.bold = weight in ('bold', 'bolder') or asfloat(weight) > 400
istate.bold = True if weight > 400 else False
else:
istate.bold = True if weight in ('bold', 'bolder') else False
istate.indent = style['text-indent']
istate.halign = style['text-align']
istate.preserve = (style['white-space'] in ('pre', 'pre-wrap')) istate.preserve = (style['white-space'] in ('pre', 'pre-wrap'))
if 'monospace' in style['font-family']:
istate.family = 'monospace'
valign = style['vertical-align'] valign = style['vertical-align']
if valign in ('super', 'sup') \ if valign in ('super', 'sup') and asfloat(valign) > 0:
or (isinstance(valign, (int, float)) and valign > 0):
istate.valign = 'super' istate.valign = 'super'
elif valign == 'sub' \ elif valign == 'sub' and asfloat(valign) < 0:
or (isinstance(valign, (int, float)) and valign < 0):
istate.valign = 'sub' istate.valign = 'sub'
else: else:
istate.valign = 'baseline' istate.valign = 'baseline'
@ -251,10 +295,15 @@ class MobiMLizer(object):
if tag == 'img' and 'src' in elem.attrib: if tag == 'img' and 'src' in elem.attrib:
istate.attrib['src'] = elem.attrib['src'] istate.attrib['src'] = elem.attrib['src']
istate.attrib['align'] = 'baseline' istate.attrib['align'] = 'baseline'
elif tag == 'hr' and 'width' in style.cssdict(): elif tag == 'hr' and asfloat(style['width']) > 0:
istate.attrib['width'] = mobimlize_measure(style['width']) prop = style['width'] / self.profile.width
elif tag in TABLE_TAGS: istate.attrib['width'] = "%d%%" % int(round(prop * 100))
istate.attrib.update(dict(elem.attrib)) elif display == 'table':
tag = 'table'
elif display == 'table-row':
tag = 'tr'
elif display == 'table-cell':
tag = 'td'
text = None text = None
if elem.text: if elem.text:
if istate.preserve: if istate.preserve:
@ -284,12 +333,14 @@ class MobiMLizer(object):
if para is not None and para.text == u'\xa0': if para is not None and para.text == u'\xa0':
para.getparent().replace(para, etree.Element('br')) para.getparent().replace(para, etree.Element('br'))
bstate.para = None bstate.para = None
bstate.left -= left bstate.istate = None
bstate.vmargin = max((bstate.vmargin, style['margin-bottom'])) vmargin = asfloat(style['margin-bottom'])
padding = style['padding-bottom'] bstate.vmargin = max((bstate.vmargin, vmargin))
if isinstance(padding, (int, float)) and padding > 0: vpadding = asfloat(style['padding-bottom'])
if vpadding > 0:
bstate.vpadding += bstate.vmargin bstate.vpadding += bstate.vmargin
bstate.vpadding = padding bstate.vmargin = 0
if bstate.nested: bstate.vpadding += vpadding
if tag in NESTABLE_TAGS and bstate.nested:
bstate.nested.pop() bstate.nested.pop()
istates.pop() istates.pop()

View File

@ -310,6 +310,7 @@ class MobiWriter(object):
data = data.getvalue() data = data.getvalue()
if len(data) < maxsizeb: if len(data) < maxsizeb:
return data return data
image = image.convert('RGBA')
for quality in xrange(95, -1, -1): for quality in xrange(95, -1, -1):
data = StringIO() data = StringIO()
image.save(data, 'JPEG', quality=quality) image.save(data, 'JPEG', quality=quality)
@ -425,7 +426,7 @@ class MobiWriter(object):
def main(argv=sys.argv): def main(argv=sys.argv):
from calibre.ebooks.oeb.base import DirWriter from calibre.ebooks.oeb.base import DirWriter
inpath, outpath = argv[1:] inpath, outpath = argv[1:]
context = Context('MSReader', 'MobiDesktop') context = Context('Firefox', 'MobiDesktop')
oeb = OEBBook(inpath) oeb = OEBBook(inpath)
#writer = MobiWriter(compression=PALMDOC) #writer = MobiWriter(compression=PALMDOC)
writer = MobiWriter(compression=UNCOMPRESSED) writer = MobiWriter(compression=UNCOMPRESSED)

View File

@ -41,7 +41,7 @@ PROFILES = {
# Not really, but let's pretend # Not really, but let's pretend
'MobiDesktop': 'MobiDesktop':
Profile(width=340, height=400, dpi=100, fbase=12, Profile(width=280, height=300, dpi=100, fbase=12,
fsizes=[9, 10, 11, 12, 14, 17, 20, 24]), fsizes=[9, 10, 11, 12, 14, 17, 20, 24]),
# No clue on usable screen size and DPI # No clue on usable screen size and DPI

View File

@ -92,7 +92,10 @@ def xpath(elem, expr):
return elem.xpath(expr, namespaces=XPNSMAP) return elem.xpath(expr, namespaces=XPNSMAP)
class CSSSelector(etree.XPath): class CSSSelector(etree.XPath):
MIN_SPACE_RE = re.compile(r' *([>~+]) *')
def __init__(self, css, namespaces=XPNSMAP): def __init__(self, css, namespaces=XPNSMAP):
css = self.MIN_SPACE_RE.sub(r'\1', css)
path = css_to_xpath(css) path = css_to_xpath(css)
etree.XPath.__init__(self, path, namespaces=namespaces) etree.XPath.__init__(self, path, namespaces=namespaces)
self.css = css self.css = css
@ -158,8 +161,8 @@ class Stylizer(object):
continue continue
for elem in selector(tree): for elem in selector(tree):
self.style(elem)._update_cssdict(cssdict) self.style(elem)._update_cssdict(cssdict)
for elem in tree.xpath('//*[@style]'): for elem in xpath(tree, '//h:*[@style]'):
self.style(elem)._apply_style_tag() self.style(elem)._apply_style_attr()
def flatten_rule(self, rule, href, index): def flatten_rule(self, rule, href, index):
@ -262,12 +265,14 @@ class Style(object):
self._profile = stylizer.profile self._profile = stylizer.profile
self._stylizer = stylizer self._stylizer = stylizer
self._style = {} self._style = {}
self._fontSize = None
self._width = None
stylizer._styles[element] = self stylizer._styles[element] = self
def _update_cssdict(self, cssdict): def _update_cssdict(self, cssdict):
self._style.update(cssdict) self._style.update(cssdict)
def _apply_style_tag(self): def _apply_style_attr(self):
attrib = self._element.attrib attrib = self._element.attrib
if 'style' in attrib: if 'style' in attrib:
style = CSSStyleDeclaration(attrib['style']) style = CSSStyleDeclaration(attrib['style'])
@ -333,12 +338,11 @@ class Style(object):
@property @property
def fontSize(self): def fontSize(self):
def normalize_fontsize(value, base=None): def normalize_fontsize(value, base):
result = None result = None
factor = None factor = None
if value == 'inherit': if value == 'inherit':
# We should only see this if the root element value = base
value = self._profile.fbase
if value in FONT_SIZE_NAMES: if value in FONT_SIZE_NAMES:
result = self._profile.fnames[value] result = self._profile.fnames[value]
elif value == 'smaller': elif value == 'smaller':
@ -360,6 +364,7 @@ class Style(object):
if factor: if factor:
result = factor * base result = factor * base
return result return result
if self._fontSize is None:
result = None result = None
if self._has_parent(): if self._has_parent():
styles = self._stylizer._styles styles = self._stylizer._styles
@ -371,11 +376,12 @@ class Style(object):
result = normalize_fontsize(size, base) result = normalize_fontsize(size, base)
else: else:
result = base result = base
self.__dict__['fontSize'] = result self._fontSize = result
return result return self._fontSize
@property @property
def width(self): def width(self):
if self._width is None:
result = None result = None
base = None base = None
if self._has_parent(): if self._has_parent():
@ -391,8 +397,8 @@ class Style(object):
result = self._unit_convert(width, base=base) result = self._unit_convert(width, base=base)
else: else:
result = base result = base
self.__dict__['width'] = result self._width = result
return result return self._width
def __str__(self): def __str__(self):
items = self._style.items() items = self._style.items()

View File

@ -20,8 +20,6 @@ from calibre.ebooks.oeb.base import namespace, barename
from calibre.ebooks.oeb.base import OEBBook from calibre.ebooks.oeb.base import OEBBook
from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.oeb.stylizer import Stylizer
BASEFONT_CSS = 'body { font-size: %0.5fpt; }'
COLLAPSE = re.compile(r'[ \t\r\n\v]+') COLLAPSE = re.compile(r'[ \t\r\n\v]+')
STRIPNUM = re.compile(r'[-0-9]+$') STRIPNUM = re.compile(r'[-0-9]+$')
@ -90,19 +88,11 @@ class CSSFlattener(object):
def transform(self, oeb, context): def transform(self, oeb, context):
self.oeb = oeb self.oeb = oeb
self.context = context self.context = context
self.premangle_css()
self.stylize_spine() self.stylize_spine()
self.sbase = self.baseline_spine() if self.fbase else None self.sbase = self.baseline_spine() if self.fbase else None
self.fmap = FontMapper(self.sbase, self.fbase, self.fkey) self.fmap = FontMapper(self.sbase, self.fbase, self.fkey)
self.flatten_spine() self.flatten_spine()
def premangle_css(self):
fbase = self.context.source.fbase
for item in self.oeb.manifest.values():
if item.media_type in OEB_STYLES:
basefont_css = BASEFONT_CSS % (fbase,)
item.data = basefont_css + item.data
def stylize_spine(self): def stylize_spine(self):
self.stylizers = {} self.stylizers = {}
profile = self.context.source profile = self.context.source
@ -112,13 +102,13 @@ class CSSFlattener(object):
self.stylizers[item] = stylizer self.stylizers[item] = stylizer
def baseline_node(self, node, stylizer, sizes, csize): def baseline_node(self, node, stylizer, sizes, csize):
if node.tail:
sizes[csize] += len(COLLAPSE.sub(' ', node.tail))
csize = stylizer.style(node)['font-size'] csize = stylizer.style(node)['font-size']
if node.text: if node.text:
sizes[csize] += len(COLLAPSE.sub(' ', node.text)) sizes[csize] += len(COLLAPSE.sub(' ', node.text))
for child in node: for child in node:
self.baseline_node(child, stylizer, sizes, csize) self.baseline_node(child, stylizer, sizes, csize)
if child.tail:
sizes[csize] += len(COLLAPSE.sub(' ', child.tail))
def baseline_spine(self): def baseline_spine(self):
sizes = defaultdict(float) sizes = defaultdict(float)
@ -155,6 +145,27 @@ class CSSFlattener(object):
tag = barename(node.tag) tag = barename(node.tag)
style = stylizer.style(node) style = stylizer.style(node)
cssdict = style.cssdict() cssdict = style.cssdict()
if 'align' in node.attrib:
cssdict['text-align'] = node.attrib['align']
del node.attrib['align']
if node.tag == XHTML('font'):
node.tag = XHTML('span')
if 'size' in node.attrib:
size = node.attrib['size']
if size.startswith('+'):
cssdict['font-size'] = 'larger'
elif size.startswith('-'):
cssdict['font-size'] = 'smaller'
else:
fnums = self.context.source.fnums
cssdict['font-size'] = fnums[int(size)]
del node.attrib['size']
if 'color' in node.attrib:
cssdict['color'] = node.attrib['color']
del node.attrib['color']
if 'bgcolor' in node.attrib:
cssdict['background-color'] = node.attrib['bgcolor']
del node.attrib['bgcolor']
if cssdict: if cssdict:
if 'font-size' in cssdict: if 'font-size' in cssdict:
fsize = self.fmap[style['font-size']] fsize = self.fmap[style['font-size']]
@ -170,6 +181,8 @@ class CSSFlattener(object):
left -= style['text-indent'] left -= style['text-indent']
if self.unfloat and 'float' in cssdict and tag != 'img': if self.unfloat and 'float' in cssdict and tag != 'img':
del cssdict['float'] del cssdict['float']
if cssdict.get('display', 'none') != 'none':
del cssdict['display']
if 'vertical-align' in cssdict: if 'vertical-align' in cssdict:
if cssdict['vertical-align'] == 'sup': if cssdict['vertical-align'] == 'sup':
cssdict['vertical-align'] = 'super' cssdict['vertical-align'] = 'super'