Various tweaks and improvements to Mobi generation.

This commit is contained in:
Marshall T. Vandegrift 2009-01-04 23:30:47 -05:00
parent 8348264198
commit d3f12fcf36
5 changed files with 168 additions and 97 deletions

View File

@ -29,6 +29,11 @@ PAGE_BREAKS = set(['always', 'odd', 'even'])
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
def asfloat(value):
if not isinstance(value, (int, long, float)):
return 0.0
return float(value)
class BlockState(object):
def __init__(self, body):
self.body = body
@ -37,12 +42,12 @@ class BlockState(object):
self.inline = None
self.vpadding = 0.
self.vmargin = 0.
self.left = 0.
self.pbreak = False
self.istate = None
class FormatState(object):
def __init__(self):
self.left = 0.
self.halign = 'auto'
self.indent = 0.
self.fsize = 3
@ -51,6 +56,7 @@ class FormatState(object):
self.italic = False
self.bold = False
self.preserve = True
self.family = 'serif'
self.href = None
self.list_num = 0
self.attrib = {}
@ -60,7 +66,9 @@ class FormatState(object):
and self.italic == other.italic \
and self.bold == other.bold \
and self.href == other.href \
and self.valign == other.valign
and self.valign == other.valign \
and self.preserve == other.preserve \
and self.family == other.family
def __ne__(self, other):
return not self.__eq__(other)
@ -94,7 +102,6 @@ class MobiMLizer(object):
def mobimlize_measure(self, ptsize):
if isinstance(ptsize, basestring):
return ptsize
# All MobiML measures occur in the default font-space
fbase = self.profile.fbase
if ptsize < fbase:
return "%dpt" % int(round(ptsize * 2))
@ -116,7 +123,7 @@ class MobiMLizer(object):
istate = istates[-1]
if istate.ids:
body = bstate.body
index = max((0, len(body) - 2))
index = max((0, len(body) - 1))
for id in istate.ids:
body.insert(index, etree.Element('a', attrib={'id': id}))
istate.ids.clear()
@ -126,22 +133,30 @@ class MobiMLizer(object):
elif para is None:
bstate.istate = None
parent = bstate.nested[-1] if bstate.nested else bstate.body
indent = istate.indent
left = istate.left
if indent < 0 and abs(indent) < left:
left += indent
indent = 0
elif indent != 0 and abs(indent) < self.profile.fbase:
indent = (indent / abs(indent)) * self.profile.fbase
if bstate.pbreak:
etree.SubElement(parent, MBP('pagebreak'))
bstate.pbreak = False
if tag in NESTABLE_TAGS:
para = wrapper = etree.SubElement(parent, tag)
bstate.nested.append(para)
# Should instead support full CSS lists?
if tag == 'li' and len(istates) > 1:
istates[-2].list_num += 1
para.attrib['value'] = str(istates[-2].list_num)
elif bstate.left > 0 and istate.indent >= 0:
elif left > 0 and indent >= 0:
para = wrapper = etree.SubElement(parent, 'blockquote')
left = int(round(bstate.left / self.profile.fbase)) - 1
while left > 0:
para = wrapper
emleft = int(round(left / self.profile.fbase)) - 1
emleft = min((emleft, 10))
while emleft > 0:
para = etree.SubElement(para, 'blockquote')
left -= 1
emleft -= 1
else:
ptag = tag if tag in HEADER_TAGS else 'p'
para = wrapper = etree.SubElement(parent, ptag)
@ -150,7 +165,14 @@ class MobiMLizer(object):
bstate.vpadding = bstate.vmargin = 0
if tag not in TABLE_TAGS:
wrapper.attrib['height'] = self.mobimlize_measure(vspace)
para.attrib['width'] = self.mobimlize_measure(istate.indent)
para.attrib['width'] = self.mobimlize_measure(indent)
elif tag == 'table' and vspace > 0:
body = bstate.body
vspace = int(round(vspace / self.profile.fbase))
index = max((0, len(body) - 1))
while vspace > 0:
body.insert(index, etree.Element('br'))
vspace -= 1
if istate.halign != 'auto':
para.attrib['align'] = istate.halign
pstate = bstate.istate
@ -158,6 +180,8 @@ class MobiMLizer(object):
bstate.inline = para
pstate = bstate.istate = None
etree.SubElement(para, tag, attrib=istate.attrib)
elif tag in TABLE_TAGS:
para.attrib['valign'] = 'top'
if not text:
return
if not pstate or istate != pstate:
@ -169,7 +193,7 @@ class MobiMLizer(object):
inline = etree.SubElement(inline, 'sup')
elif valign == 'sub':
inline = etree.SubElement(inline, 'sub')
if istate.preserve:
if istate.family == 'monospace':
inline = etree.SubElement(inline, 'tt')
if fsize != 3:
inline = etree.SubElement(inline, 'font', size=str(fsize))
@ -182,8 +206,8 @@ class MobiMLizer(object):
bstate.inline = inline
bstate.istate = istate
inline = bstate.inline
items = self.preize_text(text) if istate.preserve else [text]
for item in items:
content = self.preize_text(text) if istate.preserve else [text]
for item in content:
if isinstance(item, basestring):
if len(inline) == 0:
inline.text = (inline.text or '') + item
@ -197,47 +221,67 @@ class MobiMLizer(object):
if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS:
return
style = stylizer.style(elem)
if style['display'] == 'none' \
or style['visibility'] == 'hidden':
return
tag = barename(elem.tag)
istate = copy.copy(istates[-1])
istate.list_num = 0
istates.append(istate)
tag = barename(elem.tag)
style = stylizer.style(elem)
left = 0
isblock = style['display'] not in ('inline', 'inline-block')
display = style['display']
isblock = not display.startswith('inline')
isblock = isblock and tag != 'br'
if isblock:
bstate.para = None
margin = style['margin-left']
if not isinstance(margin, (int, float)):
margin = 0
padding = style['padding-left']
if not isinstance(padding, (int, float)):
padding = 0
left = margin + padding
bstate.left += left
bstate.vmargin = max((bstate.vmargin, style['margin-top']))
padding = style['padding-top']
if isinstance(padding, (int, float)) and padding > 0:
istate.halign = style['text-align']
istate.indent = style['text-indent']
if style['margin-left'] == 'auto' \
and style['margin-right'] == 'auto':
istate.halign = 'center'
margin = asfloat(style['margin-left'])
padding = asfloat(style['padding-left'])
if tag != 'body':
left = margin + padding
istate.left += left
vmargin = asfloat(style['margin-top'])
bstate.vmargin = max((bstate.vmargin, vmargin))
vpadding = asfloat(style['padding-top'])
if vpadding > 0:
bstate.vpadding += bstate.vmargin
bstate.vpadding = padding
bstate.vmargin = 0
bstate.vpadding += vpadding
else:
margin = asfloat(style['margin-left'])
padding = asfloat(style['padding-left'])
lspace = margin + padding
if lspace > 0:
spaces = int(round((lspace * 3) / style['font-size']))
elem.text = (u'\xa0' * spaces) + (elem.text or '')
margin = asfloat(style['margin-right'])
padding = asfloat(style['padding-right'])
rspace = margin + padding
if rspace > 0:
spaces = int(round((rspace * 3) / style['font-size']))
if len(elem) == 0:
elem.text = (elem.text or '') + (u'\xa0' * spaces)
else:
last = elem[-1]
last.text = (last.text or '') + (u'\xa0' * spaces)
if style['page-break-before'] in PAGE_BREAKS:
bstate.pbreak = True
istate.fsize = self.mobimlize_font(style['font-size'])
istate.italic = True if style['font-style'] == 'italic' else False
weight = style['font-weight']
if isinstance(weight, (int, float)):
istate.bold = True if weight > 400 else False
else:
istate.bold = True if weight in ('bold', 'bolder') else False
istate.indent = style['text-indent']
istate.halign = style['text-align']
istate.bold = weight in ('bold', 'bolder') or asfloat(weight) > 400
istate.preserve = (style['white-space'] in ('pre', 'pre-wrap'))
if 'monospace' in style['font-family']:
istate.family = 'monospace'
valign = style['vertical-align']
if valign in ('super', 'sup') \
or (isinstance(valign, (int, float)) and valign > 0):
if valign in ('super', 'sup') and asfloat(valign) > 0:
istate.valign = 'super'
elif valign == 'sub' \
or (isinstance(valign, (int, float)) and valign < 0):
elif valign == 'sub' and asfloat(valign) < 0:
istate.valign = 'sub'
else:
istate.valign = 'baseline'
@ -251,10 +295,15 @@ class MobiMLizer(object):
if tag == 'img' and 'src' in elem.attrib:
istate.attrib['src'] = elem.attrib['src']
istate.attrib['align'] = 'baseline'
elif tag == 'hr' and 'width' in style.cssdict():
istate.attrib['width'] = mobimlize_measure(style['width'])
elif tag in TABLE_TAGS:
istate.attrib.update(dict(elem.attrib))
elif tag == 'hr' and asfloat(style['width']) > 0:
prop = style['width'] / self.profile.width
istate.attrib['width'] = "%d%%" % int(round(prop * 100))
elif display == 'table':
tag = 'table'
elif display == 'table-row':
tag = 'tr'
elif display == 'table-cell':
tag = 'td'
text = None
if elem.text:
if istate.preserve:
@ -284,12 +333,14 @@ class MobiMLizer(object):
if para is not None and para.text == u'\xa0':
para.getparent().replace(para, etree.Element('br'))
bstate.para = None
bstate.left -= left
bstate.vmargin = max((bstate.vmargin, style['margin-bottom']))
padding = style['padding-bottom']
if isinstance(padding, (int, float)) and padding > 0:
bstate.istate = None
vmargin = asfloat(style['margin-bottom'])
bstate.vmargin = max((bstate.vmargin, vmargin))
vpadding = asfloat(style['padding-bottom'])
if vpadding > 0:
bstate.vpadding += bstate.vmargin
bstate.vpadding = padding
if bstate.nested:
bstate.vmargin = 0
bstate.vpadding += vpadding
if tag in NESTABLE_TAGS and bstate.nested:
bstate.nested.pop()
istates.pop()

View File

@ -310,6 +310,7 @@ class MobiWriter(object):
data = data.getvalue()
if len(data) < maxsizeb:
return data
image = image.convert('RGBA')
for quality in xrange(95, -1, -1):
data = StringIO()
image.save(data, 'JPEG', quality=quality)
@ -425,7 +426,7 @@ class MobiWriter(object):
def main(argv=sys.argv):
from calibre.ebooks.oeb.base import DirWriter
inpath, outpath = argv[1:]
context = Context('MSReader', 'MobiDesktop')
context = Context('Firefox', 'MobiDesktop')
oeb = OEBBook(inpath)
#writer = MobiWriter(compression=PALMDOC)
writer = MobiWriter(compression=UNCOMPRESSED)

View File

@ -41,7 +41,7 @@ PROFILES = {
# Not really, but let's pretend
'MobiDesktop':
Profile(width=340, height=400, dpi=100, fbase=12,
Profile(width=280, height=300, dpi=100, fbase=12,
fsizes=[9, 10, 11, 12, 14, 17, 20, 24]),
# No clue on usable screen size and DPI

View File

@ -92,7 +92,10 @@ def xpath(elem, expr):
return elem.xpath(expr, namespaces=XPNSMAP)
class CSSSelector(etree.XPath):
MIN_SPACE_RE = re.compile(r' *([>~+]) *')
def __init__(self, css, namespaces=XPNSMAP):
css = self.MIN_SPACE_RE.sub(r'\1', css)
path = css_to_xpath(css)
etree.XPath.__init__(self, path, namespaces=namespaces)
self.css = css
@ -158,8 +161,8 @@ class Stylizer(object):
continue
for elem in selector(tree):
self.style(elem)._update_cssdict(cssdict)
for elem in tree.xpath('//*[@style]'):
self.style(elem)._apply_style_tag()
for elem in xpath(tree, '//h:*[@style]'):
self.style(elem)._apply_style_attr()
def flatten_rule(self, rule, href, index):
@ -262,12 +265,14 @@ class Style(object):
self._profile = stylizer.profile
self._stylizer = stylizer
self._style = {}
self._fontSize = None
self._width = None
stylizer._styles[element] = self
def _update_cssdict(self, cssdict):
self._style.update(cssdict)
def _apply_style_tag(self):
def _apply_style_attr(self):
attrib = self._element.attrib
if 'style' in attrib:
style = CSSStyleDeclaration(attrib['style'])
@ -333,12 +338,11 @@ class Style(object):
@property
def fontSize(self):
def normalize_fontsize(value, base=None):
def normalize_fontsize(value, base):
result = None
factor = None
if value == 'inherit':
# We should only see this if the root element
value = self._profile.fbase
value = base
if value in FONT_SIZE_NAMES:
result = self._profile.fnames[value]
elif value == 'smaller':
@ -360,39 +364,41 @@ class Style(object):
if factor:
result = factor * base
return result
result = None
if self._has_parent():
styles = self._stylizer._styles
base = styles[self._element.getparent()].fontSize
else:
base = self._profile.fbase
if 'font-size' in self._style:
size = self._style['font-size']
result = normalize_fontsize(size, base)
else:
result = base
self.__dict__['fontSize'] = result
return result
if self._fontSize is None:
result = None
if self._has_parent():
styles = self._stylizer._styles
base = styles[self._element.getparent()].fontSize
else:
base = self._profile.fbase
if 'font-size' in self._style:
size = self._style['font-size']
result = normalize_fontsize(size, base)
else:
result = base
self._fontSize = result
return self._fontSize
@property
def width(self):
result = None
base = None
if self._has_parent():
styles = self._stylizer._styles
base = styles[self._element.getparent()].width
else:
base = self._profile.width
if 'width' in self._style:
width = self._style['width']
if width == 'auto':
result = base
if self._width is None:
result = None
base = None
if self._has_parent():
styles = self._stylizer._styles
base = styles[self._element.getparent()].width
else:
result = self._unit_convert(width, base=base)
else:
result = base
self.__dict__['width'] = result
return result
base = self._profile.width
if 'width' in self._style:
width = self._style['width']
if width == 'auto':
result = base
else:
result = self._unit_convert(width, base=base)
else:
result = base
self._width = result
return self._width
def __str__(self):
items = self._style.items()

View File

@ -20,8 +20,6 @@ from calibre.ebooks.oeb.base import namespace, barename
from calibre.ebooks.oeb.base import OEBBook
from calibre.ebooks.oeb.stylizer import Stylizer
BASEFONT_CSS = 'body { font-size: %0.5fpt; }'
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
STRIPNUM = re.compile(r'[-0-9]+$')
@ -90,19 +88,11 @@ class CSSFlattener(object):
def transform(self, oeb, context):
self.oeb = oeb
self.context = context
self.premangle_css()
self.stylize_spine()
self.sbase = self.baseline_spine() if self.fbase else None
self.fmap = FontMapper(self.sbase, self.fbase, self.fkey)
self.flatten_spine()
def premangle_css(self):
fbase = self.context.source.fbase
for item in self.oeb.manifest.values():
if item.media_type in OEB_STYLES:
basefont_css = BASEFONT_CSS % (fbase,)
item.data = basefont_css + item.data
def stylize_spine(self):
self.stylizers = {}
profile = self.context.source
@ -112,13 +102,13 @@ class CSSFlattener(object):
self.stylizers[item] = stylizer
def baseline_node(self, node, stylizer, sizes, csize):
if node.tail:
sizes[csize] += len(COLLAPSE.sub(' ', node.tail))
csize = stylizer.style(node)['font-size']
if node.text:
sizes[csize] += len(COLLAPSE.sub(' ', node.text))
for child in node:
self.baseline_node(child, stylizer, sizes, csize)
if child.tail:
sizes[csize] += len(COLLAPSE.sub(' ', child.tail))
def baseline_spine(self):
sizes = defaultdict(float)
@ -155,6 +145,27 @@ class CSSFlattener(object):
tag = barename(node.tag)
style = stylizer.style(node)
cssdict = style.cssdict()
if 'align' in node.attrib:
cssdict['text-align'] = node.attrib['align']
del node.attrib['align']
if node.tag == XHTML('font'):
node.tag = XHTML('span')
if 'size' in node.attrib:
size = node.attrib['size']
if size.startswith('+'):
cssdict['font-size'] = 'larger'
elif size.startswith('-'):
cssdict['font-size'] = 'smaller'
else:
fnums = self.context.source.fnums
cssdict['font-size'] = fnums[int(size)]
del node.attrib['size']
if 'color' in node.attrib:
cssdict['color'] = node.attrib['color']
del node.attrib['color']
if 'bgcolor' in node.attrib:
cssdict['background-color'] = node.attrib['bgcolor']
del node.attrib['bgcolor']
if cssdict:
if 'font-size' in cssdict:
fsize = self.fmap[style['font-size']]
@ -170,6 +181,8 @@ class CSSFlattener(object):
left -= style['text-indent']
if self.unfloat and 'float' in cssdict and tag != 'img':
del cssdict['float']
if cssdict.get('display', 'none') != 'none':
del cssdict['display']
if 'vertical-align' in cssdict:
if cssdict['vertical-align'] == 'sup':
cssdict['vertical-align'] = 'super'