From d3f12fcf36d642a44e810e8ad91f131f4e08dcb9 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Sun, 4 Jan 2009 23:30:47 -0500 Subject: [PATCH] Various tweaks and improvements to Mobi generation. --- src/calibre/ebooks/mobi/mobiml.py | 147 +++++++++++++------ src/calibre/ebooks/mobi/writer.py | 3 +- src/calibre/ebooks/oeb/profile.py | 2 +- src/calibre/ebooks/oeb/stylizer.py | 76 +++++----- src/calibre/ebooks/oeb/transforms/flatcss.py | 37 +++-- 5 files changed, 168 insertions(+), 97 deletions(-) diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py index ef2b307f8d..1ad70c0865 100644 --- a/src/calibre/ebooks/mobi/mobiml.py +++ b/src/calibre/ebooks/mobi/mobiml.py @@ -29,6 +29,11 @@ PAGE_BREAKS = set(['always', 'odd', 'even']) COLLAPSE = re.compile(r'[ \t\r\n\v]+') +def asfloat(value): + if not isinstance(value, (int, long, float)): + return 0.0 + return float(value) + class BlockState(object): def __init__(self, body): self.body = body @@ -37,12 +42,12 @@ class BlockState(object): self.inline = None self.vpadding = 0. self.vmargin = 0. - self.left = 0. self.pbreak = False self.istate = None class FormatState(object): def __init__(self): + self.left = 0. self.halign = 'auto' self.indent = 0. 
self.fsize = 3 @@ -51,6 +56,7 @@ class FormatState(object): self.italic = False self.bold = False self.preserve = True + self.family = 'serif' self.href = None self.list_num = 0 self.attrib = {} @@ -60,7 +66,9 @@ class FormatState(object): and self.italic == other.italic \ and self.bold == other.bold \ and self.href == other.href \ - and self.valign == other.valign + and self.valign == other.valign \ + and self.preserve == other.preserve \ + and self.family == other.family def __ne__(self, other): return not self.__eq__(other) @@ -94,7 +102,6 @@ class MobiMLizer(object): def mobimlize_measure(self, ptsize): if isinstance(ptsize, basestring): return ptsize - # All MobiML measures occur in the default font-space fbase = self.profile.fbase if ptsize < fbase: return "%dpt" % int(round(ptsize * 2)) @@ -116,7 +123,7 @@ class MobiMLizer(object): istate = istates[-1] if istate.ids: body = bstate.body - index = max((0, len(body) - 2)) + index = max((0, len(body) - 1)) for id in istate.ids: body.insert(index, etree.Element('a', attrib={'id': id})) istate.ids.clear() @@ -126,22 +133,30 @@ class MobiMLizer(object): elif para is None: bstate.istate = None parent = bstate.nested[-1] if bstate.nested else bstate.body + indent = istate.indent + left = istate.left + if indent < 0 and abs(indent) < left: + left += indent + indent = 0 + elif indent != 0 and abs(indent) < self.profile.fbase: + indent = (indent / abs(indent)) * self.profile.fbase if bstate.pbreak: etree.SubElement(parent, MBP('pagebreak')) bstate.pbreak = False if tag in NESTABLE_TAGS: para = wrapper = etree.SubElement(parent, tag) bstate.nested.append(para) - # Should instead support full CSS lists? 
if tag == 'li' and len(istates) > 1: istates[-2].list_num += 1 para.attrib['value'] = str(istates[-2].list_num) - elif bstate.left > 0 and istate.indent >= 0: + elif left > 0 and indent >= 0: para = wrapper = etree.SubElement(parent, 'blockquote') - left = int(round(bstate.left / self.profile.fbase)) - 1 - while left > 0: + para = wrapper + emleft = int(round(left / self.profile.fbase)) - 1 + emleft = min((emleft, 10)) + while emleft > 0: para = etree.SubElement(para, 'blockquote') - left -= 1 + emleft -= 1 else: ptag = tag if tag in HEADER_TAGS else 'p' para = wrapper = etree.SubElement(parent, ptag) @@ -150,7 +165,14 @@ class MobiMLizer(object): bstate.vpadding = bstate.vmargin = 0 if tag not in TABLE_TAGS: wrapper.attrib['height'] = self.mobimlize_measure(vspace) - para.attrib['width'] = self.mobimlize_measure(istate.indent) + para.attrib['width'] = self.mobimlize_measure(indent) + elif tag == 'table' and vspace > 0: + body = bstate.body + vspace = int(round(vspace / self.profile.fbase)) + index = max((0, len(body) - 1)) + while vspace > 0: + body.insert(index, etree.Element('br')) + vspace -= 1 if istate.halign != 'auto': para.attrib['align'] = istate.halign pstate = bstate.istate @@ -158,6 +180,8 @@ class MobiMLizer(object): bstate.inline = para pstate = bstate.istate = None etree.SubElement(para, tag, attrib=istate.attrib) + elif tag in TABLE_TAGS: + para.attrib['valign'] = 'top' if not text: return if not pstate or istate != pstate: @@ -169,7 +193,7 @@ class MobiMLizer(object): inline = etree.SubElement(inline, 'sup') elif valign == 'sub': inline = etree.SubElement(inline, 'sub') - if istate.preserve: + if istate.family == 'monospace': inline = etree.SubElement(inline, 'tt') if fsize != 3: inline = etree.SubElement(inline, 'font', size=str(fsize)) @@ -182,8 +206,8 @@ class MobiMLizer(object): bstate.inline = inline bstate.istate = istate inline = bstate.inline - items = self.preize_text(text) if istate.preserve else [text] - for item in items: + content = 
self.preize_text(text) if istate.preserve else [text] + for item in content: if isinstance(item, basestring): if len(inline) == 0: inline.text = (inline.text or '') + item @@ -197,47 +221,67 @@ class MobiMLizer(object): if not isinstance(elem.tag, basestring) \ or namespace(elem.tag) != XHTML_NS: return + style = stylizer.style(elem) + if style['display'] == 'none' \ + or style['visibility'] == 'hidden': + return + tag = barename(elem.tag) istate = copy.copy(istates[-1]) istate.list_num = 0 istates.append(istate) - tag = barename(elem.tag) - style = stylizer.style(elem) left = 0 - isblock = style['display'] not in ('inline', 'inline-block') + display = style['display'] + isblock = not display.startswith('inline') isblock = isblock and tag != 'br' if isblock: bstate.para = None - margin = style['margin-left'] - if not isinstance(margin, (int, float)): - margin = 0 - padding = style['padding-left'] - if not isinstance(padding, (int, float)): - padding = 0 - left = margin + padding - bstate.left += left - bstate.vmargin = max((bstate.vmargin, style['margin-top'])) - padding = style['padding-top'] - if isinstance(padding, (int, float)) and padding > 0: + istate.halign = style['text-align'] + istate.indent = style['text-indent'] + if style['margin-left'] == 'auto' \ + and style['margin-right'] == 'auto': + istate.halign = 'center' + margin = asfloat(style['margin-left']) + padding = asfloat(style['padding-left']) + if tag != 'body': + left = margin + padding + istate.left += left + vmargin = asfloat(style['margin-top']) + bstate.vmargin = max((bstate.vmargin, vmargin)) + vpadding = asfloat(style['padding-top']) + if vpadding > 0: bstate.vpadding += bstate.vmargin - bstate.vpadding = padding + bstate.vmargin = 0 + bstate.vpadding += vpadding + else: + margin = asfloat(style['margin-left']) + padding = asfloat(style['padding-left']) + lspace = margin + padding + if lspace > 0: + spaces = int(round((lspace * 3) / style['font-size'])) + elem.text = (u'\xa0' * spaces) + 
(elem.text or '') + margin = asfloat(style['margin-right']) + padding = asfloat(style['padding-right']) + rspace = margin + padding + if rspace > 0: + spaces = int(round((rspace * 3) / style['font-size'])) + if len(elem) == 0: + elem.text = (elem.text or '') + (u'\xa0' * spaces) + else: + last = elem[-1] + last.text = (last.text or '') + (u'\xa0' * spaces) if style['page-break-before'] in PAGE_BREAKS: bstate.pbreak = True istate.fsize = self.mobimlize_font(style['font-size']) istate.italic = True if style['font-style'] == 'italic' else False weight = style['font-weight'] - if isinstance(weight, (int, float)): - istate.bold = True if weight > 400 else False - else: - istate.bold = True if weight in ('bold', 'bolder') else False - istate.indent = style['text-indent'] - istate.halign = style['text-align'] + istate.bold = weight in ('bold', 'bolder') or asfloat(weight) > 400 istate.preserve = (style['white-space'] in ('pre', 'pre-wrap')) + if 'monospace' in style['font-family']: + istate.family = 'monospace' valign = style['vertical-align'] - if valign in ('super', 'sup') \ - or (isinstance(valign, (int, float)) and valign > 0): + if valign in ('super', 'sup') or asfloat(valign) > 0: istate.valign = 'super' - elif valign == 'sub' \ - or (isinstance(valign, (int, float)) and valign < 0): + elif valign == 'sub' or asfloat(valign) < 0: istate.valign = 'sub' else: istate.valign = 'baseline' @@ -251,10 +295,15 @@ class MobiMLizer(object): if tag == 'img' and 'src' in elem.attrib: istate.attrib['src'] = elem.attrib['src'] istate.attrib['align'] = 'baseline' - elif tag == 'hr' and 'width' in style.cssdict(): - istate.attrib['width'] = mobimlize_measure(style['width']) - elif tag in TABLE_TAGS: - istate.attrib.update(dict(elem.attrib)) + elif tag == 'hr' and asfloat(style['width']) > 0: + prop = style['width'] / self.profile.width + istate.attrib['width'] = "%d%%" % int(round(prop * 100)) + elif display == 'table': + tag = 'table' + elif display == 'table-row': + tag = 'tr' 
+ elif display == 'table-cell': + tag = 'td' text = None if elem.text: if istate.preserve: @@ -284,12 +333,14 @@ class MobiMLizer(object): if para is not None and para.text == u'\xa0': para.getparent().replace(para, etree.Element('br')) bstate.para = None - bstate.left -= left - bstate.vmargin = max((bstate.vmargin, style['margin-bottom'])) - padding = style['padding-bottom'] - if isinstance(padding, (int, float)) and padding > 0: + bstate.istate = None + vmargin = asfloat(style['margin-bottom']) + bstate.vmargin = max((bstate.vmargin, vmargin)) + vpadding = asfloat(style['padding-bottom']) + if vpadding > 0: bstate.vpadding += bstate.vmargin - bstate.vpadding = padding - if bstate.nested: + bstate.vmargin = 0 + bstate.vpadding += vpadding + if tag in NESTABLE_TAGS and bstate.nested: bstate.nested.pop() istates.pop() diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index 3a529deac8..7cfcd7a415 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -310,6 +310,7 @@ class MobiWriter(object): data = data.getvalue() if len(data) < maxsizeb: return data + image = image.convert('RGBA') for quality in xrange(95, -1, -1): data = StringIO() image.save(data, 'JPEG', quality=quality) @@ -425,7 +426,7 @@ class MobiWriter(object): def main(argv=sys.argv): from calibre.ebooks.oeb.base import DirWriter inpath, outpath = argv[1:] - context = Context('MSReader', 'MobiDesktop') + context = Context('Firefox', 'MobiDesktop') oeb = OEBBook(inpath) #writer = MobiWriter(compression=PALMDOC) writer = MobiWriter(compression=UNCOMPRESSED) diff --git a/src/calibre/ebooks/oeb/profile.py b/src/calibre/ebooks/oeb/profile.py index b76de13c14..901555fdd7 100644 --- a/src/calibre/ebooks/oeb/profile.py +++ b/src/calibre/ebooks/oeb/profile.py @@ -41,7 +41,7 @@ PROFILES = { # Not really, but let's pretend 'MobiDesktop': - Profile(width=340, height=400, dpi=100, fbase=12, + Profile(width=280, height=300, dpi=100, fbase=12, fsizes=[9, 
10, 11, 12, 14, 17, 20, 24]), # No clue on usable screen size and DPI diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py index b6ff56c8ed..45e248febe 100644 --- a/src/calibre/ebooks/oeb/stylizer.py +++ b/src/calibre/ebooks/oeb/stylizer.py @@ -92,7 +92,10 @@ def xpath(elem, expr): return elem.xpath(expr, namespaces=XPNSMAP) class CSSSelector(etree.XPath): + MIN_SPACE_RE = re.compile(r' *([>~+]) *') + def __init__(self, css, namespaces=XPNSMAP): + css = self.MIN_SPACE_RE.sub(r'\1', css) path = css_to_xpath(css) etree.XPath.__init__(self, path, namespaces=namespaces) self.css = css @@ -158,8 +161,8 @@ class Stylizer(object): continue for elem in selector(tree): self.style(elem)._update_cssdict(cssdict) - for elem in tree.xpath('//*[@style]'): - self.style(elem)._apply_style_tag() + for elem in xpath(tree, '//h:*[@style]'): + self.style(elem)._apply_style_attr() def flatten_rule(self, rule, href, index): @@ -262,12 +265,14 @@ class Style(object): self._profile = stylizer.profile self._stylizer = stylizer self._style = {} + self._fontSize = None + self._width = None stylizer._styles[element] = self def _update_cssdict(self, cssdict): self._style.update(cssdict) - def _apply_style_tag(self): + def _apply_style_attr(self): attrib = self._element.attrib if 'style' in attrib: style = CSSStyleDeclaration(attrib['style']) @@ -333,12 +338,11 @@ class Style(object): @property def fontSize(self): - def normalize_fontsize(value, base=None): + def normalize_fontsize(value, base): result = None factor = None if value == 'inherit': - # We should only see this if the root element - value = self._profile.fbase + value = base if value in FONT_SIZE_NAMES: result = self._profile.fnames[value] elif value == 'smaller': @@ -360,39 +364,41 @@ class Style(object): if factor: result = factor * base return result - result = None - if self._has_parent(): - styles = self._stylizer._styles - base = styles[self._element.getparent()].fontSize - else: - base = 
self._profile.fbase - if 'font-size' in self._style: - size = self._style['font-size'] - result = normalize_fontsize(size, base) - else: - result = base - self.__dict__['fontSize'] = result - return result + if self._fontSize is None: + result = None + if self._has_parent(): + styles = self._stylizer._styles + base = styles[self._element.getparent()].fontSize + else: + base = self._profile.fbase + if 'font-size' in self._style: + size = self._style['font-size'] + result = normalize_fontsize(size, base) + else: + result = base + self._fontSize = result + return self._fontSize @property def width(self): - result = None - base = None - if self._has_parent(): - styles = self._stylizer._styles - base = styles[self._element.getparent()].width - else: - base = self._profile.width - if 'width' in self._style: - width = self._style['width'] - if width == 'auto': - result = base + if self._width is None: + result = None + base = None + if self._has_parent(): + styles = self._stylizer._styles + base = styles[self._element.getparent()].width else: - result = self._unit_convert(width, base=base) - else: - result = base - self.__dict__['width'] = result - return result + base = self._profile.width + if 'width' in self._style: + width = self._style['width'] + if width == 'auto': + result = base + else: + result = self._unit_convert(width, base=base) + else: + result = base + self._width = result + return self._width def __str__(self): items = self._style.items() diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index 8a2bc2a4fa..a2c002b022 100644 --- a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -20,8 +20,6 @@ from calibre.ebooks.oeb.base import namespace, barename from calibre.ebooks.oeb.base import OEBBook from calibre.ebooks.oeb.stylizer import Stylizer -BASEFONT_CSS = 'body { font-size: %0.5fpt; }' - COLLAPSE = re.compile(r'[ \t\r\n\v]+') STRIPNUM = 
re.compile(r'[-0-9]+$') @@ -90,19 +88,11 @@ class CSSFlattener(object): def transform(self, oeb, context): self.oeb = oeb self.context = context - self.premangle_css() self.stylize_spine() self.sbase = self.baseline_spine() if self.fbase else None self.fmap = FontMapper(self.sbase, self.fbase, self.fkey) self.flatten_spine() - def premangle_css(self): - fbase = self.context.source.fbase - for item in self.oeb.manifest.values(): - if item.media_type in OEB_STYLES: - basefont_css = BASEFONT_CSS % (fbase,) - item.data = basefont_css + item.data - def stylize_spine(self): self.stylizers = {} profile = self.context.source @@ -112,13 +102,13 @@ class CSSFlattener(object): self.stylizers[item] = stylizer def baseline_node(self, node, stylizer, sizes, csize): - if node.tail: - sizes[csize] += len(COLLAPSE.sub(' ', node.tail)) csize = stylizer.style(node)['font-size'] if node.text: sizes[csize] += len(COLLAPSE.sub(' ', node.text)) for child in node: self.baseline_node(child, stylizer, sizes, csize) + if child.tail: + sizes[csize] += len(COLLAPSE.sub(' ', child.tail)) def baseline_spine(self): sizes = defaultdict(float) @@ -155,6 +145,27 @@ class CSSFlattener(object): tag = barename(node.tag) style = stylizer.style(node) cssdict = style.cssdict() + if 'align' in node.attrib: + cssdict['text-align'] = node.attrib['align'] + del node.attrib['align'] + if node.tag == XHTML('font'): + node.tag = XHTML('span') + if 'size' in node.attrib: + size = node.attrib['size'] + if size.startswith('+'): + cssdict['font-size'] = 'larger' + elif size.startswith('-'): + cssdict['font-size'] = 'smaller' + else: + fnums = self.context.source.fnums + cssdict['font-size'] = fnums[int(size)] + del node.attrib['size'] + if 'color' in node.attrib: + cssdict['color'] = node.attrib['color'] + del node.attrib['color'] + if 'bgcolor' in node.attrib: + cssdict['background-color'] = node.attrib['bgcolor'] + del node.attrib['bgcolor'] if cssdict: if 'font-size' in cssdict: fsize = 
self.fmap[style['font-size']] @@ -170,6 +181,8 @@ class CSSFlattener(object): left -= style['text-indent'] if self.unfloat and 'float' in cssdict and tag != 'img': del cssdict['float'] + if cssdict.get('display', 'none') != 'none': + del cssdict['display'] if 'vertical-align' in cssdict: if cssdict['vertical-align'] == 'sup': cssdict['vertical-align'] = 'super'