diff --git a/src/calibre/ebooks/docx/writer/from_html.py b/src/calibre/ebooks/docx/writer/from_html.py
index d89f74d8ff..922f57a8a5 100644
--- a/src/calibre/ebooks/docx/writer/from_html.py
+++ b/src/calibre/ebooks/docx/writer/from_html.py
@@ -6,12 +6,147 @@ from __future__ import (unicode_literals, division, absolute_import,
 __license__ = 'GPL v3'
 __copyright__ = '2013, Kovid Goyal '
 
+from lxml import etree
+
+from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero
+from calibre.ebooks.oeb.stylizer import Stylizer
+from calibre.ebooks.oeb.base import XPath, barename
+
+
+class TextStyle(object):
+    '''
+    The character formatting computed from a CSS style. Two instances are
+    equal when all the properties listed in ALL_PROPS are equal.
+    '''
+
+    ALL_PROPS = ('font_family', 'font_size', 'bold', 'italic', 'color',
+                 'background_color', 'underline', 'strike', 'dstrike')
+
+    def __init__(self, css):
+        # for x in ('text-transform', 'text-shadow', 'font-variant', 'letter-spacing', 'vertical-align'):
+        self.font_family = css['font-family']  # TODO: Resolve multiple font families and generic font family names
+        try:
+            self.font_size = int(float(css['font-size']) * 2)  # stylizer normalizes all font sizes into pts
+        except (ValueError, TypeError, AttributeError):
+            self.font_size = None
+
+        fw = self.font_weight = css['font-weight']
+        self.bold = fw in {'bold', 'bolder'} or int_or_zero(fw) >= 700
+        self.font_style = css['font-style']
+        self.italic = self.font_style in {'italic', 'oblique'}
+        self.color = convert_color(css['color'])
+        self.background_color = convert_color(css.backgroundColor)
+        td = set((css.effective_text_decoration or '').split())
+        self.underline = 'underline' in td
+        self.dstrike = 'line-through' in td and 'overline' in td
+        self.strike = not self.dstrike and 'line-through' in td
+
+        # TODO: Borders and padding
+
+    def _key(self):
+        return tuple(getattr(self, x) for x in self.ALL_PROPS)
+
+    def __hash__(self):
+        return hash(self._key())
+
+    def __eq__(self, other):
+        # Compare the values themselves, equal hashes do not imply equal styles
+        return isinstance(other, TextStyle) and self._key() == other._key()
+
+    def __ne__(self, other):
+        # Needed on python 2, which does not derive != from __eq__
+        return not self.__eq__(other)
+
+class TextRun(object):
+
+    def __init__(self, style):
+        self.style = style
+
+class Block(object):
+
+    def __init__(self):
+        self.texts = []
+
+    def add_text(self, text, style):
+        pass  # TODO: Not implemented yet
+
 class Convert(object):
 
     def __init__(self, oeb, docx):
         self.oeb, self.docx = oeb, docx
         self.log, self.opts = docx.log, docx.opts
 
-    def __call__(self):
-        pass
+        self.blocks = []
+
+    def __call__(self):
+        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
+        SVGRasterizer()(self.oeb, self.opts)
+
+        for item in self.oeb.spine:
+            self.process_item(item)
+
+    def process_item(self, item):
+        stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, self.opts.output_profile)
+
+        for body in XPath('//h:body')(item.data):
+            b = Block()
+            self.blocks.append(b)
+            self.process_block(body, b, stylizer, ignore_tail=True)
+
+    def process_block(self, html_block, docx_block, stylizer, ignore_tail=False):
+        '''
+        Add the text of html_block to docx_block. Children with display: block
+        get a new Block, all other children are added via process_inline().
+        The tail of html_block is skipped if ignore_tail is True.
+        '''
+        if html_block.text:
+            docx_block.add_text(html_block.text, stylizer.style(html_block))
+
+        for child in html_block.iterchildren(etree.Element):
+            tag = barename(child.tag)
+            style = stylizer.style(child)
+            display = style.get('display', 'inline')
+            if display == 'block' and tag != 'img':
+                b = Block()
+                self.blocks.append(b)
+                self.process_block(child, b, stylizer)
+            else:
+                # Images go through process_inline() as well, it skips the image
+                # but keeps its tail. Returning here instead would silently drop
+                # all the siblings that follow the image.
+                self.process_inline(child, self.blocks[-1], stylizer)
+
+        if ignore_tail is False and html_block.tail:
+            b = docx_block
+            if b is not self.blocks[-1]:
+                b = Block()
+                self.blocks.append(b)
+            b.add_text(html_block.tail, stylizer.style(html_block.getparent()))
+
+    def process_inline(self, html_child, docx_block, stylizer):
+        '''
+        Add the text of html_child and its inline descendants, followed by
+        the tail of html_child. Images are skipped for now, but the text
+        after them is kept.
+        '''
+        tag = barename(html_child.tag)
+        if tag != 'img':  # TODO: Handle images
+            style = stylizer.style(html_child)
+            if html_child.text:
+                docx_block.add_text(html_child.text, style)
+            for child in html_child.iterchildren(etree.Element):
+                style = stylizer.style(child)
+                display = style.get('display', 'inline')
+                if display == 'block':
+                    b = Block()
+                    self.blocks.append(b)
+                    self.process_block(child, b, stylizer)
+                else:
+                    self.process_inline(child, self.blocks[-1], stylizer)
+
+        if html_child.tail:
+            # Block level children may have started new blocks, so add the tail
+            # to the current last block to keep the text in document order
+            self.blocks[-1].add_text(html_child.tail, stylizer.style(html_child.getparent()))
+
 
diff --git a/src/calibre/ebooks/docx/writer/utils.py b/src/calibre/ebooks/docx/writer/utils.py
new file mode 100644
index 0000000000..fc423f31d2
--- /dev/null
+++ b/src/calibre/ebooks/docx/writer/utils.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2013, Kovid Goyal '
+
+import re
+from cssutils.css.colors import COLORS
+
+def int_or_zero(raw):
+    ''' Return raw converted to an int, or 0 if the conversion fails. '''
+    try:
+        return int(raw)
+    except (ValueError, TypeError, AttributeError):
+        return 0
+
+# convert_color() {{{
+hex_pat = re.compile(r'#([0-9a-f]{6})')
+hex3_pat = re.compile(r'#([0-9a-f]{3})')
+rgb_pat = re.compile(r'rgba?\s*\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)')
+
+def convert_color(c):
+    '''
+    Convert the CSS color c into the form #RRGGBB, with upper case hex digits.
+    Returns None for empty, transparent and unrecognized values. Any alpha
+    component is ignored.
+    '''
+    if not c:
+        return None
+    c = c.lower().strip()
+    if c == 'transparent':
+        return None
+    try:
+        cval = COLORS[c]
+    except KeyError:
+        m = hex_pat.match(c)
+        if m is not None:
+            # Use only the matched digits, so that trailing characters (such
+            # as the alpha in #rrggbbaa) do not end up in the result
+            return '#' + m.group(1).upper()
+        m = hex3_pat.match(c)
+        if m is not None:
+            return '#' + ''.join(x*2 for x in m.group(1)).upper()
+        m = rgb_pat.match(c)
+        if m is not None:
+            # Clamp to 255, larger values would need more than two hex digits
+            return '#' + ''.join('%02X' % min(255, int(m.group(i))) for i in (1, 2, 3))
+    else:
+        return '#' + ''.join('%02X' % int(x) for x in cval[:3])
+    return None
+
+def test_convert_color():
+    import unittest
+    class TestColors(unittest.TestCase):
+
+        def test_color_conversion(self):
+            ae = self.assertEqual
+            cc = convert_color
+            ae(None, cc(None))
+            ae(None, cc('transparent'))
+            ae(None, cc('none'))
+            ae(None, cc('#12j456'))
+            ae('#F0F8FF', cc('AliceBlue'))
+            ae('#000000', cc('black'))
+            ae(cc('#001'), '#000011')
+            ae('#AABBCC', cc('#abc'))
+            ae('#12345D', cc('#12345d'))
+            ae('#12345D', cc('#12345d80'))
+            ae('#FFFFFF', cc('rgb(255, 255, 255)'))
+            ae('#FF0000', cc('rgba(255, 0, 0, 23)'))
+            ae('#FF0000', cc('rgb(300, 0, 0)'))
+    tests = unittest.defaultTestLoader.loadTestsFromTestCase(TestColors)
+    unittest.TextTestRunner(verbosity=4).run(tests)
+# }}}
diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py
index ad6846c456..7b61710c3c 100644
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@@ -494,6 +494,10 @@ class Style(object):
             result = DEFAULTS[name]
         return result
 
+    def get(self, name, default=None):
+        'Return the result of self._style.get(name, default)'
+        return self._style.get(name, default)
+
     def _unit_convert(self, value, base=None, font=None):
         'Return value in pts'
         if base is None: