Start work on converting character styles

This commit is contained in:
Kovid Goyal 2013-08-20 11:27:36 +05:30
parent c26f948f75
commit 05040af6fb
3 changed files with 178 additions and 2 deletions

View File

@ -6,12 +6,121 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
from lxml import etree
from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.base import XPath, barename
class TextStyle(object):
    """Character-level formatting read from the resolved CSS of one element.

    Instances are hashable. Two instances compare equal when every property
    named in ``ALL_PROPS`` has the same value.
    """

    # Properties that define the identity of a style for hashing/equality.
    # font_weight and font_style are not listed because they are already
    # reduced to the bold and italic flags.
    ALL_PROPS = (
        'font_family', 'font_size', 'bold', 'italic', 'color',
        'background_color', 'underline', 'strike', 'dstrike')

    def __init__(self, css):
        """Extract the supported text properties from ``css``.

        :param css: Resolved style of an element. It is indexed by CSS
            property name and must also provide the ``backgroundColor`` and
            ``effective_text_decoration`` attributes (presumably a stylizer
            ``Style`` object -- confirm against the callers).
        """
        # TODO: Also handle text-transform, text-shadow, font-variant,
        # letter-spacing and vertical-align
        # TODO: Resolve multiple font families and generic font family names
        self.font_family = css['font-family']
        try:
            # stylizer normalizes all font sizes into pts; the stored value
            # is twice the point size, truncated to an integer
            self.font_size = int(float(css['font-size']) * 2)
        except (ValueError, TypeError, AttributeError):
            self.font_size = None
        fw = self.font_weight = css['font-weight']
        # Keyword weights and numeric weights of 700 and above count as bold
        self.bold = fw in {'bold', 'bolder'} or int_or_zero(fw) >= 700
        self.font_style = css['font-style']
        self.italic = self.font_style in {'italic', 'oblique'}
        self.color = convert_color(css['color'])
        self.background_color = convert_color(css.backgroundColor)
        td = set((css.effective_text_decoration or '').split())
        self.underline = 'underline' in td
        # line-through combined with overline is mapped to a double strike
        self.dstrike = 'line-through' in td and 'overline' in td
        self.strike = not self.dstrike and 'line-through' in td
        # TODO: Borders and padding

    def _key(self):
        """Return the tuple of values that identifies this style."""
        return tuple(getattr(self, x) for x in self.ALL_PROPS)

    def __hash__(self):
        return hash(self._key())

    def __eq__(self, other):
        # Compare the actual values rather than the hashes: two different
        # styles may share a hash value and must not be treated as equal.
        if not isinstance(other, TextStyle):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so define it explicitly
        result = self.__eq__(other)
        return result if result is NotImplemented else not result
class TextRun(object):
    """A run of text, identified so far only by the style applied to it."""

    def __init__(self, style):
        # The style object is stored as given, without copying or validation
        self.style = style
class Block(object):
    """Container for the pieces of text that make up one block."""

    def __init__(self):
        # Starts out empty; nothing in this class fills it yet
        self.texts = []

    def add_text(self, text, style):
        """Accept ``text`` with its ``style``. Not implemented yet: does nothing."""
class Convert(object):
    """Walk the spine of an OEB book and collect its text into Block objects.

    Calling the instance rasterizes SVG content and then processes every
    spine item; the resulting blocks are appended to ``self.blocks``.
    """

    def __init__(self, oeb, docx):
        """
        :param oeb: The book to convert; its ``spine`` is iterated.
        :param docx: Object providing the ``log`` and ``opts`` attributes.
        """
        self.oeb, self.docx = oeb, docx
        self.log, self.opts = docx.log, docx.opts
        # Every Block created during the conversion, in creation order.
        # Must exist before process_item() runs, as it appends to this list.
        self.blocks = []

    def __call__(self):
        """Run the conversion over all items in the spine."""
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        SVGRasterizer()(self.oeb, self.opts)

        for item in self.oeb.spine:
            self.process_item(item)

    def process_item(self, item):
        """Convert every ``<body>`` element found in the spine item ``item``."""
        stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, self.opts.output_profile)

        for body in XPath('//h:body')(item.data):
            b = Block()
            self.blocks.append(b)
            # Text following </body> is not part of the document content
            self.process_block(body, b, stylizer, ignore_tail=True)

    def process_block(self, html_block, docx_block, stylizer, ignore_tail=False):
        """Convert the block-level element ``html_block``.

        :param html_block: The element to convert.
        :param docx_block: The Block that receives the leading text of the element.
        :param stylizer: Used to look up the style of each element.
        :param ignore_tail: If True, the text following the closing tag of
            ``html_block`` is not converted.
        """
        if html_block.text:
            docx_block.add_text(html_block.text, stylizer.style(html_block))

        for child in html_block.iterchildren(etree.Element):
            if barename(child.tag) == 'img':
                # Skip only the image, not the siblings that follow it.
                # process_inline() ignores the image itself but still
                # converts the text after it.
                self.process_inline(child, self.blocks[-1], stylizer)
                continue
            display = stylizer.style(child).get('display', 'inline')
            if display == 'block':
                b = Block()
                self.blocks.append(b)
                self.process_block(child, b, stylizer)
            else:
                self.process_inline(child, self.blocks[-1], stylizer)

        if ignore_tail is False and html_block.tail:
            b = docx_block
            if b is not self.blocks[-1]:
                # Nested blocks were created in the meantime, so the tail
                # needs a fresh block to remain in document order
                b = Block()
                self.blocks.append(b)
            # Tail text belongs to the parent, so it takes the parent's style
            b.add_text(html_block.tail, stylizer.style(html_block.getparent()))

    def process_inline(self, html_child, docx_block, stylizer):
        """Convert the inline element ``html_child``.

        :param html_child: The element to convert.
        :param docx_block: The Block that receives the text of the element
            and the text following it.
        :param stylizer: Used to look up the style of each element.
        """
        # TODO: Handle images. For now the image itself is ignored, but the
        # text that follows it is still converted below.
        if barename(html_child.tag) != 'img':
            style = stylizer.style(html_child)
            if html_child.text:
                docx_block.add_text(html_child.text, style)
            for child in html_child.iterchildren(etree.Element):
                display = stylizer.style(child).get('display', 'inline')
                if display == 'block':
                    b = Block()
                    self.blocks.append(b)
                    self.process_block(child, b, stylizer)
                else:
                    self.process_inline(child, self.blocks[-1], stylizer)

        if html_child.tail:
            # Tail text belongs to the parent, so it takes the parent's style
            docx_block.add_text(html_child.tail, stylizer.style(html_child.getparent()))

View File

@ -0,0 +1,64 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import re
from cssutils.css.colors import COLORS
def int_or_zero(raw):
    """Return ``int(raw)``, or 0 if ``raw`` cannot be converted.

    A ValueError, TypeError or AttributeError raised by the conversion is
    treated as "not a number".
    """
    try:
        value = int(raw)
    except (ValueError, TypeError, AttributeError):
        value = 0
    return value
# convert_color() {{{
# The hex patterns are anchored at the end so that strings with extra
# characters are rejected. A trailing alpha component (#RRGGBBAA / #RGBA) is
# accepted and ignored, in the same way as the alpha value of rgba().
hex_pat = re.compile(r'#([0-9a-f]{6})(?:[0-9a-f]{2})?$')
hex3_pat = re.compile(r'#([0-9a-f]{3})[0-9a-f]?$')
rgb_pat = re.compile(r'rgba?\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)')


def convert_color(c):
    """Convert the CSS color ``c`` into an upper case ``#RRGGBB`` string.

    Supported inputs are hex colors (3 or 6 digits, optionally followed by
    an alpha component, which is dropped), ``rgb()``/``rgba()`` with integer
    components (the alpha value is dropped) and the color names present in
    ``COLORS``. Case and surrounding whitespace are ignored.

    :param c: The color as a string, or a false value such as None.
    :return: A string of the form ``#RRGGBB``, or None if ``c`` is empty,
        is ``transparent`` or is not recognized.
    """
    if not c:
        return None
    c = c.lower().strip()
    if c == 'transparent':
        return None

    if c.startswith('#'):
        m = hex_pat.match(c)
        if m is not None:
            return '#' + m.group(1).upper()
        m = hex3_pat.match(c)
        if m is not None:
            # Each digit of the short form stands for a doubled digit
            return '#' + ''.join(ch * 2 for ch in m.group(1)).upper()
        # Nothing but a hex color can start with '#'
        return None

    m = rgb_pat.match(c)
    if m is not None:
        # Clamp each component to 255 so the result always has 6 hex digits
        return '#' + ''.join('%02X' % min(255, int(x)) for x in m.groups())

    try:
        cval = COLORS[c]
    except KeyError:
        return None
    return '#' + ''.join('%02X' % int(x) for x in cval[:3])
def test_convert_color():
    """Run the checks for convert_color() with unittest's text runner."""
    import unittest

    class TestColors(unittest.TestCase):

        def test_color_conversion(self):
            check, conv = self.assertEqual, convert_color

            # Inputs that must not produce a color
            check(None, conv(None))
            check(None, conv('transparent'))
            check(None, conv('none'))
            check(None, conv('#12j456'))

            # Named colors
            check('#F0F8FF', conv('AliceBlue'))
            check('#000000', conv('black'))

            # Hex notation
            check(conv('#001'), '#000011')
            check('#12345D', conv('#12345d'))

            # rgb() and rgba() notation
            check('#FFFFFF', conv('rgb(255, 255, 255)'))
            check('#FF0000', conv('rgba(255, 0, 0, 23)'))

    suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestColors)
    runner = unittest.TextTestRunner(verbosity=4)
    runner.run(suite)
# }}}

View File

@ -494,6 +494,9 @@ class Style(object):
result = DEFAULTS[name]
return result
def get(self, name, default=None):
    """Return the value stored for ``name``, or ``default`` if there is none.

    Only the mapping held in ``self._style`` is consulted.
    """
    own_values = self._style
    return own_values.get(name, default)
def _unit_convert(self, value, base=None, font=None):
'Return value in pts'
if base is None: