mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
DOCX Output: Nicer organization of output styles.
Styles are now merged into block styles that contain both paragraph and character formatting (for the most common character style in each block). All block styles inherit the Normal style and override only what is different.
This commit is contained in:
parent
2ddf85a341
commit
35699b1f7b
@ -48,7 +48,7 @@ class DOCXOutput(OutputFormatPlugin):
|
|||||||
from calibre.ebooks.docx.writer.from_html import Convert
|
from calibre.ebooks.docx.writer.from_html import Convert
|
||||||
docx = DOCX(opts, log)
|
docx = DOCX(opts, log)
|
||||||
self.convert_metadata(oeb)
|
self.convert_metadata(oeb)
|
||||||
Convert(oeb, docx)()
|
Convert(oeb, docx, self.mi)()
|
||||||
docx.write(output_path, self.mi)
|
docx.write(output_path, self.mi)
|
||||||
if opts.extract_to:
|
if opts.extract_to:
|
||||||
from calibre.ebooks.docx.dump import do_dump
|
from calibre.ebooks.docx.dump import do_dump
|
||||||
|
@ -53,6 +53,7 @@ class TextRun(object):
|
|||||||
self.style = style
|
self.style = style
|
||||||
self.texts = []
|
self.texts = []
|
||||||
self.link = None
|
self.link = None
|
||||||
|
self.parent_style = None
|
||||||
self.makelement = namespace.makeelement
|
self.makelement = namespace.makeelement
|
||||||
|
|
||||||
def add_text(self, text, preserve_whitespace, bookmark=None, link=None):
|
def add_text(self, text, preserve_whitespace, bookmark=None, link=None):
|
||||||
@ -75,8 +76,9 @@ class TextRun(object):
|
|||||||
makeelement = self.makelement
|
makeelement = self.makelement
|
||||||
parent = p if self.link is None else links_manager.serialize_hyperlink(p, self.link)
|
parent = p if self.link is None else links_manager.serialize_hyperlink(p, self.link)
|
||||||
r = makeelement(parent, 'w:r')
|
r = makeelement(parent, 'w:r')
|
||||||
rpr = makeelement(r, 'w:rPr')
|
if self.parent_style is not self.style:
|
||||||
makeelement(rpr, 'w:rStyle', w_val=self.style.id)
|
rpr = makeelement(r, 'w:rPr')
|
||||||
|
makeelement(rpr, 'w:rStyle', w_val=self.style.id)
|
||||||
|
|
||||||
for text, preserve_whitespace, bookmark in self.texts:
|
for text, preserve_whitespace, bookmark in self.texts:
|
||||||
if bookmark is not None:
|
if bookmark is not None:
|
||||||
@ -104,6 +106,14 @@ class TextRun(object):
|
|||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@property
def style_weight(self):
    """Total length of the text strings stored in this run.

    Each entry of ``self.texts`` is a ``(text, preserve_whitespace,
    bookmark)`` triple. Only entries whose ``text`` is a string (of the same
    type as a plain ``''`` literal in this module) are counted; any other
    kind of entry contributes nothing. ``StylesManager.finalize`` uses this
    value to weight run styles by how much text they cover.
    """
    text_type = type('')
    return sum(
        len(text)
        for text, _preserve_whitespace, _bookmark in self.texts
        if isinstance(text, text_type)
    )
|
||||||
|
|
||||||
class Block(object):
|
class Block(object):
|
||||||
|
|
||||||
def __init__(self, namespace, styles_manager, links_manager, html_block, style, is_table_cell=False, float_spec=None, is_list_item=False):
|
def __init__(self, namespace, styles_manager, links_manager, html_block, style, is_table_cell=False, float_spec=None, is_list_item=False):
|
||||||
@ -124,6 +134,7 @@ class Block(object):
|
|||||||
self.page_break_before = False
|
self.page_break_before = False
|
||||||
self.runs = []
|
self.runs = []
|
||||||
self.skipped = False
|
self.skipped = False
|
||||||
|
self.linked_style = None
|
||||||
|
|
||||||
def resolve_skipped(self, next_block):
|
def resolve_skipped(self, next_block):
|
||||||
if not self.is_empty():
|
if not self.is_empty():
|
||||||
@ -186,7 +197,8 @@ class Block(object):
|
|||||||
numpr = makeelement(ppr, 'w:numPr')
|
numpr = makeelement(ppr, 'w:numPr')
|
||||||
makeelement(numpr, 'w:ilvl', w_val=str(self.numbering_id[1]))
|
makeelement(numpr, 'w:ilvl', w_val=str(self.numbering_id[1]))
|
||||||
makeelement(numpr, 'w:numId', w_val=str(self.numbering_id[0]))
|
makeelement(numpr, 'w:numId', w_val=str(self.numbering_id[0]))
|
||||||
makeelement(ppr, 'w:pStyle', w_val=self.style.id)
|
if self.linked_style is not None:
|
||||||
|
makeelement(ppr, 'w:pStyle', w_val=self.linked_style.id)
|
||||||
if self.is_first_block:
|
if self.is_first_block:
|
||||||
makeelement(ppr, 'w:pageBreakBefore', w_val='off')
|
makeelement(ppr, 'w:pageBreakBefore', w_val='off')
|
||||||
for run in self.runs:
|
for run in self.runs:
|
||||||
@ -311,6 +323,9 @@ class Blocks(object):
|
|||||||
self.all_blocks[self.pos].page_break_before = True
|
self.all_blocks[self.pos].page_break_before = True
|
||||||
self.block_map = {}
|
self.block_map = {}
|
||||||
|
|
||||||
|
def __repr__(self):
    """Return a debugging representation built from ``self.runs``."""
    # Wrap the operand in a 1-tuple so that ``%`` always treats it as one
    # value; a bare tuple operand would be unpacked into the format string.
    # NOTE(review): the diff hunk header places this method in ``Blocks``,
    # yet the label reads 'Block' and the only ``runs`` attribute visible in
    # this diff is set in ``Block.__init__`` -- confirm it is attached to
    # the intended class.
    return 'Block(%r)' % (self.runs,)
|
||||||
|
|
||||||
class Convert(object):
|
class Convert(object):
|
||||||
|
|
||||||
# Word does not apply default styling to hyperlinks, so we ensure they get
|
# Word does not apply default styling to hyperlinks, so we ensure they get
|
||||||
@ -320,16 +335,17 @@ class Convert(object):
|
|||||||
a[href] { text-decoration: underline; color: blue }
|
a[href] { text-decoration: underline; color: blue }
|
||||||
'''
|
'''
|
||||||
|
|
||||||
def __init__(self, oeb, docx):
|
def __init__(self, oeb, docx, mi):
|
||||||
self.oeb, self.docx = oeb, docx
|
self.oeb, self.docx = oeb, docx
|
||||||
self.log, self.opts = docx.log, docx.opts
|
self.log, self.opts = docx.log, docx.opts
|
||||||
|
self.mi = mi
|
||||||
|
|
||||||
def __call__(self):
|
def __call__(self):
|
||||||
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
|
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
|
||||||
self.svg_rasterizer = SVGRasterizer(base_css=self.base_css)
|
self.svg_rasterizer = SVGRasterizer(base_css=self.base_css)
|
||||||
self.svg_rasterizer(self.oeb, self.opts)
|
self.svg_rasterizer(self.oeb, self.opts)
|
||||||
|
|
||||||
self.styles_manager = StylesManager(self.docx.namespace)
|
self.styles_manager = StylesManager(self.docx.namespace, self.log, self.mi.language)
|
||||||
self.links_manager = LinksManager(self.docx.namespace, self.docx.document_relationships)
|
self.links_manager = LinksManager(self.docx.namespace, self.docx.document_relationships)
|
||||||
self.images_manager = ImagesManager(self.oeb, self.docx.document_relationships)
|
self.images_manager = ImagesManager(self.oeb, self.docx.document_relationships)
|
||||||
self.lists_manager = ListsManager(self.docx)
|
self.lists_manager = ListsManager(self.docx)
|
||||||
|
@ -13,7 +13,7 @@ from lxml import etree
|
|||||||
|
|
||||||
from calibre.ebooks import parse_css_length
|
from calibre.ebooks import parse_css_length
|
||||||
from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero
|
from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero
|
||||||
from calibre.utils.icu import numeric_sort_key
|
from calibre.utils.localization import lang_as_iso639_1
|
||||||
from tinycss.css21 import CSS21Parser
|
from tinycss.css21 import CSS21Parser
|
||||||
|
|
||||||
css_parser = CSS21Parser()
|
css_parser = CSS21Parser()
|
||||||
@ -43,6 +43,34 @@ def bmap(x):
|
|||||||
def is_dropcaps(html_tag, tag_style):
|
def is_dropcaps(html_tag, tag_style):
|
||||||
return len(html_tag) < 2 and len(etree.tostring(html_tag, method='text', encoding=unicode, with_tail=False)) < 5 and tag_style['float'] == 'left'
|
return len(html_tag) < 2 and len(etree.tostring(html_tag, method='text', encoding=unicode, with_tail=False)) < 5 and tag_style['float'] == 'left'
|
||||||
|
|
||||||
|
class CombinedStyle(object):
    """A paragraph style that bundles one block style with one text style.

    ``StylesManager.finalize`` creates one instance per (block style, most
    common run style) pair that is actually used, and later assigns ``id``,
    ``name`` and ``seq`` to it.

    :param bs: the block (paragraph) style of the pair
    :param rs: the text (run) style of the pair
    :param blocks: the blocks that use this pair
    :param namespace: object providing ``makeelement`` and a ``namespaces``
        mapping with a ``'w'`` entry
    """

    def __init__(self, bs, rs, blocks, namespace):
        self.bs, self.rs, self.blocks = bs, rs, blocks
        self.namespace = namespace
        # Not known at construction time; assigned once the styles are ranked.
        self.id = self.name = self.seq = None

    def apply(self):
        """Attach this style to every block it covers and to their runs.

        Each block gets ``linked_style`` set to this object and each of its
        runs gets ``parent_style`` set to the text style of the pair.
        """
        for block in self.blocks:
            block.linked_style = self
            for run in block.runs:
                run.parent_style = self.rs

    def serialize(self, styles, normal_style):
        """Write this style as a ``w:style`` element under ``styles``.

        :param styles: parent element that receives the new ``w:style``
        :param normal_style: the :class:`CombinedStyle` that every other
            style is based on; its ``bs``/``rs`` are passed to
            ``serialize_properties`` as the baseline

        Children are created in the order the WordprocessingML schema
        defines for a style: ``name``, ``basedOn``, ``qFormat``, ``pPr``,
        ``rPr``.
        """
        # NOTE(review): this relies on namespace.makeelement attaching the new
        # element to its parent (several return values are discarded) --
        # confirm against the namespace implementation.
        makeelement = self.namespace.makeelement
        style = makeelement(styles, 'w:style', w_styleId=self.id, w_type='paragraph')
        if self.seq == 0:
            # The first style in sequence is flagged as the default one.
            style.set('{%s}%s' % (self.namespace.namespaces['w'], 'default'), '1')
        makeelement(style, 'w:name', w_val=self.name)
        if self is not normal_style:
            makeelement(style, 'w:basedOn', w_val=normal_style.id)
        makeelement(style, 'w:qFormat')
        pPr = makeelement(style, 'w:pPr')
        self.bs.serialize_properties(pPr, normal_style.bs)
        rPr = makeelement(style, 'w:rPr')
        self.rs.serialize_properties(rPr, normal_style.rs)
|
||||||
|
|
||||||
class FloatSpec(object):
|
class FloatSpec(object):
|
||||||
|
|
||||||
def __init__(self, namespace, html_tag, tag_style):
|
def __init__(self, namespace, html_tag, tag_style):
|
||||||
@ -134,14 +162,11 @@ class DOCXStyle(object):
|
|||||||
__str__ = __repr__
|
__str__ = __repr__
|
||||||
|
|
||||||
def serialize(self, styles, normal_style):
|
def serialize(self, styles, normal_style):
|
||||||
w, makeelement = self.w, self.makeelement
|
makeelement = self.makeelement
|
||||||
style = makeelement(styles, 'style', styleId=self.id, type=self.TYPE)
|
style = makeelement(styles, 'style', styleId=self.id, type=self.TYPE)
|
||||||
style.append(makeelement(style, 'name', val=self.name))
|
style.append(makeelement(style, 'name', val=self.name))
|
||||||
if self is normal_style:
|
if self is not normal_style:
|
||||||
style.set(w('default'), '1')
|
|
||||||
else:
|
|
||||||
style.append(makeelement(style, 'basedOn', val=normal_style.id))
|
style.append(makeelement(style, 'basedOn', val=normal_style.id))
|
||||||
style.append(makeelement(style, 'qFormat'))
|
|
||||||
styles.append(style)
|
styles.append(style)
|
||||||
return style
|
return style
|
||||||
|
|
||||||
@ -235,13 +260,14 @@ class TextStyle(DOCXStyle):
|
|||||||
|
|
||||||
def serialize_borders(self, bdr, normal_style):
|
def serialize_borders(self, bdr, normal_style):
|
||||||
w = self.w
|
w = self.w
|
||||||
if (self.padding not in (None, ignore, 0) and self is normal_style) or self.padding != normal_style.padding:
|
is_normal_style = self is normal_style
|
||||||
|
if is_normal_style or self.padding != normal_style.padding:
|
||||||
bdr.set(w('space'), str(0 if self.padding in (None, ignore) else self.padding))
|
bdr.set(w('space'), str(0 if self.padding in (None, ignore) else self.padding))
|
||||||
if (self.border_width not in (None, ignore, 0) and self is normal_style) or self.border_width != normal_style.border_width:
|
if is_normal_style or self.border_width != normal_style.border_width:
|
||||||
bdr.set(w('sz'), str(0 if self.border_width in (None, ignore) else self.border_width))
|
bdr.set(w('sz'), str(0 if self.border_width in (None, ignore) else self.border_width))
|
||||||
if (self.border_style not in (None, ignore, 'none') and self is normal_style) or self.border_style != normal_style.border_style:
|
if is_normal_style or self.border_style != normal_style.border_style:
|
||||||
bdr.set(w('val'), 'none' if self.border_style in (None, ignore) else self.border_style)
|
bdr.set(w('val'), 'none' if self.border_style in (None, ignore) else self.border_style)
|
||||||
if (self.border_color not in (None, ignore, 'auto') and self is normal_style) or self.border_color != normal_style.border_color:
|
if is_normal_style or self.border_color != normal_style.border_color:
|
||||||
bdr.set(w('color'), 'auto' if self.border_color in (None, ignore) else self.border_color)
|
bdr.set(w('color'), 'auto' if self.border_color in (None, ignore) else self.border_color)
|
||||||
return bdr
|
return bdr
|
||||||
|
|
||||||
@ -249,53 +275,58 @@ class TextStyle(DOCXStyle):
|
|||||||
makeelement = self.makeelement
|
makeelement = self.makeelement
|
||||||
style_root = DOCXStyle.serialize(self, styles, normal_style)
|
style_root = DOCXStyle.serialize(self, styles, normal_style)
|
||||||
style = makeelement(style_root, 'rPr')
|
style = makeelement(style_root, 'rPr')
|
||||||
|
self.serialize_properties(style, normal_style)
|
||||||
if self is normal_style or self.font_family != normal_style.font_family:
|
|
||||||
style.append(makeelement(
|
|
||||||
style, 'rFonts', **{k:self.font_family for k in 'ascii cs eastAsia hAnsi'.split()}))
|
|
||||||
|
|
||||||
for name, attr, vmap in (('sz', 'font_size', str), ('b', 'bold', bmap), ('i', 'italic', bmap)):
|
|
||||||
val = getattr(self, attr)
|
|
||||||
if self is normal_style or getattr(normal_style, attr) != val:
|
|
||||||
for suffix in ('', 'Cs'):
|
|
||||||
style.append(makeelement(style, name + suffix, val=vmap(val)))
|
|
||||||
|
|
||||||
def check_attr(attr):
|
|
||||||
val = getattr(self, attr)
|
|
||||||
return (self is normal_style and val is not False and val is not None) or (val != getattr(normal_style, attr))
|
|
||||||
|
|
||||||
if check_attr('color'):
|
|
||||||
style.append(makeelement(style, 'color', val=self.color or 'auto'))
|
|
||||||
if check_attr('background_color'):
|
|
||||||
style.append(makeelement(style, 'shd', fill=self.background_color or 'auto'))
|
|
||||||
if check_attr('underline'):
|
|
||||||
style.append(makeelement(style, 'u', val='single' if self.underline else 'none'))
|
|
||||||
if check_attr('dstrike'):
|
|
||||||
style.append(makeelement(style, 'dstrike', val=bmap(self.dstrike)))
|
|
||||||
if check_attr('strike'):
|
|
||||||
style.append(makeelement(style, 'strike', val=bmap(self.strike)))
|
|
||||||
if check_attr('caps'):
|
|
||||||
style.append(makeelement(style, 'caps', val=bmap(self.caps)))
|
|
||||||
if check_attr('small_caps'):
|
|
||||||
style.append(makeelement(style, 'smallCaps', val=bmap(self.small_caps)))
|
|
||||||
if check_attr('shadow'):
|
|
||||||
style.append(makeelement(style, 'shadow', val=bmap(self.shadow)))
|
|
||||||
if check_attr('spacing'):
|
|
||||||
style.append(makeelement(style, 'spacing', val=str(self.spacing or 0)))
|
|
||||||
if (self is normal_style and self.vertical_align in {'superscript', 'subscript'}) or self.vertical_align != normal_style.vertical_align:
|
|
||||||
if self.vertical_align in {'superscript', 'subscript', 'baseline'}:
|
|
||||||
style.append(makeelement(style, 'vertAlign', val=self.vertical_align))
|
|
||||||
else:
|
|
||||||
style.append(makeelement(style, 'position', val=self.vertical_align))
|
|
||||||
|
|
||||||
bdr = self.serialize_borders(makeelement(style, 'bdr'), normal_style)
|
|
||||||
if bdr.attrib:
|
|
||||||
style.append(bdr)
|
|
||||||
|
|
||||||
if len(style) > 0:
|
if len(style) > 0:
|
||||||
style_root.append(style)
|
style_root.append(style)
|
||||||
return style_root
|
return style_root
|
||||||
|
|
||||||
|
def serialize_properties(self, rPr, normal_style):
    """Append this text style's formatting elements to ``rPr``.

    :param rPr: the element that receives the property children
    :param normal_style: the baseline text style to compare against

    When this style *is* ``normal_style`` every property is written out.
    Otherwise a property is written only if its value differs from the
    baseline's value.
    """
    build = self.makeelement
    is_base = self is normal_style

    def add(tag, **attrs):
        # Create the child via the style's element factory and attach it.
        rPr.append(build(rPr, tag, **attrs))

    def changed(attr):
        own = getattr(self, attr)
        return is_base or (own != getattr(normal_style, attr))

    if is_base or self.font_family != normal_style.font_family:
        # The same family is used for all four font slots.
        add('rFonts', **dict.fromkeys(('ascii', 'cs', 'eastAsia', 'hAnsi'), self.font_family))

    # Size, bold and italic are each written twice: once plain and once with
    # the 'Cs' suffix.
    for tag, attr, convert in (('sz', 'font_size', str), ('b', 'bold', bmap), ('i', 'italic', bmap)):
        own = getattr(self, attr)
        if is_base or getattr(normal_style, attr) != own:
            for tail in ('', 'Cs'):
                add(tag + tail, val=convert(own))

    # (attribute on the style, element tag, XML attribute, value converter),
    # in the order the elements must be appended.
    simple_properties = (
        ('color', 'color', 'val', lambda v: v or 'auto'),
        ('background_color', 'shd', 'fill', lambda v: v or 'auto'),
        ('underline', 'u', 'val', lambda v: 'single' if v else 'none'),
        ('dstrike', 'dstrike', 'val', bmap),
        ('strike', 'strike', 'val', bmap),
        ('caps', 'caps', 'val', bmap),
        ('small_caps', 'smallCaps', 'val', bmap),
        ('shadow', 'shadow', 'val', bmap),
        ('spacing', 'spacing', 'val', lambda v: str(v or 0)),
    )
    for attr, tag, key, convert in simple_properties:
        if changed(attr):
            add(tag, **{key: convert(getattr(self, attr))})

    valign = self.vertical_align
    if is_base:
        # The baseline style always states its alignment explicitly; anything
        # other than super/subscript is written as 'baseline'.
        add('vertAlign', val=valign if valign in {'superscript', 'subscript'} else 'baseline')
    elif valign != normal_style.vertical_align:
        if valign in {'superscript', 'subscript', 'baseline'}:
            add('vertAlign', val=valign)
        else:
            # Any other value is written as a 'position' element instead.
            add('position', val=valign)

    border = self.serialize_borders(build(rPr, 'bdr'), normal_style)
    if border.attrib:
        # Only attach the border element when it carries attributes.
        rPr.append(border)
|
||||||
|
|
||||||
def read_css_block_borders(self, css, store_css_style=False):
|
def read_css_block_borders(self, css, store_css_style=False):
|
||||||
for edge in border_edges:
|
for edge in border_edges:
|
||||||
if css is None:
|
if css is None:
|
||||||
@ -385,11 +416,17 @@ class BlockStyle(DOCXStyle):
|
|||||||
return bdr
|
return bdr
|
||||||
|
|
||||||
def serialize(self, styles, normal_style):
|
def serialize(self, styles, normal_style):
|
||||||
w, makeelement = self.w, self.makeelement
|
makeelement = self.makeelement
|
||||||
style_root = DOCXStyle.serialize(self, styles, normal_style)
|
style_root = DOCXStyle.serialize(self, styles, normal_style)
|
||||||
style = makeelement(style_root, 'pPr')
|
style = makeelement(style_root, 'pPr')
|
||||||
|
|
||||||
spacing = makeelement(style, 'spacing')
|
if len(style) > 0:
|
||||||
|
style_root.append(style)
|
||||||
|
return style_root
|
||||||
|
|
||||||
|
def serialize_properties(self, pPr, normal_style):
|
||||||
|
makeelement, w = self.makeelement, self.w
|
||||||
|
spacing = makeelement(pPr, 'spacing')
|
||||||
for edge, attr in {'top':'before', 'bottom':'after'}.iteritems():
|
for edge, attr in {'top':'before', 'bottom':'after'}.iteritems():
|
||||||
getter = attrgetter('css_margin_' + edge)
|
getter = attrgetter('css_margin_' + edge)
|
||||||
css_val, css_unit = parse_css_length(getter(self))
|
css_val, css_unit = parse_css_length(getter(self))
|
||||||
@ -408,9 +445,9 @@ class BlockStyle(DOCXStyle):
|
|||||||
spacing.set(w('lineRule'), 'atLeast')
|
spacing.set(w('lineRule'), 'atLeast')
|
||||||
|
|
||||||
if spacing.attrib:
|
if spacing.attrib:
|
||||||
style.append(spacing)
|
pPr.append(spacing)
|
||||||
|
|
||||||
ind = makeelement(style, 'ind')
|
ind = makeelement(pPr, 'ind')
|
||||||
for edge in ('left', 'right'):
|
for edge in ('left', 'right'):
|
||||||
getter = attrgetter('css_margin_' + edge)
|
getter = attrgetter('css_margin_' + edge)
|
||||||
css_val, css_unit = parse_css_length(getter(self))
|
css_val, css_unit = parse_css_length(getter(self))
|
||||||
@ -444,35 +481,35 @@ class BlockStyle(DOCXStyle):
|
|||||||
ind.set(w('hanging'), str(abs(val)))
|
ind.set(w('hanging'), str(abs(val)))
|
||||||
ind.set(w('hangingChars'), '0')
|
ind.set(w('hangingChars'), '0')
|
||||||
if ind.attrib:
|
if ind.attrib:
|
||||||
style.append(ind)
|
pPr.append(ind)
|
||||||
|
|
||||||
if (self is normal_style and self.background_color) or self.background_color != normal_style.background_color:
|
if (self is normal_style and self.background_color) or self.background_color != normal_style.background_color:
|
||||||
style.append(makeelement(style, 'shd', val='clear', color='auto', fill=self.background_color or 'auto'))
|
pPr.append(makeelement(pPr, 'shd', val='clear', color='auto', fill=self.background_color or 'auto'))
|
||||||
|
|
||||||
pbdr = self.serialize_borders(style.makeelement(w('pBdr')), normal_style)
|
pbdr = self.serialize_borders(pPr.makeelement(w('pBdr')), normal_style)
|
||||||
if len(pbdr):
|
if len(pbdr):
|
||||||
style.append(pbdr)
|
pPr.append(pbdr)
|
||||||
|
|
||||||
if self is normal_style or self.text_align != normal_style.text_align:
|
if self is normal_style or self.text_align != normal_style.text_align:
|
||||||
style.append(makeelement(style, 'jc', val=self.text_align))
|
pPr.append(makeelement(pPr, 'jc', val=self.text_align))
|
||||||
|
|
||||||
if (self is normal_style and self.page_break_before) or self.page_break_before != normal_style.page_break_before:
|
if (self is normal_style and self.page_break_before) or self.page_break_before != normal_style.page_break_before:
|
||||||
style.append(makeelement(style, 'pageBreakBefore', val=bmap(self.page_break_before)))
|
pPr.append(makeelement(pPr, 'pageBreakBefore', val=bmap(self.page_break_before)))
|
||||||
if (self is normal_style and self.keep_lines) or self.keep_lines != normal_style.keep_lines:
|
if (self is normal_style and self.keep_lines) or self.keep_lines != normal_style.keep_lines:
|
||||||
style.append(makeelement(style, 'keepLines', val=bmap(self.keep_lines)))
|
pPr.append(makeelement(pPr, 'keepLines', val=bmap(self.keep_lines)))
|
||||||
|
|
||||||
if self is not normal_style and self.next_style is not None:
|
if self is not normal_style and self.next_style is not None:
|
||||||
style.append(makeelement(style, 'next', val=self.next_style))
|
pPr.append(makeelement(pPr, 'next', val=self.next_style))
|
||||||
|
|
||||||
if len(style) > 0:
|
|
||||||
style_root.append(style)
|
|
||||||
return style_root
|
|
||||||
|
|
||||||
|
|
||||||
class StylesManager(object):
|
class StylesManager(object):
|
||||||
|
|
||||||
def __init__(self, namespace):
|
def __init__(self, namespace, log, document_lang):
|
||||||
self.namespace = namespace
|
self.namespace = namespace
|
||||||
|
self.document_lang = lang_as_iso639_1(document_lang) or 'en-US'
|
||||||
|
if self.document_lang == 'en':
|
||||||
|
self.document_lang = 'en-US'
|
||||||
|
self.log = log
|
||||||
self.block_styles, self.text_styles = {}, {}
|
self.block_styles, self.text_styles = {}, {}
|
||||||
|
|
||||||
def create_text_style(self, css_style, is_parent_style=False):
|
def create_text_style(self, css_style, is_parent_style=False):
|
||||||
@ -496,37 +533,55 @@ class StylesManager(object):
|
|||||||
def finalize(self, blocks):
|
def finalize(self, blocks):
|
||||||
block_counts, run_counts = Counter(), Counter()
|
block_counts, run_counts = Counter(), Counter()
|
||||||
block_rmap, run_rmap = defaultdict(list), defaultdict(list)
|
block_rmap, run_rmap = defaultdict(list), defaultdict(list)
|
||||||
|
used_pairs = defaultdict(list)
|
||||||
|
|
||||||
for block in blocks:
|
for block in blocks:
|
||||||
block_counts[block.style] += 1
|
bs = block.style
|
||||||
|
block_counts[bs] += 1
|
||||||
block_rmap[block.style].append(block)
|
block_rmap[block.style].append(block)
|
||||||
|
local_run_counts = Counter()
|
||||||
for run in block.runs:
|
for run in block.runs:
|
||||||
run_counts[run.style] += (0 if run.is_empty() else 1)
|
count = run.style_weight
|
||||||
|
run_counts[run.style] += count
|
||||||
|
local_run_counts[run.style] += count
|
||||||
run_rmap[run.style].append(run)
|
run_rmap[run.style].append(run)
|
||||||
bnum = len(str(max(1, len(block_counts) - 1)))
|
if local_run_counts:
|
||||||
for i, (block_style, count) in enumerate(block_counts.most_common()):
|
rs = local_run_counts.most_common(1)[0][0]
|
||||||
if i == 0:
|
used_pairs[(bs, rs)].append(block)
|
||||||
self.normal_block_style = block_style
|
|
||||||
block_style.id = 'ParagraphNormal'
|
|
||||||
else:
|
|
||||||
block_style.id = 'Paragraph%d' % i
|
|
||||||
block_style.name = '%0{}d Para'.format(bnum) % i
|
|
||||||
rnum = len(str(max(1, len(run_counts) - 1)))
|
rnum = len(str(max(1, len(run_counts) - 1)))
|
||||||
for i, (text_style, count) in enumerate(run_counts.most_common()):
|
for i, (text_style, count) in enumerate(run_counts.most_common()):
|
||||||
|
text_style.id = 'Text%d' % i
|
||||||
|
text_style.name = '%0{}d Text'.format(rnum) % i
|
||||||
|
text_style.seq = i
|
||||||
if i == 0:
|
if i == 0:
|
||||||
self.normal_text_style = text_style
|
self.normal_text_style = text_style
|
||||||
text_style.id = 'TextNormal'
|
|
||||||
else:
|
|
||||||
text_style.id = 'Text%d' % i
|
|
||||||
text_style.name = '%0{}d Text'.format(rnum) % i
|
|
||||||
for s in tuple(self.block_styles):
|
|
||||||
if s.id is None:
|
|
||||||
self.block_styles.pop(s)
|
|
||||||
for s in tuple(self.text_styles):
|
for s in tuple(self.text_styles):
|
||||||
if s.id is None:
|
if s.id is None:
|
||||||
self.text_styles.pop(s)
|
self.text_styles.pop(s)
|
||||||
|
|
||||||
|
counts = Counter()
|
||||||
|
for (bs, rs), blocks in used_pairs.iteritems():
|
||||||
|
s = CombinedStyle(bs, rs, blocks, self.namespace)
|
||||||
|
counts[s] += sum(1 for b in blocks if not b.is_empty())
|
||||||
|
snum = len(str(max(1, len(counts) - 1)))
|
||||||
|
for i, (style, count) in enumerate(counts.most_common()):
|
||||||
|
if i == 0:
|
||||||
|
self.normal_style = style
|
||||||
|
style.id = style.name = 'Normal'
|
||||||
|
else:
|
||||||
|
style.id = style.name = 'Para %0{}d'.format(snum) % i
|
||||||
|
style.seq = i
|
||||||
|
self.combined_styles = sorted(counts.iterkeys(), key=attrgetter('seq'))
|
||||||
|
[ls.apply() for ls in self.combined_styles]
|
||||||
|
self.log.debug('%d Text Styles %d Combined styles' % tuple(map(len, (
|
||||||
|
self.text_styles, self.combined_styles))))
|
||||||
|
|
||||||
def serialize(self, styles):
|
def serialize(self, styles):
|
||||||
for style in sorted(self.block_styles, key=lambda s:(s is not self.normal_block_style, numeric_sort_key(s.id))):
|
lang = styles.xpath('descendant::*[local-name()="lang"]')[0]
|
||||||
style.serialize(styles, self.normal_block_style)
|
for k in tuple(lang.attrib):
|
||||||
for style in sorted(self.text_styles, key=lambda s:(s is not self.normal_text_style, numeric_sort_key(s.id))):
|
lang.attrib[k] = self.document_lang
|
||||||
|
for style in self.combined_styles:
|
||||||
|
style.serialize(styles, self.normal_style)
|
||||||
|
for style in sorted(self.text_styles, key=attrgetter('seq')):
|
||||||
style.serialize(styles, self.normal_text_style)
|
style.serialize(styles, self.normal_text_style)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user