From fd9ff327c70655a470d6e8070fa8a37fdc7b3a76 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 28 Apr 2015 16:00:05 +0530 Subject: [PATCH] DOCX Output: Finish up support for lists (except for image bullets) --- src/calibre/ebooks/docx/writer/TODO | 2 +- src/calibre/ebooks/docx/writer/container.py | 3 + src/calibre/ebooks/docx/writer/from_html.py | 6 ++ src/calibre/ebooks/docx/writer/lists.py | 95 +++++++++++++++++++-- 4 files changed, 97 insertions(+), 9 deletions(-) diff --git a/src/calibre/ebooks/docx/writer/TODO b/src/calibre/ebooks/docx/writer/TODO index 36b7677104..cb5b868cd5 100644 --- a/src/calibre/ebooks/docx/writer/TODO +++ b/src/calibre/ebooks/docx/writer/TODO @@ -2,7 +2,7 @@ Table of Contents Links
tag (probably as an empty block with a border) Various TODOs sprinkled through the source -Lists +List image bullet Cover image RTL text Lang support in run styles diff --git a/src/calibre/ebooks/docx/writer/container.py b/src/calibre/ebooks/docx/writer/container.py index 9d3eb29d08..079c039197 100644 --- a/src/calibre/ebooks/docx/writer/container.py +++ b/src/calibre/ebooks/docx/writer/container.py @@ -98,6 +98,7 @@ class DocumentRelationships(object): self.namespace = namespace for typ, target in { namespace.names['STYLES']: 'styles.xml', + namespace.names['NUMBERING']: 'numbering.xml', namespace.names['WEB_SETTINGS']: 'webSettings.xml', namespace.names['FONTS']: 'fontTable.xml', }.iteritems(): @@ -135,6 +136,7 @@ class DOCX(object): self.opts, self.log = opts, log self.document_relationships = DocumentRelationships(self.namespace) self.font_table = etree.Element('{%s}fonts' % namespaces['w'], nsmap={k:namespaces[k] for k in 'wr'}) + self.numbering = etree.Element('{%s}numbering' % namespaces['w'], nsmap={k:namespaces[k] for k in 'wr'}) E = ElementMaker(namespace=namespaces['pr'], nsmap={None:namespaces['pr']}) self.embedded_fonts = E.Relationships() self.fonts = {} @@ -239,6 +241,7 @@ class DOCX(object): zf.writestr('word/webSettings.xml', self.websettings) zf.writestr('word/document.xml', xml2str(self.document)) zf.writestr('word/styles.xml', xml2str(self.styles)) + zf.writestr('word/numbering.xml', xml2str(self.numbering)) zf.writestr('word/fontTable.xml', xml2str(self.font_table)) zf.writestr('word/_rels/document.xml.rels', self.document_relationships.serialize()) zf.writestr('word/_rels/fontTable.xml.rels', xml2str(self.embedded_fonts)) diff --git a/src/calibre/ebooks/docx/writer/from_html.py b/src/calibre/ebooks/docx/writer/from_html.py index 65e5338b3e..e859eae5d2 100644 --- a/src/calibre/ebooks/docx/writer/from_html.py +++ b/src/calibre/ebooks/docx/writer/from_html.py @@ -100,6 +100,7 @@ class Block(object): def __init__(self, namespace, styles_manager, html_block, style, is_table_cell=False, float_spec=None, is_list_item=False): self.namespace = namespace self.list_tag = (html_block, style) if is_list_item else None + self.numbering_id = None self.parent_items = None self.html_block = html_block self.float_spec = float_spec @@ -165,6 +166,10 @@ class Block(object): makeelement(ppr, 'w:pageBreakBefore') if self.float_spec is not None: self.float_spec.serialize(self, ppr) + if self.numbering_id is not None: + numpr = makeelement(ppr, 'w:numPr') + makeelement(numpr, 'w:ilvl', w_val=str(self.numbering_id[1])) + makeelement(numpr, 'w:numId', w_val=str(self.numbering_id[0])) makeelement(ppr, 'w:pStyle', w_val=self.style.id) for run in self.runs: run.serialize(p) @@ -409,3 +414,4 @@ class Convert(object): self.styles_manager.serialize(self.docx.styles) self.images_manager.serialize(self.docx.images) self.fonts_manager.serialize(self.styles_manager.text_styles, self.docx.font_table, self.docx.embedded_fonts, self.docx.fonts) + self.lists_manager.serialize(self.docx.numbering) diff --git a/src/calibre/ebooks/docx/writer/lists.py b/src/calibre/ebooks/docx/writer/lists.py index e74e756793..b9dfc15ca4 100644 --- a/src/calibre/ebooks/docx/writer/lists.py +++ b/src/calibre/ebooks/docx/writer/lists.py @@ -7,6 +7,7 @@ __license__ = 'GPL v3' __copyright__ = '2015, Kovid Goyal ' from collections import defaultdict +from operator import attrgetter LIST_STYLES = frozenset( 'disc circle square decimal decimal-leading-zero lower-roman upper-roman' @@ -34,7 +35,7 @@ STYLE_MAP = { def find_list_containers(list_tag, tag_style): node = list_tag - stylizer = tag_style.stylizer + stylizer = tag_style._stylizer ans = [] while True: parent = node.getparent() @@ -49,8 +50,42 @@ def find_list_containers(list_tag, tag_style): class NumberingDefinition(object): - def __init__(self): - pass + def __init__(self, top_most, stylizer, namespace): + self.namespace = namespace + self.top_most = top_most + self.stylizer = stylizer + self.level_map = defaultdict(list) + self.num_id = None + + def finalize(self): + items_for_level = defaultdict(list) + container_for_level = {} + type_for_level = {} + for ilvl, items in self.level_map.iteritems(): + for container, list_tag, block, list_type, tag_style in items: + items_for_level[ilvl].append(list_tag) + container_for_level[ilvl] = container + type_for_level[ilvl] = list_type + self.levels = tuple( + Level(type_for_level[ilvl], container_for_level[ilvl], items_for_level[ilvl], ilvl=ilvl) + for ilvl in sorted(self.level_map) + ) + + def __hash__(self): + return hash(self.levels) + + def link_blocks(self): + for ilvl, items in self.level_map.iteritems(): + for container, list_tag, block, list_type, tag_style in items: + block.numbering_id = (self.num_id + 1, ilvl) + + def serialize(self, parent): + makeelement = self.namespace.makeelement + an = makeelement(parent, 'w:abstractNum', w_abstractNumId=str(self.num_id)) + makeelement(an, 'w:multiLevelType', w_val='hybridMultilevel') + makeelement(an, 'w:name', w_val='List %d' % (self.num_id + 1)) + for level in self.levels: + level.serialize(an, makeelement) class Level(object): @@ -60,20 +95,40 @@ class Level(object): self.start = int(container.get('start')) except Exception: self.start = 1 + if items: + try: + self.start = int(items[0].get('value')) + except Exception: + pass if list_type in {'disc', 'circle', 'square'}: self.num_fmt = 'bullet' - self.lvl_text = '%1' if list_type == 'disc' else STYLE_MAP['list_type'] + self.lvl_text = '\uf0b7' if list_type == 'disc' else STYLE_MAP[list_type] else: - self.lvl_text = '%1.' + self.lvl_text = '%{}.'.format(self.ilvl + 1) self.num_fmt = STYLE_MAP.get(list_type, 'decimal') -class ListManager(object): + def __hash__(self): + return hash((self.start, self.num_fmt, self.lvl_text)) + + def serialize(self, parent, makeelement): + lvl = makeelement(parent, 'w:lvl', w_ilvl=str(self.ilvl)) + makeelement(lvl, 'w:start', w_val=str(self.start)) + makeelement(lvl, 'w:numFmt', w_val=self.num_fmt) + makeelement(lvl, 'w:lvlText', w_val=self.lvl_text) + makeelement(lvl, 'w:lvlJc', w_val='left') + makeelement(makeelement(lvl, 'w:pPr'), 'w:ind', w_hanging='360', w_left=str(1152 + self.ilvl * 360)) + if self.num_fmt == 'bullet': + ff = {'\uf0b7':'Symbol', '\uf0a7':'Wingdings'}.get(self.lvl_text, 'Courier New') + makeelement(makeelement(lvl, 'w:rPr'), 'w:rFonts', w_ascii=ff, w_hAnsi=ff, w_hint="default") + +class ListsManager(object): def __init__(self, docx): self.namespace = docx.namespace + self.lists = {} def finalize(self, all_blocks): - lists = defaultdict(list) + lists = {} for block in all_blocks: if block.list_tag is not None: list_tag, tag_style = block.list_tag @@ -83,4 +138,28 @@ class ListManager(object): container_tags = find_list_containers(list_tag, tag_style) if not container_tags: continue - lists[(tuple(container_tags), list_type)].append((list_tag, tag_style)) + top_most = container_tags[-1] + if top_most not in lists: + lists[top_most] = NumberingDefinition(top_most, tag_style._stylizer, self.namespace) + l = lists[top_most] + ilvl = len(container_tags) - 1 + l.level_map[ilvl].append((container_tags[0], list_tag, block, list_type, tag_style)) + + [nd.finalize() for nd in lists.itervalues()] + definitions = {} + for defn in lists.itervalues(): + try: + defn = definitions[defn] + except KeyError: + definitions[defn] = defn + defn.num_id = len(definitions) - 1 + defn.link_blocks() + self.definitions = sorted(definitions.itervalues(), key=attrgetter('num_id')) + + def serialize(self, parent): + for defn in self.definitions: + defn.serialize(parent) + makeelement = self.namespace.makeelement + for defn in self.definitions: + n = makeelement(parent, 'w:num', w_numId=str(defn.num_id + 1)) + makeelement(n, 'w:abstractNumId', w_val=str(defn.num_id))