From 59d1054cbff4007b46aecd0d54ebdc1df9ae4152 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 20 Jun 2013 11:50:19 +0530 Subject: [PATCH] DOCX Input: Add support for images used as bullets --- src/calibre/ebooks/docx/images.py | 7 ++++--- src/calibre/ebooks/docx/numbering.py | 31 ++++++++++++++++++++++++---- src/calibre/ebooks/docx/to_html.py | 4 ++-- 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/src/calibre/ebooks/docx/images.py b/src/calibre/ebooks/docx/images.py index e24b550797..ea3685316f 100644 --- a/src/calibre/ebooks/docx/images.py +++ b/src/calibre/ebooks/docx/images.py @@ -100,11 +100,12 @@ class Images(object): def __call__(self, relationships_by_id): self.rid_map = relationships_by_id - def generate_filename(self, rid, base=None): + def generate_filename(self, rid, base=None, rid_map=None): if rid in self.used: return self.used[rid] - raw = self.docx.read(self.rid_map[rid]) - base = base or ascii_filename(self.rid_map[rid].rpartition('/')[-1]).replace(' ', '_') or 'image' + rid_map = self.rid_map if rid_map is None else rid_map + raw = self.docx.read(rid_map[rid]) + base = base or ascii_filename(rid_map[rid].rpartition('/')[-1]).replace(' ', '_') or 'image' ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg' base = base.rpartition('.')[0] if not base: diff --git a/src/calibre/ebooks/docx/numbering.py b/src/calibre/ebooks/docx/numbering.py index 0178df3227..2bf86eea27 100644 --- a/src/calibre/ebooks/docx/numbering.py +++ b/src/calibre/ebooks/docx/numbering.py @@ -40,13 +40,14 @@ class Level(object): self.paragraph_style = self.character_style = None self.is_numbered = False self.num_template = None + self.pic_id = None if lvl is not None: self.read_from_xml(lvl) def copy(self): ans = Level() - for x in ('restart', 'start', 'fmt', 'para_link', 'paragraph_style', 'character_style', 'is_numbered', 'num_template'): + for x in ('restart', 'pic_id', 'start', 'fmt', 'para_link', 'paragraph_style', 'character_style', 'is_numbered', 'num_template'): setattr(ans, x, getattr(self, x)) return ans @@ -80,6 +81,8 @@ class Level(object): if val == 'bullet': self.is_numbered = False self.fmt = {'\uf0a7':'square', 'o':'circle'}.get(lt, 'disc') + for lpid in XPath('./w:lvlPicBulletId[@w:val]')(lvl): + self.pic_id = get(lpid, 'w:val') else: self.is_numbered = True self.fmt = STYLE_MAP.get(val, 'decimal') @@ -103,6 +106,19 @@ class Level(object): else: self.character_style.update(ps) + def css(self, images, pic_map, rid_map): + ans = {'list-style-type': self.fmt} + if self.pic_id: + rid = pic_map.get(self.pic_id, None) + if rid: + try: + fname = images.generate_filename(rid, rid_map=rid_map) + except Exception: + fname = None + else: + ans['list-style-image'] = 'url("images/%s")' % fname + return ans + class NumberingDefinition(object): def __init__(self, parent=None): @@ -127,9 +143,16 @@ class Numbering(object): self.definitions = {} self.instances = {} self.counters = {} + self.pic_map = {} - def __call__(self, root, styles): + def __call__(self, root, styles, rid_map): ' Read all numbering style definitions ' + self.rid_map = rid_map + for npb in XPath('./w:numPicBullet[@w:numPicBulletId]')(root): + npbid = get(npb, 'w:numPicBulletId') + for idata in XPath('descendant::v:imagedata[@r:id]')(npb): + rid = get(idata, 'r:id') + self.pic_map[npbid] = rid lazy_load = {} for an in XPath('./w:abstractNum[@w:abstractNumId]')(root): an_id = get(an, 'w:abstractNumId') @@ -198,7 +221,7 @@ class Numbering(object): if (restart is None and ilvl == levelnum + 1) or restart == levelnum + 1: counter[ilvl] = lvl.start - def apply_markup(self, items, body, styles, object_map): + def apply_markup(self, items, body, styles, object_map, images): for p, num_id, ilvl in items: d = self.instances.get(num_id, None) if d is not None: @@ -232,7 +255,7 @@ class Numbering(object): if has_template: wrap.set('lvlid', lvlid) else: - wrap.set('class', styles.register({'list-style-type': lvl.fmt}, 'list')) + wrap.set('class', styles.register(lvl.css(images, self.pic_map, self.rid_map), 'list')) parent.insert(idx, wrap) last_val = None for child in current_run: diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index 963d1fc6c8..79020d9c0a 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -137,7 +137,7 @@ class Convert(object): except (TypeError, ValueError): lvl = 0 numbered.append((html_obj, num_id, lvl)) - self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map) + self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map, self.images) self.apply_frames() if len(self.body) > 0: @@ -263,7 +263,7 @@ class Convert(object): except KeyError: self.log.warn('Numbering styles %s do not exist' % nname) else: - numbering(fromstring(raw), self.styles) + numbering(fromstring(raw), self.styles, self.docx.get_relationships(nname)[0]) self.styles.resolve_numbering(numbering)