DOCX Input: Add support for images used as bullets

This commit is contained in:
Kovid Goyal 2013-06-20 11:50:19 +05:30
parent b7d7a98fa6
commit 59d1054cbf
3 changed files with 33 additions and 9 deletions

View File

@ -100,11 +100,12 @@ class Images(object):
def __call__(self, relationships_by_id): def __call__(self, relationships_by_id):
self.rid_map = relationships_by_id self.rid_map = relationships_by_id
def generate_filename(self, rid, base=None): def generate_filename(self, rid, base=None, rid_map=None):
if rid in self.used: if rid in self.used:
return self.used[rid] return self.used[rid]
raw = self.docx.read(self.rid_map[rid]) rid_map = self.rid_map if rid_map is None else rid_map
base = base or ascii_filename(self.rid_map[rid].rpartition('/')[-1]).replace(' ', '_') or 'image' raw = self.docx.read(rid_map[rid])
base = base or ascii_filename(rid_map[rid].rpartition('/')[-1]).replace(' ', '_') or 'image'
ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg' ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg'
base = base.rpartition('.')[0] base = base.rpartition('.')[0]
if not base: if not base:

View File

@ -40,13 +40,14 @@ class Level(object):
self.paragraph_style = self.character_style = None self.paragraph_style = self.character_style = None
self.is_numbered = False self.is_numbered = False
self.num_template = None self.num_template = None
self.pic_id = None
if lvl is not None: if lvl is not None:
self.read_from_xml(lvl) self.read_from_xml(lvl)
def copy(self): def copy(self):
ans = Level() ans = Level()
for x in ('restart', 'start', 'fmt', 'para_link', 'paragraph_style', 'character_style', 'is_numbered', 'num_template'): for x in ('restart', 'pic_id', 'start', 'fmt', 'para_link', 'paragraph_style', 'character_style', 'is_numbered', 'num_template'):
setattr(ans, x, getattr(self, x)) setattr(ans, x, getattr(self, x))
return ans return ans
@ -80,6 +81,8 @@ class Level(object):
if val == 'bullet': if val == 'bullet':
self.is_numbered = False self.is_numbered = False
self.fmt = {'\uf0a7':'square', 'o':'circle'}.get(lt, 'disc') self.fmt = {'\uf0a7':'square', 'o':'circle'}.get(lt, 'disc')
for lpid in XPath('./w:lvlPicBulletId[@w:val]')(lvl):
self.pic_id = get(lpid, 'w:val')
else: else:
self.is_numbered = True self.is_numbered = True
self.fmt = STYLE_MAP.get(val, 'decimal') self.fmt = STYLE_MAP.get(val, 'decimal')
@ -103,6 +106,19 @@ class Level(object):
else: else:
self.character_style.update(ps) self.character_style.update(ps)
def css(self, images, pic_map, rid_map):
    """Return the CSS properties for a list that uses this level.

    The result always contains ``list-style-type`` (taken from ``self.fmt``).
    When this level has a picture bullet (``self.pic_id`` is set) and the
    picture can be resolved to an image file, ``list-style-image`` is added
    as well.

    :param images: object providing ``generate_filename(rid, rid_map=...)``
    :param pic_map: mapping of picture bullet id -> relationship id
    :param rid_map: relationship map passed through to
                    ``images.generate_filename``
    :return: dict of CSS property name -> value
    """
    ans = {'list-style-type': self.fmt}
    if not self.pic_id:
        return ans
    rid = pic_map.get(self.pic_id, None)
    if not rid:
        return ans
    try:
        fname = images.generate_filename(rid, rid_map=rid_map)
    except Exception:
        # Deliberately best-effort: if the bullet image cannot be resolved,
        # fall back to the plain list-style-type instead of failing.
        pass
    else:
        ans['list-style-image'] = 'url("images/%s")' % fname
    return ans
class NumberingDefinition(object): class NumberingDefinition(object):
def __init__(self, parent=None): def __init__(self, parent=None):
@ -127,9 +143,16 @@ class Numbering(object):
self.definitions = {} self.definitions = {}
self.instances = {} self.instances = {}
self.counters = {} self.counters = {}
self.pic_map = {}
def __call__(self, root, styles): def __call__(self, root, styles, rid_map):
' Read all numbering style definitions ' ' Read all numbering style definitions '
self.rid_map = rid_map
for npb in XPath('./w:numPicBullet[@w:numPicBulletId]')(root):
npbid = get(npb, 'w:numPicBulletId')
for idata in XPath('descendant::v:imagedata[@r:id]')(npb):
rid = get(idata, 'r:id')
self.pic_map[npbid] = rid
lazy_load = {} lazy_load = {}
for an in XPath('./w:abstractNum[@w:abstractNumId]')(root): for an in XPath('./w:abstractNum[@w:abstractNumId]')(root):
an_id = get(an, 'w:abstractNumId') an_id = get(an, 'w:abstractNumId')
@ -198,7 +221,7 @@ class Numbering(object):
if (restart is None and ilvl == levelnum + 1) or restart == levelnum + 1: if (restart is None and ilvl == levelnum + 1) or restart == levelnum + 1:
counter[ilvl] = lvl.start counter[ilvl] = lvl.start
def apply_markup(self, items, body, styles, object_map): def apply_markup(self, items, body, styles, object_map, images):
for p, num_id, ilvl in items: for p, num_id, ilvl in items:
d = self.instances.get(num_id, None) d = self.instances.get(num_id, None)
if d is not None: if d is not None:
@ -232,7 +255,7 @@ class Numbering(object):
if has_template: if has_template:
wrap.set('lvlid', lvlid) wrap.set('lvlid', lvlid)
else: else:
wrap.set('class', styles.register({'list-style-type': lvl.fmt}, 'list')) wrap.set('class', styles.register(lvl.css(images, self.pic_map, self.rid_map), 'list'))
parent.insert(idx, wrap) parent.insert(idx, wrap)
last_val = None last_val = None
for child in current_run: for child in current_run:

View File

@ -137,7 +137,7 @@ class Convert(object):
except (TypeError, ValueError): except (TypeError, ValueError):
lvl = 0 lvl = 0
numbered.append((html_obj, num_id, lvl)) numbered.append((html_obj, num_id, lvl))
self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map) self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map, self.images)
self.apply_frames() self.apply_frames()
if len(self.body) > 0: if len(self.body) > 0:
@ -263,7 +263,7 @@ class Convert(object):
except KeyError: except KeyError:
self.log.warn('Numbering styles %s do not exist' % nname) self.log.warn('Numbering styles %s do not exist' % nname)
else: else:
numbering(fromstring(raw), self.styles) numbering(fromstring(raw), self.styles, self.docx.get_relationships(nname)[0])
self.styles.resolve_numbering(numbering) self.styles.resolve_numbering(numbering)