mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
DOCX Input: Lists work
This commit is contained in:
parent
cc223574d0
commit
ed422c7b0f
@ -175,6 +175,20 @@ def read_shd(parent, dest):
|
||||
if val:
|
||||
ans = simple_color(val, auto='transparent')
|
||||
setattr(dest, 'background_color', ans)
|
||||
|
||||
def read_numbering(parent, dest):
    """Store the list numbering referenced under ``parent`` on ``dest``.

    Every ``w:numPr`` child of ``parent`` is scanned for a ``w:ilvl`` (list
    level) and a ``w:numId`` (list id) carrying a ``w:val`` attribute; when
    several are present the last one seen wins.  ``dest.numbering`` is set
    to the tuple ``(num_id, lvl)`` if at least one of the two was found and
    to ``inherit`` otherwise.  ``num_id`` is kept as the raw attribute
    value while ``lvl`` is converted to an int.
    """
    level = None
    list_id = None
    for num_pr in XPath('./w:numPr')(parent):
        for level_node in XPath('./w:ilvl[@w:val]')(num_pr):
            try:
                level = int(get(level_node, 'w:val'))
            except (ValueError, TypeError):
                # A non-integer level is ignored; any earlier value is kept.
                pass
        for id_node in XPath('./w:numId[@w:val]')(num_pr):
            list_id = get(id_node, 'w:val')

    if list_id is None and level is None:
        dest.numbering = inherit
    else:
        dest.numbering = (list_id, level)
|
||||
|
||||
# }}}
|
||||
|
||||
class ParagraphStyle(object):
|
||||
@ -194,6 +208,7 @@ class ParagraphStyle(object):
|
||||
|
||||
# Misc.
|
||||
'text_indent', 'text_align', 'line_height', 'direction', 'background_color',
|
||||
'numbering',
|
||||
)
|
||||
|
||||
def __init__(self, pPr=None):
|
||||
@ -210,7 +225,7 @@ class ParagraphStyle(object):
|
||||
):
|
||||
setattr(self, p, binary_property(pPr, p))
|
||||
|
||||
for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd'):
|
||||
for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd', 'numbering'):
|
||||
f = globals()['read_%s' % x]
|
||||
f(pPr, self)
|
||||
|
||||
|
@ -6,6 +6,11 @@ from __future__ import (unicode_literals, division, absolute_import,
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import re
|
||||
from collections import Counter
|
||||
|
||||
from lxml.html.builder import OL, UL, SPAN
|
||||
|
||||
from calibre.ebooks.docx.block_styles import ParagraphStyle
|
||||
from calibre.ebooks.docx.char_styles import RunStyle
|
||||
from calibre.ebooks.docx.names import XPath, get
|
||||
@ -33,10 +38,26 @@ class Level(object):
|
||||
self.fmt = 'decimal'
|
||||
self.para_link = None
|
||||
self.paragraph_style = self.character_style = None
|
||||
self.is_numbered = False
|
||||
self.num_template = None
|
||||
|
||||
if lvl is not None:
|
||||
self.read_from_xml(lvl)
|
||||
|
||||
def copy(self):
|
||||
ans = Level()
|
||||
for x in ('restart', 'start', 'fmt', 'para_link', 'paragraph_style', 'character_style', 'is_numbered', 'num_template'):
|
||||
setattr(ans, x, getattr(self, x))
|
||||
return ans
|
||||
|
||||
def format_template(self, counter, ilvl):
    """Expand ``self.num_template`` into the label text for one list item.

    Each ``%N`` placeholder refers to the (1-based) level ``N``.  It is
    replaced by the number stored in ``counter`` for level ``N - 1``;
    placeholders that point below ``ilvl`` or at a level missing from
    ``counter`` expand to the empty string.  For levels other than
    ``ilvl`` itself the stored number is reduced by one.  Trailing
    whitespace is stripped from the result and a single ``'\\xa0'`` is
    appended.
    """
    def expand(match):
        level = int(match.group(1)) - 1
        if level <= ilvl and level in counter:
            offset = 0 if level == ilvl else 1
            return '%d' % (counter[level] - offset)
        return ''

    rendered = re.sub(r'%(\d+)', expand, self.num_template)
    return rendered.rstrip() + '\xa0'
|
||||
|
||||
def read_from_xml(self, lvl, override=False):
|
||||
for lr in XPath('./w:lvlRestart[@w:val]')(lvl):
|
||||
try:
|
||||
@ -57,9 +78,13 @@ class Level(object):
|
||||
for lr in XPath('./w:numFmt[@w:val]')(lvl):
|
||||
val = get(lr, 'w:val')
|
||||
if val == 'bullet':
|
||||
self.is_numbered = False
|
||||
self.fmt = {'\uf0a7':'square', 'o':'circle'}.get(lt, 'disc')
|
||||
else:
|
||||
self.is_numbered = True
|
||||
self.fmt = STYLE_MAP.get(val, 'decimal')
|
||||
if lt and re.match(r'%\d+\.$', lt) is None:
|
||||
self.num_template = lt
|
||||
|
||||
for lr in XPath('./w:pStyle[@w:val]')(lvl):
|
||||
self.para_link = get(lr, 'w:val')
|
||||
@ -78,12 +103,6 @@ class Level(object):
|
||||
else:
|
||||
self.character_style.update(ps)
|
||||
|
||||
def copy(self):
|
||||
ans = Level()
|
||||
for x in ('restart', 'start', 'fmt', 'para_link', 'paragraph_style', 'character_style'):
|
||||
setattr(ans, x, getattr(self, x))
|
||||
return ans
|
||||
|
||||
class NumberingDefinition(object):
|
||||
|
||||
def __init__(self, parent=None):
|
||||
@ -107,6 +126,7 @@ class Numbering(object):
|
||||
def __init__(self):
|
||||
self.definitions = {}
|
||||
self.instances = {}
|
||||
self.counters = {}
|
||||
|
||||
def __call__(self, root, styles):
|
||||
' Read all numbering style definitions '
|
||||
@ -131,6 +151,7 @@ class Numbering(object):
|
||||
if alvl is None:
|
||||
alvl = Level()
|
||||
alvl.read_from_xml(lvl, override=True)
|
||||
return nd
|
||||
|
||||
next_pass = {}
|
||||
for n in XPath('./w:num[@w:numId]')(root):
|
||||
@ -154,3 +175,114 @@ class Numbering(object):
|
||||
if d is not None:
|
||||
self.instances[num_id] = create_instance(n, d)
|
||||
|
||||
for num_id, d in self.instances.iteritems():
|
||||
self.counters[num_id] = Counter({lvl:d.levels[lvl].start for lvl in d.levels})
|
||||
|
||||
def get_pstyle(self, num_id, style_id):
    """Find the level of list ``num_id`` that is linked to ``style_id``.

    Returns the key (level index) of the first level whose ``para_link``
    equals ``style_id``.  Returns None when ``num_id`` is not a known
    instance or no level is linked to that style.
    """
    instance = self.instances.get(num_id, None)
    if instance is None:
        return None
    for index, level in instance.levels.iteritems():
        if level.para_link == style_id:
            return index
    return None
|
||||
|
||||
def get_para_style(self, num_id, lvl):
    """Return the paragraph style attached to level ``lvl`` of ``num_id``.

    Returns None when the list instance is unknown, when it has no such
    level, or when the level object carries no ``paragraph_style``.
    """
    instance = self.instances.get(num_id, None)
    if instance is None:
        return None
    level = instance.levels.get(lvl, None)
    return getattr(level, 'paragraph_style', None)
|
||||
|
||||
def update_counter(self, counter, levelnum, levels):
    """Advance ``counter`` after an item at ``levelnum`` has been emitted.

    The count for ``levelnum`` is incremented.  Then every level in
    ``levels`` whose restart trigger equals ``levelnum + 1`` is reset to
    its ``start`` value; a level without an explicit ``restart`` uses its
    own index as the trigger, so the level directly below ``levelnum``
    starts over.
    """
    counter[levelnum] += 1
    next_level = levelnum + 1
    for index, level in levels.iteritems():
        explicit = level.restart
        trigger = index if explicit is None else explicit
        if trigger == next_level:
            counter[index] = level.start
|
||||
|
||||
# Convert the paragraphs listed in ``items`` into HTML list markup inside
# ``body``.  ``items`` yields (html element, num_id, level) triples.  The
# work happens in three passes: tag the items, wrap adjacent items in list
# containers, then rewrite templated lists as CSS tables.
# NOTE(review): the nesting of the statements below is not visible in this
# flattened listing; the comments describe individual statements only.
def apply_markup(self, items, body, styles, object_map):
|
||||
# Pass 1: retag every item that belongs to a known list instance/level as
# <li> and record its number, level and list id in temporary attributes.
for p, num_id, ilvl in items:
|
||||
d = self.instances.get(num_id, None)
|
||||
if d is not None:
|
||||
lvl = d.levels.get(ilvl, None)
|
||||
if lvl is not None:
|
||||
counter = self.counters[num_id]
|
||||
p.tag = 'li'
|
||||
p.set('value', '%s' % counter[ilvl])
|
||||
p.set('list-lvl', str(ilvl))
|
||||
p.set('list-id', num_id)
|
||||
# Levels with a custom number template also get the rendered label text.
if lvl.num_template is not None:
|
||||
val = lvl.format_template(counter, ilvl)
|
||||
p.set('list-template', val)
|
||||
# The label is formatted before the counter for this item is advanced.
self.update_counter(counter, ilvl, d.levels)
|
||||
|
||||
# Helper: move the accumulated run of <li> elements into a new list
# container inserted where the first item of the run used to be, then
# empty ``current_run`` in place.
def commit(current_run):
|
||||
if not current_run:
|
||||
return
|
||||
start = current_run[0]
|
||||
parent = start.getparent()
|
||||
idx = parent.index(start)
|
||||
|
||||
# The first item of the run decides list instance, level and container.
d = self.instances[start.get('list-id')]
|
||||
ilvl = int(start.get('list-lvl'))
|
||||
lvl = d.levels[ilvl]
|
||||
lvlid = start.get('list-id') + start.get('list-lvl')
|
||||
wrap = (OL if lvl.is_numbered else UL)('\n\t')
|
||||
has_template = 'list-template' in start.attrib
|
||||
# Templated lists are marked with a temporary ``lvlid`` attribute (they
# are post-processed in the last pass); all others get a CSS class that
# sets list-style-type.
if has_template:
|
||||
wrap.set('lvlid', lvlid)
|
||||
else:
|
||||
wrap.set('class', styles.register({'list-style-type': lvl.fmt}, 'list'))
|
||||
parent.insert(idx, wrap)
|
||||
last_val = None
|
||||
for child in current_run:
|
||||
wrap.append(child)
|
||||
child.tail = '\n\t'
|
||||
# For templated items the existing text and children are moved into one
# <span>, and a second <span> holding the label text is put in front.
if has_template:
|
||||
span = SPAN()
|
||||
span.text = child.text
|
||||
child.text = None
|
||||
for gc in child:
|
||||
span.append(gc)
|
||||
child.append(span)
|
||||
span = SPAN(child.get('list-template'))
|
||||
child.insert(0, span)
|
||||
# Remove the temporary bookkeeping attributes set in pass 1.
for attr in ('list-lvl', 'list-id', 'list-template'):
|
||||
child.attrib.pop(attr, None)
|
||||
val = int(child.get('value'))
|
||||
# ``value`` is dropped when it simply continues the previous item's
# number by one, and always for <ul> containers.
if last_val == val - 1 or wrap.tag == 'ul':
|
||||
child.attrib.pop('value')
|
||||
last_val = val
|
||||
current_run[-1].tail = '\n'
|
||||
del current_run[:]
|
||||
|
||||
# Pass 2: collect every element that has an <li> child ...
parents = set()
|
||||
for child in body.iterdescendants('li'):
|
||||
parents.add(child.getparent())
|
||||
|
||||
# ... and group its adjacent <li> children that share the same
# (list-id, list-lvl) pair into runs; any other child ends the run.
for parent in parents:
|
||||
current_run = []
|
||||
for child in parent:
|
||||
if child.tag == 'li':
|
||||
if current_run:
|
||||
last = current_run[-1]
|
||||
if (last.get('list-id') , last.get('list-lvl')) != (child.get('list-id'), child.get('list-lvl')):
|
||||
commit(current_run)
|
||||
current_run.append(child)
|
||||
else:
|
||||
commit(current_run)
|
||||
commit(current_run)
|
||||
|
||||
# Pass 3: every <ol> still carrying ``lvlid`` (a templated list) becomes a
# <div> styled as a table, its items become table rows and the children
# of each item become table cells.
for wrap in body.xpath('//ol[@lvlid]'):
|
||||
wrap.attrib.pop('lvlid')
|
||||
wrap.tag = 'div'
|
||||
for i, li in enumerate(wrap.iterchildren('li')):
|
||||
li.tag = 'div'
|
||||
li.attrib.pop('value', None)
|
||||
li.set('style', 'display:table-row')
|
||||
# Look up the resolved block style of the source paragraph for this item.
obj = object_map[li]
|
||||
bs = styles.para_cache[obj]
|
||||
# The first item's left margin is applied to the table wrapper, and
# margin-left is removed from the item's own css.
if i == 0:
|
||||
wrap.set('style', 'display:table; margin-left: %s' % (bs.css.get('margin-left', 0)))
|
||||
bs.css.pop('margin-left', None)
|
||||
for child in li:
|
||||
child.set('style', 'display:table-cell')
|
||||
|
||||
|
@ -198,7 +198,18 @@ class Styles(object):
|
||||
if default_para.character_style is not None:
|
||||
self.para_char_cache[p] = default_para.character_style
|
||||
|
||||
is_numbering = direct_formatting.numbering is not inherit
|
||||
if is_numbering:
|
||||
num_id, lvl = direct_formatting.numbering
|
||||
if num_id is not None:
|
||||
p.set('calibre_num_id', '%s:%s' % (lvl, num_id))
|
||||
if num_id is not None and lvl is not None:
|
||||
ps = self.numbering.get_para_style(num_id, lvl)
|
||||
if ps is not None:
|
||||
parent_styles.append(ps)
|
||||
|
||||
for attr in ans.all_properties:
|
||||
if not (is_numbering and attr == 'text_indent'): # skip text-indent for lists
|
||||
setattr(ans, attr, self.para_val(parent_styles, direct_formatting, attr))
|
||||
return ans
|
||||
|
||||
@ -244,10 +255,20 @@ class Styles(object):
|
||||
return self.resolve_run(obj)
|
||||
|
||||
def resolve_numbering(self, numbering):
    """Attach ``numbering`` and fix up list levels stored in styles.

    When a numPr element appears inside a paragraph style, the level
    recorded there must be discarded; the level that links back to the
    style (via pStyle) is used instead.  Styles for which no such level
    exists have their numbering reset to ``inherit``.
    """
    self.numbering = numbering
    for style in self:
        para = style.paragraph_style
        if para is None or para.numbering is inherit:
            continue
        num_id = para.numbering[0]
        level = numbering.get_pstyle(num_id, style.style_id)
        para.numbering = inherit if level is None else (num_id, level)
|
||||
|
||||
def register(self, css, prefix):
|
||||
h = hash(tuple(css.iteritems()))
|
||||
h = hash(frozenset(css.iteritems()))
|
||||
ans, _ = self.classes.get(h, (None, None))
|
||||
if ans is None:
|
||||
self.counter[prefix] += 1
|
||||
@ -266,13 +287,15 @@ class Styles(object):
|
||||
self.register(css, 'text')
|
||||
|
||||
def class_name(self, css):
    """Return the class name registered for ``css``, or None if there is none.

    ``css`` is a mapping of CSS properties to values.  The lookup key is
    the hash of the frozenset of its items, so two mappings with the same
    properties produce the same key regardless of their iteration order.
    """
    # Only the frozenset-based hash is computed; a tuple-based hash computed
    # first and then overwritten would be wasted, order-dependent work.
    h = hash(frozenset(css.iteritems()))
    return self.classes.get(h, (None, None))[0]
|
||||
|
||||
def generate_css(self):
|
||||
prefix = textwrap.dedent(
|
||||
'''\
|
||||
p { margin: 0; padding: 0; text-indent: 1.5em }
|
||||
p { text-indent: 1.5em }
|
||||
|
||||
ul, ol, p { margin: 0; padding: 0 }
|
||||
''')
|
||||
|
||||
ans = []
|
||||
|
@ -7,6 +7,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import sys, os, re
|
||||
from collections import OrderedDict
|
||||
|
||||
from lxml import html
|
||||
from lxml.html.builder import (
|
||||
@ -36,7 +37,7 @@ class Convert(object):
|
||||
self.mi = self.docx.metadata
|
||||
self.body = BODY()
|
||||
self.styles = Styles()
|
||||
self.object_map = {}
|
||||
self.object_map = OrderedDict()
|
||||
self.html = HTML(
|
||||
HEAD(
|
||||
META(charset='utf-8'),
|
||||
@ -72,6 +73,19 @@ class Convert(object):
|
||||
pass # TODO: Last section properties
|
||||
else:
|
||||
self.log.debug('Unknown top-level tag: %s, ignoring' % barename(top_level.tag))
|
||||
|
||||
numbered = []
|
||||
for html_obj, obj in self.object_map.iteritems():
|
||||
raw = obj.get('calibre_num_id', None)
|
||||
if raw is not None:
|
||||
lvl, num_id = raw.partition(':')[0::2]
|
||||
try:
|
||||
lvl = int(lvl)
|
||||
except (TypeError, ValueError):
|
||||
lvl = 0
|
||||
numbered.append((html_obj, num_id, lvl))
|
||||
self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map)
|
||||
|
||||
if len(self.body) > 0:
|
||||
self.body.text = '\n\t'
|
||||
for child in self.body:
|
||||
@ -102,7 +116,7 @@ class Convert(object):
|
||||
|
||||
nname = get_name(NUMBERING, 'numbering.xml')
|
||||
sname = get_name(STYLES, 'styles.xml')
|
||||
numbering = Numbering()
|
||||
numbering = self.numbering = Numbering()
|
||||
|
||||
if sname is not None:
|
||||
try:
|
||||
@ -133,6 +147,7 @@ class Convert(object):
|
||||
|
||||
def convert_p(self, p):
|
||||
dest = P()
|
||||
self.object_map[dest] = p
|
||||
style = self.styles.resolve_paragraph(p)
|
||||
for run in XPath('descendant::w:r')(p):
|
||||
span = self.convert_run(run)
|
||||
@ -173,7 +188,6 @@ class Convert(object):
|
||||
wrapper = self.wrap_elems(spans, SPAN())
|
||||
wrapper.set('class', cls)
|
||||
|
||||
self.object_map[dest] = p
|
||||
return dest
|
||||
|
||||
def wrap_elems(self, elems, wrapper):
|
||||
@ -188,7 +202,7 @@ class Convert(object):
|
||||
|
||||
def convert_run(self, run):
|
||||
ans = SPAN()
|
||||
ans.run = run
|
||||
self.object_map[ans] = run
|
||||
text = Text(ans, 'text', [])
|
||||
|
||||
for child in run:
|
||||
@ -224,7 +238,6 @@ class Convert(object):
|
||||
ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
|
||||
if style.lang is not inherit:
|
||||
ans.lang = style.lang
|
||||
self.object_map[ans] = run
|
||||
return ans
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
Loading…
x
Reference in New Issue
Block a user