mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
DOCX Input: Lists work
This commit is contained in:
parent
cc223574d0
commit
ed422c7b0f
@ -175,6 +175,20 @@ def read_shd(parent, dest):
|
|||||||
if val:
|
if val:
|
||||||
ans = simple_color(val, auto='transparent')
|
ans = simple_color(val, auto='transparent')
|
||||||
setattr(dest, 'background_color', ans)
|
setattr(dest, 'background_color', ans)
|
||||||
|
|
||||||
|
def read_numbering(parent, dest):
|
||||||
|
lvl = num_id = None
|
||||||
|
for np in XPath('./w:numPr')(parent):
|
||||||
|
for ilvl in XPath('./w:ilvl[@w:val]')(np):
|
||||||
|
try:
|
||||||
|
lvl = int(get(ilvl, 'w:val'))
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
pass
|
||||||
|
for num in XPath('./w:numId[@w:val]')(np):
|
||||||
|
num_id = get(num, 'w:val')
|
||||||
|
val = (num_id, lvl) if num_id is not None or lvl is not None else inherit
|
||||||
|
setattr(dest, 'numbering', val)
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
class ParagraphStyle(object):
|
class ParagraphStyle(object):
|
||||||
@ -194,6 +208,7 @@ class ParagraphStyle(object):
|
|||||||
|
|
||||||
# Misc.
|
# Misc.
|
||||||
'text_indent', 'text_align', 'line_height', 'direction', 'background_color',
|
'text_indent', 'text_align', 'line_height', 'direction', 'background_color',
|
||||||
|
'numbering',
|
||||||
)
|
)
|
||||||
|
|
||||||
def __init__(self, pPr=None):
|
def __init__(self, pPr=None):
|
||||||
@ -210,7 +225,7 @@ class ParagraphStyle(object):
|
|||||||
):
|
):
|
||||||
setattr(self, p, binary_property(pPr, p))
|
setattr(self, p, binary_property(pPr, p))
|
||||||
|
|
||||||
for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd'):
|
for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd', 'numbering'):
|
||||||
f = globals()['read_%s' % x]
|
f = globals()['read_%s' % x]
|
||||||
f(pPr, self)
|
f(pPr, self)
|
||||||
|
|
||||||
|
@ -6,6 +6,11 @@ from __future__ import (unicode_literals, division, absolute_import,
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
import re
|
||||||
|
from collections import Counter
|
||||||
|
|
||||||
|
from lxml.html.builder import OL, UL, SPAN
|
||||||
|
|
||||||
from calibre.ebooks.docx.block_styles import ParagraphStyle
|
from calibre.ebooks.docx.block_styles import ParagraphStyle
|
||||||
from calibre.ebooks.docx.char_styles import RunStyle
|
from calibre.ebooks.docx.char_styles import RunStyle
|
||||||
from calibre.ebooks.docx.names import XPath, get
|
from calibre.ebooks.docx.names import XPath, get
|
||||||
@ -33,10 +38,26 @@ class Level(object):
|
|||||||
self.fmt = 'decimal'
|
self.fmt = 'decimal'
|
||||||
self.para_link = None
|
self.para_link = None
|
||||||
self.paragraph_style = self.character_style = None
|
self.paragraph_style = self.character_style = None
|
||||||
|
self.is_numbered = False
|
||||||
|
self.num_template = None
|
||||||
|
|
||||||
if lvl is not None:
|
if lvl is not None:
|
||||||
self.read_from_xml(lvl)
|
self.read_from_xml(lvl)
|
||||||
|
|
||||||
|
def copy(self):
|
||||||
|
ans = Level()
|
||||||
|
for x in ('restart', 'start', 'fmt', 'para_link', 'paragraph_style', 'character_style', 'is_numbered', 'num_template'):
|
||||||
|
setattr(ans, x, getattr(self, x))
|
||||||
|
return ans
|
||||||
|
|
||||||
|
def format_template(self, counter, ilvl):
|
||||||
|
def sub(m):
|
||||||
|
x = int(m.group(1)) - 1
|
||||||
|
if x > ilvl or x not in counter:
|
||||||
|
return ''
|
||||||
|
return '%d' % (counter[x] - (0 if x == ilvl else 1))
|
||||||
|
return re.sub(r'%(\d+)', sub, self.num_template).rstrip() + '\xa0'
|
||||||
|
|
||||||
def read_from_xml(self, lvl, override=False):
|
def read_from_xml(self, lvl, override=False):
|
||||||
for lr in XPath('./w:lvlRestart[@w:val]')(lvl):
|
for lr in XPath('./w:lvlRestart[@w:val]')(lvl):
|
||||||
try:
|
try:
|
||||||
@ -57,9 +78,13 @@ class Level(object):
|
|||||||
for lr in XPath('./w:numFmt[@w:val]')(lvl):
|
for lr in XPath('./w:numFmt[@w:val]')(lvl):
|
||||||
val = get(lr, 'w:val')
|
val = get(lr, 'w:val')
|
||||||
if val == 'bullet':
|
if val == 'bullet':
|
||||||
|
self.is_numbered = False
|
||||||
self.fmt = {'\uf0a7':'square', 'o':'circle'}.get(lt, 'disc')
|
self.fmt = {'\uf0a7':'square', 'o':'circle'}.get(lt, 'disc')
|
||||||
else:
|
else:
|
||||||
|
self.is_numbered = True
|
||||||
self.fmt = STYLE_MAP.get(val, 'decimal')
|
self.fmt = STYLE_MAP.get(val, 'decimal')
|
||||||
|
if lt and re.match(r'%\d+\.$', lt) is None:
|
||||||
|
self.num_template = lt
|
||||||
|
|
||||||
for lr in XPath('./w:pStyle[@w:val]')(lvl):
|
for lr in XPath('./w:pStyle[@w:val]')(lvl):
|
||||||
self.para_link = get(lr, 'w:val')
|
self.para_link = get(lr, 'w:val')
|
||||||
@ -78,12 +103,6 @@ class Level(object):
|
|||||||
else:
|
else:
|
||||||
self.character_style.update(ps)
|
self.character_style.update(ps)
|
||||||
|
|
||||||
def copy(self):
|
|
||||||
ans = Level()
|
|
||||||
for x in ('restart', 'start', 'fmt', 'para_link', 'paragraph_style', 'character_style'):
|
|
||||||
setattr(ans, x, getattr(self, x))
|
|
||||||
return ans
|
|
||||||
|
|
||||||
class NumberingDefinition(object):
|
class NumberingDefinition(object):
|
||||||
|
|
||||||
def __init__(self, parent=None):
|
def __init__(self, parent=None):
|
||||||
@ -107,6 +126,7 @@ class Numbering(object):
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.definitions = {}
|
self.definitions = {}
|
||||||
self.instances = {}
|
self.instances = {}
|
||||||
|
self.counters = {}
|
||||||
|
|
||||||
def __call__(self, root, styles):
|
def __call__(self, root, styles):
|
||||||
' Read all numbering style definitions '
|
' Read all numbering style definitions '
|
||||||
@ -131,6 +151,7 @@ class Numbering(object):
|
|||||||
if alvl is None:
|
if alvl is None:
|
||||||
alvl = Level()
|
alvl = Level()
|
||||||
alvl.read_from_xml(lvl, override=True)
|
alvl.read_from_xml(lvl, override=True)
|
||||||
|
return nd
|
||||||
|
|
||||||
next_pass = {}
|
next_pass = {}
|
||||||
for n in XPath('./w:num[@w:numId]')(root):
|
for n in XPath('./w:num[@w:numId]')(root):
|
||||||
@ -154,3 +175,114 @@ class Numbering(object):
|
|||||||
if d is not None:
|
if d is not None:
|
||||||
self.instances[num_id] = create_instance(n, d)
|
self.instances[num_id] = create_instance(n, d)
|
||||||
|
|
||||||
|
for num_id, d in self.instances.iteritems():
|
||||||
|
self.counters[num_id] = Counter({lvl:d.levels[lvl].start for lvl in d.levels})
|
||||||
|
|
||||||
|
def get_pstyle(self, num_id, style_id):
|
||||||
|
d = self.instances.get(num_id, None)
|
||||||
|
if d is not None:
|
||||||
|
for ilvl, lvl in d.levels.iteritems():
|
||||||
|
if lvl.para_link == style_id:
|
||||||
|
return ilvl
|
||||||
|
|
||||||
|
def get_para_style(self, num_id, lvl):
|
||||||
|
d = self.instances.get(num_id, None)
|
||||||
|
if d is not None:
|
||||||
|
lvl = d.levels.get(lvl, None)
|
||||||
|
return getattr(lvl, 'paragraph_style', None)
|
||||||
|
|
||||||
|
def update_counter(self, counter, levelnum, levels):
|
||||||
|
counter[levelnum] += 1
|
||||||
|
for ilvl, lvl in levels.iteritems():
|
||||||
|
restart = lvl.restart
|
||||||
|
if (restart is None and ilvl == levelnum + 1) or restart == levelnum + 1:
|
||||||
|
counter[ilvl] = lvl.start
|
||||||
|
|
||||||
|
def apply_markup(self, items, body, styles, object_map):
|
||||||
|
for p, num_id, ilvl in items:
|
||||||
|
d = self.instances.get(num_id, None)
|
||||||
|
if d is not None:
|
||||||
|
lvl = d.levels.get(ilvl, None)
|
||||||
|
if lvl is not None:
|
||||||
|
counter = self.counters[num_id]
|
||||||
|
p.tag = 'li'
|
||||||
|
p.set('value', '%s' % counter[ilvl])
|
||||||
|
p.set('list-lvl', str(ilvl))
|
||||||
|
p.set('list-id', num_id)
|
||||||
|
if lvl.num_template is not None:
|
||||||
|
val = lvl.format_template(counter, ilvl)
|
||||||
|
p.set('list-template', val)
|
||||||
|
self.update_counter(counter, ilvl, d.levels)
|
||||||
|
|
||||||
|
def commit(current_run):
|
||||||
|
if not current_run:
|
||||||
|
return
|
||||||
|
start = current_run[0]
|
||||||
|
parent = start.getparent()
|
||||||
|
idx = parent.index(start)
|
||||||
|
|
||||||
|
d = self.instances[start.get('list-id')]
|
||||||
|
ilvl = int(start.get('list-lvl'))
|
||||||
|
lvl = d.levels[ilvl]
|
||||||
|
lvlid = start.get('list-id') + start.get('list-lvl')
|
||||||
|
wrap = (OL if lvl.is_numbered else UL)('\n\t')
|
||||||
|
has_template = 'list-template' in start.attrib
|
||||||
|
if has_template:
|
||||||
|
wrap.set('lvlid', lvlid)
|
||||||
|
else:
|
||||||
|
wrap.set('class', styles.register({'list-style-type': lvl.fmt}, 'list'))
|
||||||
|
parent.insert(idx, wrap)
|
||||||
|
last_val = None
|
||||||
|
for child in current_run:
|
||||||
|
wrap.append(child)
|
||||||
|
child.tail = '\n\t'
|
||||||
|
if has_template:
|
||||||
|
span = SPAN()
|
||||||
|
span.text = child.text
|
||||||
|
child.text = None
|
||||||
|
for gc in child:
|
||||||
|
span.append(gc)
|
||||||
|
child.append(span)
|
||||||
|
span = SPAN(child.get('list-template'))
|
||||||
|
child.insert(0, span)
|
||||||
|
for attr in ('list-lvl', 'list-id', 'list-template'):
|
||||||
|
child.attrib.pop(attr, None)
|
||||||
|
val = int(child.get('value'))
|
||||||
|
if last_val == val - 1 or wrap.tag == 'ul':
|
||||||
|
child.attrib.pop('value')
|
||||||
|
last_val = val
|
||||||
|
current_run[-1].tail = '\n'
|
||||||
|
del current_run[:]
|
||||||
|
|
||||||
|
parents = set()
|
||||||
|
for child in body.iterdescendants('li'):
|
||||||
|
parents.add(child.getparent())
|
||||||
|
|
||||||
|
for parent in parents:
|
||||||
|
current_run = []
|
||||||
|
for child in parent:
|
||||||
|
if child.tag == 'li':
|
||||||
|
if current_run:
|
||||||
|
last = current_run[-1]
|
||||||
|
if (last.get('list-id') , last.get('list-lvl')) != (child.get('list-id'), child.get('list-lvl')):
|
||||||
|
commit(current_run)
|
||||||
|
current_run.append(child)
|
||||||
|
else:
|
||||||
|
commit(current_run)
|
||||||
|
commit(current_run)
|
||||||
|
|
||||||
|
for wrap in body.xpath('//ol[@lvlid]'):
|
||||||
|
wrap.attrib.pop('lvlid')
|
||||||
|
wrap.tag = 'div'
|
||||||
|
for i, li in enumerate(wrap.iterchildren('li')):
|
||||||
|
li.tag = 'div'
|
||||||
|
li.attrib.pop('value', None)
|
||||||
|
li.set('style', 'display:table-row')
|
||||||
|
obj = object_map[li]
|
||||||
|
bs = styles.para_cache[obj]
|
||||||
|
if i == 0:
|
||||||
|
wrap.set('style', 'display:table; margin-left: %s' % (bs.css.get('margin-left', 0)))
|
||||||
|
bs.css.pop('margin-left', None)
|
||||||
|
for child in li:
|
||||||
|
child.set('style', 'display:table-cell')
|
||||||
|
|
||||||
|
@ -198,7 +198,18 @@ class Styles(object):
|
|||||||
if default_para.character_style is not None:
|
if default_para.character_style is not None:
|
||||||
self.para_char_cache[p] = default_para.character_style
|
self.para_char_cache[p] = default_para.character_style
|
||||||
|
|
||||||
|
is_numbering = direct_formatting.numbering is not inherit
|
||||||
|
if is_numbering:
|
||||||
|
num_id, lvl = direct_formatting.numbering
|
||||||
|
if num_id is not None:
|
||||||
|
p.set('calibre_num_id', '%s:%s' % (lvl, num_id))
|
||||||
|
if num_id is not None and lvl is not None:
|
||||||
|
ps = self.numbering.get_para_style(num_id, lvl)
|
||||||
|
if ps is not None:
|
||||||
|
parent_styles.append(ps)
|
||||||
|
|
||||||
for attr in ans.all_properties:
|
for attr in ans.all_properties:
|
||||||
|
if not (is_numbering and attr == 'text_indent'): # skip text-indent for lists
|
||||||
setattr(ans, attr, self.para_val(parent_styles, direct_formatting, attr))
|
setattr(ans, attr, self.para_val(parent_styles, direct_formatting, attr))
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
@ -244,10 +255,20 @@ class Styles(object):
|
|||||||
return self.resolve_run(obj)
|
return self.resolve_run(obj)
|
||||||
|
|
||||||
def resolve_numbering(self, numbering):
|
def resolve_numbering(self, numbering):
|
||||||
pass # TODO: Implement this
|
# When a numPr element appears inside a paragraph style, the lvl info
|
||||||
|
# must be discarded and pStyle used instead.
|
||||||
|
self.numbering = numbering
|
||||||
|
for style in self:
|
||||||
|
ps = style.paragraph_style
|
||||||
|
if ps is not None and ps.numbering is not inherit:
|
||||||
|
lvl = numbering.get_pstyle(ps.numbering[0], style.style_id)
|
||||||
|
if lvl is None:
|
||||||
|
ps.numbering = inherit
|
||||||
|
else:
|
||||||
|
ps.numbering = (ps.numbering[0], lvl)
|
||||||
|
|
||||||
def register(self, css, prefix):
|
def register(self, css, prefix):
|
||||||
h = hash(tuple(css.iteritems()))
|
h = hash(frozenset(css.iteritems()))
|
||||||
ans, _ = self.classes.get(h, (None, None))
|
ans, _ = self.classes.get(h, (None, None))
|
||||||
if ans is None:
|
if ans is None:
|
||||||
self.counter[prefix] += 1
|
self.counter[prefix] += 1
|
||||||
@ -266,13 +287,15 @@ class Styles(object):
|
|||||||
self.register(css, 'text')
|
self.register(css, 'text')
|
||||||
|
|
||||||
def class_name(self, css):
|
def class_name(self, css):
|
||||||
h = hash(tuple(css.iteritems()))
|
h = hash(frozenset(css.iteritems()))
|
||||||
return self.classes.get(h, (None, None))[0]
|
return self.classes.get(h, (None, None))[0]
|
||||||
|
|
||||||
def generate_css(self):
|
def generate_css(self):
|
||||||
prefix = textwrap.dedent(
|
prefix = textwrap.dedent(
|
||||||
'''\
|
'''\
|
||||||
p { margin: 0; padding: 0; text-indent: 1.5em }
|
p { text-indent: 1.5em }
|
||||||
|
|
||||||
|
ul, ol, p { margin: 0; padding: 0 }
|
||||||
''')
|
''')
|
||||||
|
|
||||||
ans = []
|
ans = []
|
||||||
|
@ -7,6 +7,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
import sys, os, re
|
import sys, os, re
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from lxml.html.builder import (
|
from lxml.html.builder import (
|
||||||
@ -36,7 +37,7 @@ class Convert(object):
|
|||||||
self.mi = self.docx.metadata
|
self.mi = self.docx.metadata
|
||||||
self.body = BODY()
|
self.body = BODY()
|
||||||
self.styles = Styles()
|
self.styles = Styles()
|
||||||
self.object_map = {}
|
self.object_map = OrderedDict()
|
||||||
self.html = HTML(
|
self.html = HTML(
|
||||||
HEAD(
|
HEAD(
|
||||||
META(charset='utf-8'),
|
META(charset='utf-8'),
|
||||||
@ -72,6 +73,19 @@ class Convert(object):
|
|||||||
pass # TODO: Last section properties
|
pass # TODO: Last section properties
|
||||||
else:
|
else:
|
||||||
self.log.debug('Unknown top-level tag: %s, ignoring' % barename(top_level.tag))
|
self.log.debug('Unknown top-level tag: %s, ignoring' % barename(top_level.tag))
|
||||||
|
|
||||||
|
numbered = []
|
||||||
|
for html_obj, obj in self.object_map.iteritems():
|
||||||
|
raw = obj.get('calibre_num_id', None)
|
||||||
|
if raw is not None:
|
||||||
|
lvl, num_id = raw.partition(':')[0::2]
|
||||||
|
try:
|
||||||
|
lvl = int(lvl)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
lvl = 0
|
||||||
|
numbered.append((html_obj, num_id, lvl))
|
||||||
|
self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map)
|
||||||
|
|
||||||
if len(self.body) > 0:
|
if len(self.body) > 0:
|
||||||
self.body.text = '\n\t'
|
self.body.text = '\n\t'
|
||||||
for child in self.body:
|
for child in self.body:
|
||||||
@ -102,7 +116,7 @@ class Convert(object):
|
|||||||
|
|
||||||
nname = get_name(NUMBERING, 'numbering.xml')
|
nname = get_name(NUMBERING, 'numbering.xml')
|
||||||
sname = get_name(STYLES, 'styles.xml')
|
sname = get_name(STYLES, 'styles.xml')
|
||||||
numbering = Numbering()
|
numbering = self.numbering = Numbering()
|
||||||
|
|
||||||
if sname is not None:
|
if sname is not None:
|
||||||
try:
|
try:
|
||||||
@ -133,6 +147,7 @@ class Convert(object):
|
|||||||
|
|
||||||
def convert_p(self, p):
|
def convert_p(self, p):
|
||||||
dest = P()
|
dest = P()
|
||||||
|
self.object_map[dest] = p
|
||||||
style = self.styles.resolve_paragraph(p)
|
style = self.styles.resolve_paragraph(p)
|
||||||
for run in XPath('descendant::w:r')(p):
|
for run in XPath('descendant::w:r')(p):
|
||||||
span = self.convert_run(run)
|
span = self.convert_run(run)
|
||||||
@ -173,7 +188,6 @@ class Convert(object):
|
|||||||
wrapper = self.wrap_elems(spans, SPAN())
|
wrapper = self.wrap_elems(spans, SPAN())
|
||||||
wrapper.set('class', cls)
|
wrapper.set('class', cls)
|
||||||
|
|
||||||
self.object_map[dest] = p
|
|
||||||
return dest
|
return dest
|
||||||
|
|
||||||
def wrap_elems(self, elems, wrapper):
|
def wrap_elems(self, elems, wrapper):
|
||||||
@ -188,7 +202,7 @@ class Convert(object):
|
|||||||
|
|
||||||
def convert_run(self, run):
|
def convert_run(self, run):
|
||||||
ans = SPAN()
|
ans = SPAN()
|
||||||
ans.run = run
|
self.object_map[ans] = run
|
||||||
text = Text(ans, 'text', [])
|
text = Text(ans, 'text', [])
|
||||||
|
|
||||||
for child in run:
|
for child in run:
|
||||||
@ -224,7 +238,6 @@ class Convert(object):
|
|||||||
ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
|
ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
|
||||||
if style.lang is not inherit:
|
if style.lang is not inherit:
|
||||||
ans.lang = style.lang
|
ans.lang = style.lang
|
||||||
self.object_map[ans] = run
|
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
Loading…
x
Reference in New Issue
Block a user