DOCX Input: Handle numbering styles that use non-decimal number formats and custom templates. Fixes #1519962 [Wrong list conversion from DOCX](https://bugs.launchpad.net/calibre/+bug/1519962)

This commit is contained in:
Kovid Goyal 2015-11-26 21:35:32 +05:30
parent 82f5e48d7b
commit b0c2fd900c

View File

@ -6,13 +6,15 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import re import re, string
from collections import Counter, defaultdict from collections import Counter, defaultdict
from functools import partial
from lxml.html.builder import OL, UL, SPAN from lxml.html.builder import OL, UL, SPAN
from calibre.ebooks.docx.block_styles import ParagraphStyle from calibre.ebooks.docx.block_styles import ParagraphStyle
from calibre.ebooks.docx.char_styles import RunStyle, inherit from calibre.ebooks.docx.char_styles import RunStyle, inherit
from calibre.ebooks.metadata import roman
STYLE_MAP = { STYLE_MAP = {
'aiueo': 'hiragana', 'aiueo': 'hiragana',
@ -29,6 +31,16 @@ STYLE_MAP = {
'decimalZero': 'decimal-leading-zero', 'decimalZero': 'decimal-leading-zero',
} }
def alphabet(val, lower=True):
    """Return the alphabetic list label for the 1-based counter value ``val``.

    Values 1..26 map to a single letter (``a``..``z``). Larger values
    repeat the same letter once more for every full pass through the
    alphabet, following the sequence used by the DOCX letter numbering
    formats: ``a, b, ..., z, aa, bb, ..., zz, aaa, ...``.

    :param val: Integer counter value; the first list item is 1.
    :param lower: If True use lowercase ASCII letters, otherwise uppercase.
    :return: A string made of one letter repeated one or more times.
    """
    letters = string.ascii_lowercase if lower else string.ascii_uppercase
    # abs() keeps the index non-negative for val < 1, so out-of-range
    # counters still yield a letter instead of indexing from the end.
    passes, pos = divmod(abs(val - 1), len(letters))
    return letters[pos] * (passes + 1)
# Maps a numbering format name (the CSS-style names that appear as values
# in STYLE_MAP) to a callable that renders an integer counter as text.
# Formats not listed here are rendered as plain decimal by the caller.
alphabet_map = {
    'lower-alpha': alphabet,
    'upper-alpha': partial(alphabet, lower=False),
    # NOTE(review): roman() presumably returns upper-case numerals, hence
    # the explicit lower() for the lower-roman variant -- confirm.
    'lower-roman': lambda x: roman(x).lower(),
    'upper-roman': roman,
    # Zero-pad to two digits so that 1 -> '01' but 10 -> '10' (not '010').
    'decimal-leading-zero': lambda x: '%02d' % x,
}
class Level(object): class Level(object):
def __init__(self, namespace, lvl=None): def __init__(self, namespace, lvl=None):
@ -57,7 +69,9 @@ class Level(object):
x = int(m.group(1)) - 1 x = int(m.group(1)) - 1
if x > ilvl or x not in counter: if x > ilvl or x not in counter:
return '' return ''
return '%d' % (counter[x] - (0 if x == ilvl else 1)) val = counter[x] - (0 if x == ilvl else 1)
formatter = alphabet_map.get(self.fmt, lambda x: '%d' % x)
return formatter(val)
return re.sub(r'%(\d+)', sub, template).rstrip() + '\xa0' return re.sub(r'%(\d+)', sub, template).rstrip() + '\xa0'
def read_from_xml(self, lvl, override=False): def read_from_xml(self, lvl, override=False):