mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
DOCX Input: Work on lists
This commit is contained in:
parent
4cfeaac8b4
commit
3a021b5873
@ -12,6 +12,7 @@ DOCUMENT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/
|
||||
DOCPROPS = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties'
|
||||
APPPROPS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties'
|
||||
STYLES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles'
|
||||
NUMBERING = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering'
|
||||
|
||||
namespaces = {
|
||||
'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
|
||||
|
156
src/calibre/ebooks/docx/numbering.py
Normal file
156
src/calibre/ebooks/docx/numbering.py
Normal file
@ -0,0 +1,156 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from calibre.ebooks.docx.block_styles import ParagraphStyle
|
||||
from calibre.ebooks.docx.char_styles import RunStyle
|
||||
from calibre.ebooks.docx.names import XPath, get
|
||||
|
||||
# Translation table from the value of a w:numFmt element to the list style
# name stored in Level.fmt. The values look like CSS list-style-type keywords
# (NOTE(review): confirm against whatever consumes Level.fmt). Formats that
# are missing from this table are looked up with a 'decimal' fallback.
STYLE_MAP = {
    # Latin letters and roman numerals
    'lowerLetter': 'lower-alpha',
    'upperLetter': 'upper-alpha',
    'lowerRoman': 'lower-roman',
    'upperRoman': 'upper-roman',

    # Numeric variants
    'decimalZero': 'decimal-leading-zero',
    'none': 'none',

    # Non-latin scripts
    'aiueo': 'hiragana',
    'aiueoFullWidth': 'hiragana',
    'iroha': 'katakana-iroha',
    'irohaFullWidth': 'katakana-iroha',
    'hebrew1': 'hebrew',
    'chineseCounting': 'cjk-ideographic',
}
|
||||
|
||||
class Level(object):

    '''
    The settings of one list level, read from a ``w:lvl`` element.

    Attributes:
        restart: integer value of ``w:lvlRestart``, or None if absent/invalid
        start: integer value of ``w:start`` (defaults to 0)
        fmt: list style name, derived from ``w:numFmt`` via STYLE_MAP
             (defaults to 'decimal')
        para_link: value of ``w:pStyle``, or None
        paragraph_style: ParagraphStyle built from ``w:pPr``, or None
        character_style: RunStyle built from ``w:rPr``, or None
    '''

    def __init__(self, lvl=None):
        '''
        :param lvl: optional ``w:lvl`` element to read the settings from. When
                    it is None the level keeps its default values.
        '''
        self.restart = None
        self.start = 0
        self.fmt = 'decimal'
        self.para_link = None
        self.paragraph_style = self.character_style = None

        if lvl is not None:
            self.read_from_xml(lvl)

    def read_from_xml(self, lvl, override=False):
        '''
        Update this level from the children of the element ``lvl``.

        Settings that are missing from ``lvl`` keep their current values, so
        this can be called repeatedly to layer one definition over another.

        :param lvl: the element whose children are read
        :param override: accepted for callers that apply a level override;
                         it is currently not used by this method
        '''
        # Integer valued settings. An unparseable value is ignored on purpose
        # so that the previous (or default) value is kept.
        for attr, expr in (
                ('restart', './w:lvlRestart[@w:val]'),
                ('start', './w:start[@w:val]'),
        ):
            for elem in XPath(expr)(lvl):
                try:
                    setattr(self, attr, int(get(elem, 'w:val')))
                except (TypeError, ValueError):
                    pass

        # The level text is only needed to tell the bullet glyphs apart
        lt = None
        for elem in XPath('./w:lvlText[@w:val]')(lvl):
            lt = get(elem, 'w:val')

        for elem in XPath('./w:numFmt[@w:val]')(lvl):
            val = get(elem, 'w:val')
            if val == 'bullet':
                # Choose the bullet shape from the glyph used as level text
                self.fmt = {'\uf0a7':'square', 'o':'circle'}.get(lt, 'disc')
            else:
                self.fmt = STYLE_MAP.get(val, 'decimal')

        for elem in XPath('./w:pStyle[@w:val]')(lvl):
            self.para_link = get(elem, 'w:val')

        for pPr in XPath('./w:pPr')(lvl):
            ps = ParagraphStyle(pPr)
            if self.paragraph_style is None:
                self.paragraph_style = ps
            else:
                self.paragraph_style.update(ps)

        for rPr in XPath('./w:rPr')(lvl):
            rs = RunStyle(rPr)
            if self.character_style is None:
                self.character_style = rs
            else:
                self.character_style.update(rs)

    def copy(self):
        '''
        Return a new Level with the same settings as this one.

        The style objects are copied rather than shared: read_from_xml()
        updates an existing style in place, so sharing them would let a
        change made to the copy alter the level it was copied from.
        '''
        from copy import copy as shallow_copy

        ans = Level()
        for x in ('restart', 'start', 'fmt', 'para_link'):
            setattr(ans, x, getattr(self, x))
        # NOTE(review): assumes a shallow copy is enough to isolate the
        # effects of update() on the style classes -- confirm against
        # ParagraphStyle.update() and RunStyle.update().
        for x in ('paragraph_style', 'character_style'):
            style = getattr(self, x)
            setattr(ans, x, None if style is None else shallow_copy(style))
        return ans
|
||||
|
||||
class NumberingDefinition(object):

    '''
    A set of list levels, stored in ``self.levels`` as a mapping from the
    integer level index (the ``w:ilvl`` attribute) to a Level object.
    '''

    def __init__(self, parent=None):
        '''
        :param parent: optional element whose ``w:lvl`` children are read.
                       When it is None an empty definition is created.
        '''
        self.levels = {}
        if parent is None:
            return
        for lvl in XPath('./w:lvl')(parent):
            try:
                ilvl = int(get(lvl, 'w:ilvl', 0))
            except (TypeError, ValueError):
                # A level index that is not a number is treated as level 0
                ilvl = 0
            self.levels[ilvl] = Level(lvl)

    def copy(self):
        ''' Return a new definition holding a copy of every level '''
        ans = NumberingDefinition()
        # items() instead of iteritems() so that this works on both
        # Python 2 and Python 3
        ans.levels = {ilvl: lvl.copy() for ilvl, lvl in self.levels.items()}
        return ans
|
||||
|
||||
class Numbering(object):

    '''
    All the numbering (list) definitions of a document.

    ``self.definitions`` maps the id of a ``w:abstractNum`` element to a
    NumberingDefinition and ``self.instances`` maps the id of a ``w:num``
    element to the NumberingDefinition that results from applying the level
    overrides of that element to the abstract definition it refers to.
    '''

    def __init__(self):
        self.definitions = {}
        self.instances = {}

    def __call__(self, root, styles):
        '''
        Read all numbering style definitions

        :param root: the element containing the ``w:abstractNum`` and
                     ``w:num`` elements
        :param styles: object whose ``numbering_style_links`` attribute maps
                       the id of a numbering style to a ``w:num`` id
        '''
        # Abstract definitions that only link to a numbering style cannot be
        # built until the instances have been read: abstract id -> style id
        lazy_load = {}
        for an in XPath('./w:abstractNum[@w:abstractNumId]')(root):
            an_id = get(an, 'w:abstractNumId')
            nsl = XPath('./w:numStyleLink[@w:val]')(an)
            if nsl:
                lazy_load[an_id] = get(nsl[0], 'w:val')
            else:
                self.definitions[an_id] = NumberingDefinition(an)

        def level_index(elem):
            # NumberingDefinition keys its levels by integer, so the index
            # has to be converted before it can be used for a lookup
            try:
                return int(get(elem, 'w:ilvl'))
            except (TypeError, ValueError):
                return None

        def create_instance(n, definition):
            # Copy the abstract definition and apply the overrides found in
            # the w:num element n to the copy
            nd = definition.copy()
            for lo in XPath('./w:lvlOverride')(n):
                ilvl = level_index(lo)
                for lvl in XPath('./w:lvl')(lo)[:1]:
                    if ilvl is None:
                        ilvl = level_index(lvl)
                    if ilvl is None:
                        # Same fallback as NumberingDefinition.__init__
                        ilvl = 0
                    alvl = nd.levels.get(ilvl, None)
                    if alvl is None:
                        # Overriding a level the definition does not have:
                        # store the new level so the override is not lost
                        alvl = nd.levels[ilvl] = Level()
                    alvl.read_from_xml(lvl, override=True)
            return nd

        # Instances whose abstract definition is not available yet
        next_pass = {}
        for n in XPath('./w:num[@w:numId]')(root):
            an_id = None
            num_id = get(n, 'w:numId')
            for an in XPath('./w:abstractNumId[@w:val]')(n):
                an_id = get(an, 'w:val')
            d = self.definitions.get(an_id, None)
            if d is None:
                next_pass[num_id] = (an_id, n)
                continue
            self.instances[num_id] = create_instance(n, d)

        # Resolve the style linked definitions from the instances read above.
        # A link that does not lead to a known instance is skipped, the
        # instances depending on it are then left out below.
        numbering_links = styles.numbering_style_links
        for an_id, style_link in lazy_load.items():
            instance = self.instances.get(numbering_links.get(style_link))
            if instance is not None:
                self.definitions[an_id] = instance.copy()

        for num_id, (an_id, n) in next_pass.items():
            d = self.definitions.get(an_id, None)
            if d is not None:
                self.instances[num_id] = create_instance(n, d)
|
||||
|
@ -52,6 +52,11 @@ class Style(object):
|
||||
else:
|
||||
self.character_style.update(rs)
|
||||
|
||||
if self.style_type == 'numbering':
|
||||
self.numbering_style_link = None
|
||||
for x in XPath('./w:pPr/w:numPr/w:numId[@w:val]')(elem):
|
||||
self.numbering_style_link = get(x, 'w:val')
|
||||
|
||||
def resolve_based_on(self, parent):
|
||||
if parent.paragraph_style is not None:
|
||||
if self.paragraph_style is None:
|
||||
@ -77,6 +82,7 @@ class Styles(object):
|
||||
self.classes = {}
|
||||
self.counter = Counter()
|
||||
self.default_styles = {}
|
||||
self.numbering_style_links = {}
|
||||
|
||||
def __iter__(self):
|
||||
for s in self.id_map.itervalues():
|
||||
@ -98,6 +104,8 @@ class Styles(object):
|
||||
self.id_map[s.style_id] = s
|
||||
if s.is_default:
|
||||
self.default_styles[s.style_type] = s
|
||||
if s.style_type == 'numbering' and s.numbering_style_link:
|
||||
self.numbering_style_links[s.style_id] = s.numbering_style_link
|
||||
|
||||
self.default_paragraph_style = self.default_character_style = None
|
||||
|
||||
@ -235,6 +243,9 @@ class Styles(object):
|
||||
if obj.tag.endswith('}r'):
|
||||
return self.resolve_run(obj)
|
||||
|
||||
def resolve_numbering(self, numbering):
    ' Placeholder: the numbering object is accepted but not used yet '
    return None  # TODO: Implement this
|
||||
|
||||
def register(self, css, prefix):
|
||||
h = hash(tuple(css.iteritems()))
|
||||
ans, _ = self.classes.get(h, (None, None))
|
||||
|
@ -13,8 +13,9 @@ from lxml.html.builder import (
|
||||
HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR)
|
||||
|
||||
from calibre.ebooks.docx.container import DOCX, fromstring
|
||||
from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES
|
||||
from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES, NUMBERING
|
||||
from calibre.ebooks.docx.styles import Styles, inherit
|
||||
from calibre.ebooks.docx.numbering import Numbering
|
||||
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
|
||||
|
||||
class Text:
|
||||
@ -89,12 +90,20 @@ class Convert(object):
|
||||
self.write()
|
||||
|
||||
def read_styles(self, relationships_by_type):
|
||||
sname = relationships_by_type.get(STYLES, None)
|
||||
if sname is None:
|
||||
name = self.docx.document_name.split('/')
|
||||
name[-1] = 'styles.xml'
|
||||
if self.docx.exists(name):
|
||||
sname = name
|
||||
|
||||
def get_name(rtype, defname):
    '''
    Return the name of the file with the relationship type rtype.

    When the relationships do not list such a file, fall back to a file
    called defname located next to the main document, if it exists.
    Returns None when neither is available.
    '''
    name = relationships_by_type.get(rtype, None)
    if name is None:
        cname = self.docx.document_name.split('/')
        cname[-1] = defname
        # NOTE(review): assumes exists() takes the same '/' separated name
        # that is later passed to read() -- confirm against the container.
        candidate = '/'.join(cname)
        if self.docx.exists(candidate):
            name = candidate
    return name
|
||||
|
||||
nname = get_name(NUMBERING, 'numbering.xml')
|
||||
sname = get_name(STYLES, 'styles.xml')
|
||||
numbering = Numbering()
|
||||
|
||||
if sname is not None:
|
||||
try:
|
||||
raw = self.docx.read(sname)
|
||||
@ -103,6 +112,16 @@ class Convert(object):
|
||||
else:
|
||||
self.styles(fromstring(raw))
|
||||
|
||||
if nname is not None:
|
||||
try:
|
||||
raw = self.docx.read(nname)
|
||||
except KeyError:
|
||||
self.log.warn('Numbering styles %s do not exist' % nname)
|
||||
else:
|
||||
numbering(fromstring(raw), self.styles)
|
||||
|
||||
self.styles.resolve_numbering(numbering)
|
||||
|
||||
def write(self):
|
||||
raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
|
||||
with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
|
||||
|
Loading…
x
Reference in New Issue
Block a user