mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-04 03:27:00 -05:00 
			
		
		
		
	DOCX Input: Work on lists
This commit is contained in:
		
							parent
							
								
									4cfeaac8b4
								
							
						
					
					
						commit
						3a021b5873
					
				@ -8,10 +8,11 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
from lxml.etree import XPath as X
 | 
					from lxml.etree import XPath as X
 | 
				
			||||||
 | 
					
 | 
				
			||||||
DOCUMENT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument'
 | 
					DOCUMENT  = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument'
 | 
				
			||||||
DOCPROPS = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties'
 | 
					DOCPROPS  = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties'
 | 
				
			||||||
APPPROPS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties'
 | 
					APPPROPS  = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties'
 | 
				
			||||||
STYLES   = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles'
 | 
					STYLES    = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles'
 | 
				
			||||||
 | 
					NUMBERING = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespaces = {
 | 
					namespaces = {
 | 
				
			||||||
    'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
 | 
					    'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										156
									
								
								src/calibre/ebooks/docx/numbering.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										156
									
								
								src/calibre/ebooks/docx/numbering.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,156 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env python
 | 
				
			||||||
 | 
					# vim:fileencoding=utf-8
 | 
				
			||||||
 | 
					from __future__ import (unicode_literals, division, absolute_import,
 | 
				
			||||||
 | 
					                        print_function)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__license__ = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.ebooks.docx.block_styles import ParagraphStyle
 | 
				
			||||||
 | 
					from calibre.ebooks.docx.char_styles import RunStyle
 | 
				
			||||||
 | 
					from calibre.ebooks.docx.names import XPath, get
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					STYLE_MAP = {
 | 
				
			||||||
 | 
					    'aiueo': 'hiragana',
 | 
				
			||||||
 | 
					    'aiueoFullWidth': 'hiragana',
 | 
				
			||||||
 | 
					    'hebrew1': 'hebrew',
 | 
				
			||||||
 | 
					    'iroha': 'katakana-iroha',
 | 
				
			||||||
 | 
					    'irohaFullWidth': 'katakana-iroha',
 | 
				
			||||||
 | 
					    'lowerLetter': 'lower-alpha',
 | 
				
			||||||
 | 
					    'lowerRoman': 'lower-roman',
 | 
				
			||||||
 | 
					    'none': 'none',
 | 
				
			||||||
 | 
					    'upperLetter': 'upper-alpha',
 | 
				
			||||||
 | 
					    'upperRoman': 'upper-roman',
 | 
				
			||||||
 | 
					    'chineseCounting': 'cjk-ideographic',
 | 
				
			||||||
 | 
					    'decimalZero': 'decimal-leading-zero',
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Level(object):
					    """Settings for a single list level (a ``w:lvl`` element).

					    Attributes:
					        restart: integer from ``w:lvlRestart``, or None when absent/invalid.
					        start: integer from ``w:start`` (defaults to 0).
					        fmt: list style name chosen from ``w:numFmt`` (defaults to 'decimal').
					        para_link: value of ``w:pStyle``, or None.
					        paragraph_style: ParagraphStyle built from ``w:pPr``, or None.
					        character_style: RunStyle built from ``w:rPr``, or None.
					    """

					    def __init__(self, lvl=None):
					        """Create a level with default values, then load *lvl* if given."""
					        self.paragraph_style = self.character_style = None
					        self.para_link = None
					        self.fmt = 'decimal'
					        self.start = 0
					        self.restart = None

					        if lvl is not None:
					            self.read_from_xml(lvl)

					    def read_from_xml(self, lvl, override=False):
					        """Merge the settings found under the element *lvl* into this level.

					        Only child elements that are present change anything, so calling
					        this on an already populated level layers the new values on top.
					        The *override* flag is accepted but not consulted here.
					        """
					        # Integer valued children; a value that is not an integer is ignored
					        for expr, attr in (
					                ('./w:lvlRestart[@w:val]', 'restart'),
					                ('./w:start[@w:val]', 'start')):
					            for node in XPath(expr)(lvl):
					                try:
					                    number = int(get(node, 'w:val'))
					                except (TypeError, ValueError):
					                    continue
					                setattr(self, attr, number)

					        # The level text is only used to pick the bullet shape; the last
					        # w:lvlText element wins
					        texts = [get(node, 'w:val') for node in XPath('./w:lvlText[@w:val]')(lvl)]
					        bullet_text = texts[-1] if texts else None

					        bullet_shapes = {'\uf0a7':'square', 'o':'circle'}
					        for node in XPath('./w:numFmt[@w:val]')(lvl):
					            kind = get(node, 'w:val')
					            if kind != 'bullet':
					                # Formats missing from STYLE_MAP fall back to decimal
					                self.fmt = STYLE_MAP.get(kind, 'decimal')
					            else:
					                self.fmt = bullet_shapes.get(bullet_text, 'disc')

					        for node in XPath('./w:pStyle[@w:val]')(lvl):
					            self.para_link = get(node, 'w:val')

					        # Paragraph and run properties: the first one seen is stored, any
					        # later one is merged into it via update()
					        for node in XPath('./w:pPr')(lvl):
					            parsed = ParagraphStyle(node)
					            if self.paragraph_style is not None:
					                self.paragraph_style.update(parsed)
					            else:
					                self.paragraph_style = parsed

					        for node in XPath('./w:rPr')(lvl):
					            parsed = RunStyle(node)
					            if self.character_style is not None:
					                self.character_style.update(parsed)
					            else:
					                self.character_style = parsed

					    def copy(self):
					        """Return a new Level carrying the same attribute values.

					        The style objects are shared with the original, not cloned.
					        """
					        clone = Level()
					        clone.restart, clone.start = self.restart, self.start
					        clone.fmt, clone.para_link = self.fmt, self.para_link
					        clone.paragraph_style = self.paragraph_style
					        clone.character_style = self.character_style
					        return clone
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class NumberingDefinition(object):
					    """A set of list levels, stored in ``self.levels`` keyed by integer level index."""

					    def __init__(self, parent=None):
					        """Read every ``w:lvl`` child of *parent*; with no parent, start empty.

					        A level whose ``w:ilvl`` is missing or not an integer is stored
					        under index 0.
					        """
					        self.levels = {}
					        if parent is not None:
					            for lvl in XPath('./w:lvl')(parent):
					                try:
					                    ilvl = int(get(lvl, 'w:ilvl', 0))
					                except (TypeError, ValueError):
					                    ilvl = 0
					                self.levels[ilvl] = Level(lvl)

					    def copy(self):
					        """Return a new definition whose levels are copies of this one's levels."""
					        ans = NumberingDefinition()
					        # items() rather than iteritems() so this runs on Python 2 and 3
					        for ilvl, level in self.levels.items():
					            ans.levels[ilvl] = level.copy()
					        return ans
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Numbering(object):
					    """Numbering definitions and instances read from a numbering XML tree.

					    ``definitions`` maps the ``w:abstractNumId`` of each ``w:abstractNum``
					    to a NumberingDefinition. ``instances`` maps the ``w:numId`` of each
					    ``w:num`` to its own copy of the definition it refers to, with any
					    ``w:lvlOverride`` children applied.
					    """

					    def __init__(self):
					        self.definitions = {}
					        self.instances = {}

					    def __call__(self, root, styles):
					        """Read all numbering style definitions.

					        :param root: element whose children are the ``w:abstractNum`` and
					            ``w:num`` elements.
					        :param styles: object providing ``numbering_style_links``, a mapping
					            from a style id to a ``w:numId`` value.
					        """
					        # Abstract definitions that only point at a numbering style cannot
					        # be built until the instances exist, so remember them for later
					        lazy_load = {}
					        for an in XPath('./w:abstractNum[@w:abstractNumId]')(root):
					            an_id = get(an, 'w:abstractNumId')
					            nsl = XPath('./w:numStyleLink[@w:val]')(an)
					            if nsl:
					                lazy_load[an_id] = get(nsl[0], 'w:val')
					            else:
					                self.definitions[an_id] = NumberingDefinition(an)

					        def to_int(val):
					            # Return val as an int, or None when it is missing or invalid
					            try:
					                return int(val)
					            except (TypeError, ValueError):
					                return None

					        def create_instance(n, definition):
					            # Copy the definition, then layer the overrides from n on top
					            nd = definition.copy()
					            for lo in XPath('./w:lvlOverride')(n):
					                ilvl = to_int(get(lo, 'w:ilvl'))
					                for lvl in XPath('./w:lvl')(lo)[:1]:
					                    if ilvl is None:
					                        ilvl = to_int(get(lvl, 'w:ilvl'))
					                    if ilvl is None:
					                        # Same fallback NumberingDefinition uses
					                        ilvl = 0
					                    # Levels are keyed by int, so look up with an int
					                    alvl = nd.levels.get(ilvl)
					                    if alvl is None:
					                        # Store the new level so the override is kept
					                        alvl = nd.levels[ilvl] = Level()
					                    alvl.read_from_xml(lvl, override=True)
					            return nd

					        # First pass: instances whose definition is already available
					        next_pass = {}
					        for n in XPath('./w:num[@w:numId]')(root):
					            an_id = None
					            num_id = get(n, 'w:numId')
					            for an in XPath('./w:abstractNumId[@w:val]')(n):
					                an_id = get(an, 'w:val')
					            d = self.definitions.get(an_id, None)
					            if d is None:
					                next_pass[num_id] = (an_id, n)
					                continue
					            self.instances[num_id] = create_instance(n, d)

					        # Resolve style linked definitions from the instances built above.
					        # A link that cannot be resolved is skipped instead of raising.
					        numbering_links = styles.numbering_style_links
					        for an_id, style_link in lazy_load.items():
					            linked = self.instances.get(numbering_links.get(style_link))
					            if linked is not None:
					                self.definitions[an_id] = linked.copy()

					        # Second pass: instances that were waiting on a linked definition
					        for num_id, (an_id, n) in next_pass.items():
					            d = self.definitions.get(an_id, None)
					            if d is not None:
					                self.instances[num_id] = create_instance(n, d)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -52,6 +52,11 @@ class Style(object):
 | 
				
			|||||||
                else:
 | 
					                else:
 | 
				
			||||||
                    self.character_style.update(rs)
 | 
					                    self.character_style.update(rs)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if self.style_type == 'numbering':
 | 
				
			||||||
 | 
					            self.numbering_style_link = None
 | 
				
			||||||
 | 
					            for x in XPath('./w:pPr/w:numPr/w:numId[@w:val]')(elem):
 | 
				
			||||||
 | 
					                self.numbering_style_link = get(x, 'w:val')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def resolve_based_on(self, parent):
 | 
					    def resolve_based_on(self, parent):
 | 
				
			||||||
        if parent.paragraph_style is not None:
 | 
					        if parent.paragraph_style is not None:
 | 
				
			||||||
            if self.paragraph_style is None:
 | 
					            if self.paragraph_style is None:
 | 
				
			||||||
@ -77,6 +82,7 @@ class Styles(object):
 | 
				
			|||||||
        self.classes = {}
 | 
					        self.classes = {}
 | 
				
			||||||
        self.counter = Counter()
 | 
					        self.counter = Counter()
 | 
				
			||||||
        self.default_styles = {}
 | 
					        self.default_styles = {}
 | 
				
			||||||
 | 
					        self.numbering_style_links = {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __iter__(self):
 | 
					    def __iter__(self):
 | 
				
			||||||
        for s in self.id_map.itervalues():
 | 
					        for s in self.id_map.itervalues():
 | 
				
			||||||
@ -98,6 +104,8 @@ class Styles(object):
 | 
				
			|||||||
                self.id_map[s.style_id] = s
 | 
					                self.id_map[s.style_id] = s
 | 
				
			||||||
            if s.is_default:
 | 
					            if s.is_default:
 | 
				
			||||||
                self.default_styles[s.style_type] = s
 | 
					                self.default_styles[s.style_type] = s
 | 
				
			||||||
 | 
					            if s.style_type == 'numbering' and s.numbering_style_link:
 | 
				
			||||||
 | 
					                self.numbering_style_links[s.style_id] = s.numbering_style_link
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.default_paragraph_style = self.default_character_style = None
 | 
					        self.default_paragraph_style = self.default_character_style = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -235,6 +243,9 @@ class Styles(object):
 | 
				
			|||||||
        if obj.tag.endswith('}r'):
 | 
					        if obj.tag.endswith('}r'):
 | 
				
			||||||
            return self.resolve_run(obj)
 | 
					            return self.resolve_run(obj)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def resolve_numbering(self, numbering):
					        """Placeholder: accepts *numbering* and currently does nothing with it."""
					        pass  # TODO: Implement this
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def register(self, css, prefix):
 | 
					    def register(self, css, prefix):
 | 
				
			||||||
        h = hash(tuple(css.iteritems()))
 | 
					        h = hash(tuple(css.iteritems()))
 | 
				
			||||||
        ans, _ = self.classes.get(h, (None, None))
 | 
					        ans, _ = self.classes.get(h, (None, None))
 | 
				
			||||||
 | 
				
			|||||||
@ -13,8 +13,9 @@ from lxml.html.builder import (
 | 
				
			|||||||
    HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR)
 | 
					    HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from calibre.ebooks.docx.container import DOCX, fromstring
 | 
					from calibre.ebooks.docx.container import DOCX, fromstring
 | 
				
			||||||
from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES
 | 
					from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES, NUMBERING
 | 
				
			||||||
from calibre.ebooks.docx.styles import Styles, inherit
 | 
					from calibre.ebooks.docx.styles import Styles, inherit
 | 
				
			||||||
 | 
					from calibre.ebooks.docx.numbering import Numbering
 | 
				
			||||||
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
 | 
					from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Text:
 | 
					class Text:
 | 
				
			||||||
@ -89,12 +90,20 @@ class Convert(object):
 | 
				
			|||||||
        self.write()
 | 
					        self.write()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def read_styles(self, relationships_by_type):
 | 
					    def read_styles(self, relationships_by_type):
 | 
				
			||||||
        sname = relationships_by_type.get(STYLES, None)
 | 
					
 | 
				
			||||||
        if sname is None:
 | 
					        def get_name(rtype, defname):
 | 
				
			||||||
            name = self.docx.document_name.split('/')
 | 
					            name = relationships_by_type.get(rtype, None)
 | 
				
			||||||
            name[-1] = 'styles.xml'
 | 
					            if name is None:
 | 
				
			||||||
            if self.docx.exists(name):
 | 
					                cname = self.docx.document_name.split('/')
 | 
				
			||||||
                sname = name
 | 
					                cname[-1] = defname
 | 
				
			||||||
 | 
					                if self.docx.exists(cname):
 | 
				
			||||||
 | 
					                    name = name
 | 
				
			||||||
 | 
					            return name
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        nname = get_name(NUMBERING, 'numbering.xml')
 | 
				
			||||||
 | 
					        sname = get_name(STYLES, 'styles.xml')
 | 
				
			||||||
 | 
					        numbering = Numbering()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if sname is not None:
 | 
					        if sname is not None:
 | 
				
			||||||
            try:
 | 
					            try:
 | 
				
			||||||
                raw = self.docx.read(sname)
 | 
					                raw = self.docx.read(sname)
 | 
				
			||||||
@ -103,6 +112,16 @@ class Convert(object):
 | 
				
			|||||||
            else:
 | 
					            else:
 | 
				
			||||||
                self.styles(fromstring(raw))
 | 
					                self.styles(fromstring(raw))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if nname is not None:
 | 
				
			||||||
 | 
					            try:
 | 
				
			||||||
 | 
					                raw = self.docx.read(nname)
 | 
				
			||||||
 | 
					            except KeyError:
 | 
				
			||||||
 | 
					                self.log.warn('Numbering styles %s do not exist' % nname)
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                numbering(fromstring(raw), self.styles)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.styles.resolve_numbering(numbering)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def write(self):
 | 
					    def write(self):
 | 
				
			||||||
        raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
 | 
					        raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
 | 
				
			||||||
        with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
 | 
					        with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user