mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Get towards writing a full docx skeleton with all text but no styles/images/etc.
This commit is contained in:
parent
cbc85be903
commit
0f0e62b3a0
@ -24,6 +24,7 @@ FOOTNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships
|
|||||||
ENDNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes'
|
ENDNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes'
|
||||||
THEMES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme'
|
THEMES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme'
|
||||||
SETTINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings'
|
SETTINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings'
|
||||||
|
WEB_SETTINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings'
|
||||||
|
|
||||||
namespaces = {
|
namespaces = {
|
||||||
'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
|
'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
|
||||||
|
@ -8,18 +8,57 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
|
|
||||||
import textwrap
|
import textwrap
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
from lxml.builder import ElementMaker
|
from lxml.builder import ElementMaker
|
||||||
|
|
||||||
from calibre import guess_type
|
from calibre import guess_type
|
||||||
from calibre.constants import numeric_version, __appname__
|
from calibre.constants import numeric_version, __appname__
|
||||||
from calibre.ebooks.docx.names import namespaces
|
from calibre.ebooks.docx.names import namespaces, STYLES, WEB_SETTINGS
|
||||||
from calibre.ebooks.oeb.base import xml2str
|
|
||||||
from calibre.utils.zipfile import ZipFile
|
from calibre.utils.zipfile import ZipFile
|
||||||
|
|
||||||
|
def xml2str(root, pretty_print=False, with_tail=False):
    """Serialize the lxml element ``root`` to UTF-8 encoded XML.

    The output always starts with an XML declaration. ``pretty_print`` and
    ``with_tail`` are passed straight through to ``etree.tostring``.
    """
    return etree.tostring(
        root,
        encoding='utf-8',
        xml_declaration=True,
        pretty_print=pretty_print,
        with_tail=with_tail,
    )
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentRelationships(object):
    """Registry of relationships, keyed by target, type and target mode.

    Each distinct ``(target, rtype, target_mode)`` triple is given an id of
    the form ``rIdN``, where N counts up from 1 in registration order.
    """

    def __init__(self):
        # Maps (target, rtype, target_mode) -> relationship id string
        self.rmap = {}
        # Number of ids handed out so far; the next id is counter + 1
        self.counter = 0
        # A tuple of pairs rather than a dict literal, so the built-in
        # relationships always receive the same ids regardless of hash order.
        for typ, target in (
            (STYLES, 'styles.xml'),
            (WEB_SETTINGS, 'webSettings.xml'),
        ):
            self.add_relationship(target, typ)

    def get_relationship_id(self, target, rtype, target_mode=None):
        """Return the id registered for this relationship, or None if absent."""
        return self.rmap.get((target, rtype, target_mode))

    def add_relationship(self, target, rtype, target_mode=None):
        """Register a relationship and return its id.

        Registering the same ``(target, rtype, target_mode)`` triple again
        returns the id that was allocated the first time.
        """
        ans = self.get_relationship_id(target, rtype, target_mode)
        if ans is None:
            self.counter += 1
            ans = 'rId%d' % self.counter
            self.rmap[(target, rtype, target_mode)] = ans
        return ans

    def serialize(self):
        """Return the relationships as a serialized ``Relationships`` XML document."""
        E = ElementMaker(namespace=namespaces['pr'], nsmap={None: namespaces['pr']})
        relationships = E.Relationships()
        # Emit in id-allocation order so the output is reproducible.
        # Ids are always 'rId' followed by an integer (see add_relationship).
        ordered = sorted(self.rmap.items(), key=lambda item: int(item[1][3:]))
        for (target, rtype, target_mode), rid in ordered:
            r = E.Relationship(Id=rid, Type=rtype, Target=target)
            if target_mode is not None:
                r.set('TargetMode', target_mode)
            relationships.append(r)
        return xml2str(relationships)
|
||||||
|
|
||||||
class DOCX(object):
|
class DOCX(object):
|
||||||
|
|
||||||
def __init__(self, opts, log):
|
def __init__(self, opts, log):
|
||||||
self.opts, self.log = opts, log
|
self.opts, self.log = opts, log
|
||||||
|
self.document_relationships = DocumentRelationships()
|
||||||
|
|
||||||
# Boilerplate {{{
|
# Boilerplate {{{
|
||||||
@property
|
@property
|
||||||
@ -92,7 +131,9 @@ class DOCX(object):
|
|||||||
zf.writestr('_rels/.rels', self.containerrels)
|
zf.writestr('_rels/.rels', self.containerrels)
|
||||||
zf.writestr('docProps/app.xml', self.appproperties)
|
zf.writestr('docProps/app.xml', self.appproperties)
|
||||||
zf.writestr('word/webSettings.xml', self.websettings)
|
zf.writestr('word/webSettings.xml', self.websettings)
|
||||||
# TODO: Write document and document relationships
|
zf.writestr('word/document.xml', xml2str(self.document))
|
||||||
|
zf.writestr('word/styles.xml', xml2str(self.styles))
|
||||||
|
zf.writestr('word/_rels/document.xml.rels', self.document_relationships.serialize())
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
d = DOCX(None, None)
|
d = DOCX(None, None)
|
||||||
|
@ -9,7 +9,9 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
from lxml.builder import ElementMaker
|
||||||
|
|
||||||
|
from calibre.ebooks.docx.names import namespaces
|
||||||
from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero
|
from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero
|
||||||
from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St
|
from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St
|
||||||
from calibre.ebooks.oeb.base import XPath, barename
|
from calibre.ebooks.oeb.base import XPath, barename
|
||||||
@ -42,7 +44,7 @@ class TextStyle(object):
|
|||||||
|
|
||||||
ALL_PROPS = ('font_family', 'font_size', 'bold', 'italic', 'color',
|
ALL_PROPS = ('font_family', 'font_size', 'bold', 'italic', 'color',
|
||||||
'background_color', 'underline', 'strike', 'dstrike', 'caps',
|
'background_color', 'underline', 'strike', 'dstrike', 'caps',
|
||||||
'shadow', 'small_caps', 'spacing', 'vertical-align')
|
'shadow', 'small_caps', 'spacing', 'vertical_align')
|
||||||
|
|
||||||
def __init__(self, css):
|
def __init__(self, css):
|
||||||
self.font_family = css['font-family'] # TODO: Resolve multiple font families and generic font family names
|
self.font_family = css['font-family'] # TODO: Resolve multiple font families and generic font family names
|
||||||
@ -113,6 +115,16 @@ class TextRun(object):
|
|||||||
def add_break(self, clear='none'):
|
def add_break(self, clear='none'):
|
||||||
self.texts.append(LineBreak(clear=clear))
|
self.texts.append(LineBreak(clear=clear))
|
||||||
|
|
||||||
|
def serialize(self, p):
    """Append this run to the paragraph element ``p`` as a ``w:r`` element.

    Each entry of ``self.texts`` is either a ``(text, preserve_whitespace)``
    pair, written as a ``w:t`` child, or a ``LineBreak`` stored by
    ``add_break``, written as a ``w:br`` child.
    """
    w = namespaces['w']
    r = p.makeelement('{%s}r' % w)
    p.append(r)
    for item in self.texts:
        if isinstance(item, LineBreak):
            # add_break() appends LineBreak objects, which cannot be
            # unpacked as (text, preserve_whitespace) pairs.
            br = r.makeelement('{%s}br' % w)
            r.append(br)
            # NOTE(review): assumes LineBreak keeps its ``clear`` argument
            # as a string attribute of the same name -- confirm.
            clear = getattr(item, 'clear', None)
            if clear is not None:
                br.set('{%s}clear' % w, clear)
            continue
        text, preserve_whitespace = item
        t = r.makeelement('{%s}t' % w)
        r.append(t)
        t.text = text or ''
        if preserve_whitespace:
            # Without xml:space="preserve" consumers may collapse whitespace
            t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
|
||||||
|
|
||||||
style_cache = {}
|
style_cache = {}
|
||||||
|
|
||||||
class Block(object):
|
class Block(object):
|
||||||
@ -120,19 +132,29 @@ class Block(object):
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.runs = []
|
self.runs = []
|
||||||
|
|
||||||
def add_text(self, text, style, ignore_leading_whitespace=False):
    """Add ``text`` to this block, reusing the last run when the style matches.

    ``style`` is indexed by CSS property name; its ``white-space`` value
    decides how whitespace is handled:

    * ``pre`` / ``pre-wrap``: the text is added with whitespace preserved.
    * ``pre-line``: the text is split into lines and a break is added
      wherever the text had a line terminator.
    * anything else: the text is added without the preserve flag.

    If ``ignore_leading_whitespace`` is true and whitespace is not being
    preserved, leading whitespace is stripped first.
    """
    ts = TextStyle(style)
    ws = style['white-space']
    if self.runs and ts == self.runs[-1].style:
        run = self.runs[-1]
    else:
        run = TextRun(ts)
        self.runs.append(run)
    preserve_whitespace = ws in {'pre', 'pre-wrap'}
    if ignore_leading_whitespace and not preserve_whitespace:
        text = text.lstrip()
    if ws == 'pre-line':
        # Compare each line with and without its terminator, so a break is
        # added only where the text really had one. A final line with no
        # trailing newline must not be followed by a break.
        lines = text.splitlines()
        raw_lines = text.splitlines(True)
        for line, raw in zip(lines, raw_lines):
            run.add_text(line, False)
            if len(raw) > len(line):
                run.add_break()
    else:
        run.add_text(text, preserve_whitespace)
|
||||||
|
|
||||||
|
def serialize(self, body):
    """Append this block to ``body`` as a ``w:p`` element holding its runs."""
    paragraph = body.makeelement('{%s}p' % namespaces['w'])
    body.append(paragraph)
    for text_run in self.runs:
        text_run.serialize(paragraph)
|
||||||
|
|
||||||
class Convert(object):
|
class Convert(object):
|
||||||
|
|
||||||
@ -149,6 +171,8 @@ class Convert(object):
|
|||||||
for item in self.oeb.spine:
|
for item in self.oeb.spine:
|
||||||
self.process_item(item)
|
self.process_item(item)
|
||||||
|
|
||||||
|
self.write()
|
||||||
|
|
||||||
def process_item(self, item):
|
def process_item(self, item):
|
||||||
stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, self.opts.output_profile)
|
stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, self.opts.output_profile)
|
||||||
|
|
||||||
@ -159,7 +183,7 @@ class Convert(object):
|
|||||||
|
|
||||||
def process_block(self, html_block, docx_block, stylizer, ignore_tail=False):
|
def process_block(self, html_block, docx_block, stylizer, ignore_tail=False):
|
||||||
if html_block.text:
|
if html_block.text:
|
||||||
docx_block.add_text(html_block.text, stylizer.style(html_block))
|
docx_block.add_text(html_block.text, stylizer.style(html_block), ignore_leading_whitespace=True)
|
||||||
|
|
||||||
for child in html_block.iterchildren(etree.Element):
|
for child in html_block.iterchildren(etree.Element):
|
||||||
tag = barename(child.tag)
|
tag = barename(child.tag)
|
||||||
@ -174,7 +198,7 @@ class Convert(object):
|
|||||||
else:
|
else:
|
||||||
self.process_inline(child, self.blocks[-1], stylizer)
|
self.process_inline(child, self.blocks[-1], stylizer)
|
||||||
|
|
||||||
if ignore_tail is False and html_block.tail:
|
if ignore_tail is False and html_block.tail and html_block.tail.strip():
|
||||||
b = docx_block
|
b = docx_block
|
||||||
if b is not self.blocks[-1]:
|
if b is not self.blocks[-1]:
|
||||||
b = Block()
|
b = Block()
|
||||||
@ -200,3 +224,28 @@ class Convert(object):
|
|||||||
|
|
||||||
if html_child.tail:
|
if html_child.tail:
|
||||||
docx_block.add_text(html_child.tail, stylizer.style(html_child.getparent()))
|
docx_block.add_text(html_child.tail, stylizer.style(html_child.getparent()))
|
||||||
|
|
||||||
|
def write(self):
    """Build the document and styles XML trees and store them on ``self.docx``.

    ``self.docx.document`` becomes a ``w:document`` element whose ``w:body``
    contains every block in ``self.blocks``, serialized in order.
    ``self.docx.styles`` becomes a ``w:styles`` element holding only empty
    document defaults.
    """
    # Namespace prefixes declared on the root of the document tree
    document_prefixes = {'w', 'r', 'm', 've', 'o', 'wp', 'w10', 'wne'}
    dn = {k: v for k, v in namespaces.items() if k in document_prefixes}
    E = ElementMaker(namespace=dn['w'], nsmap=dn)
    self.docx.document = doc = E.document()
    body = E.body()
    doc.append(body)
    for block in self.blocks:
        block.serialize(body)

    # Set membership, not ``k in 'wr'``: the substring test would also
    # accept the prefixes '' and 'wr'.
    style_prefixes = {'w', 'r'}
    dn = {k: v for k, v in namespaces.items() if k in style_prefixes}
    E = ElementMaker(namespace=dn['w'], nsmap=dn)
    self.docx.styles = E.styles(
        E.docDefaults(
            E.rPrDefault(
                E.rPr(
                    E.rFonts(),
                )
            ),
            E.pPrDefault(
                E.pPr(
                )
            )
        )
    )
|
||||||
|
Loading…
x
Reference in New Issue
Block a user