mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
Get towards writing a full docx skeleton with all text but no styles/images/etc.
This commit is contained in:
parent
cbc85be903
commit
0f0e62b3a0
@ -24,6 +24,7 @@ FOOTNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships
|
||||
ENDNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes'
|
||||
THEMES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme'
|
||||
SETTINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings'
|
||||
WEB_SETTINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings'
|
||||
|
||||
namespaces = {
|
||||
'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
|
||||
|
@ -8,18 +8,57 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import textwrap
|
||||
|
||||
from lxml import etree
|
||||
from lxml.builder import ElementMaker
|
||||
|
||||
from calibre import guess_type
|
||||
from calibre.constants import numeric_version, __appname__
|
||||
from calibre.ebooks.docx.names import namespaces
|
||||
from calibre.ebooks.oeb.base import xml2str
|
||||
from calibre.ebooks.docx.names import namespaces, STYLES, WEB_SETTINGS
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
|
||||
def xml2str(root, pretty_print=False, with_tail=False):
    """Serialize the lxml tree rooted at ``root`` to UTF-8 encoded XML.

    The result always carries an XML declaration. ``pretty_print`` and
    ``with_tail`` are forwarded unchanged to ``etree.tostring``.
    """
    return etree.tostring(
        root,
        encoding='utf-8',
        xml_declaration=True,
        pretty_print=pretty_print,
        with_tail=with_tail
    )
|
||||
|
||||
|
||||
class DocumentRelationships(object):
    """Registry of the relationships written to word/_rels/document.xml.rels.

    Each relationship is identified by the triple
    ``(target, rtype, target_mode)`` and is assigned a unique id of the form
    ``rId<n>``. The mapping from triple to id is kept in ``self.rmap``;
    ``self.counter`` holds the number of ids handed out so far.
    """

    def __init__(self):
        self.rmap = {}
        self.counter = 0
        # A tuple of pairs rather than a dict literal: ids are handed out in
        # iteration order, so a fixed order gives the default parts the same
        # ids on every run.
        for typ, target in (
            (STYLES, 'styles.xml'),
            (WEB_SETTINGS, 'webSettings.xml'),
        ):
            self.add_relationship(target, typ)

    def get_relationship_id(self, target, rtype, target_mode=None):
        """Return the id registered for this relationship, or None."""
        return self.rmap.get((target, rtype, target_mode))

    def add_relationship(self, target, rtype, target_mode=None):
        """Register a relationship and return its id.

        Registering the same ``(target, rtype, target_mode)`` triple again
        returns the id that was assigned the first time.
        """
        ans = self.get_relationship_id(target, rtype, target_mode)
        if ans is None:
            self.counter += 1
            ans = 'rId%d' % self.counter
            self.rmap[(target, rtype, target_mode)] = ans
        return ans

    def serialize(self):
        """Return the relationships part as serialized XML (via xml2str)."""
        E = ElementMaker(namespace=namespaces['pr'], nsmap={None: namespaces['pr']})
        relationships = E.Relationships()
        # Ids are always 'rId<n>' (see add_relationship); ordering on <n>
        # makes the output independent of dict iteration order.
        ordered = sorted(self.rmap.items(), key=lambda item: int(item[1][3:]))
        for (target, rtype, target_mode), rid in ordered:
            r = E.Relationship(Id=rid, Type=rtype, Target=target)
            if target_mode is not None:
                r.set('TargetMode', target_mode)
            relationships.append(r)
        return xml2str(relationships)
|
||||
|
||||
class DOCX(object):
|
||||
|
||||
def __init__(self, opts, log):
    """Keep the options and logger, and create the document relationship registry."""
    self.opts = opts
    self.log = log
    self.document_relationships = DocumentRelationships()
|
||||
|
||||
# Boilerplate {{{
|
||||
@property
|
||||
@ -92,7 +131,9 @@ class DOCX(object):
|
||||
zf.writestr('_rels/.rels', self.containerrels)
|
||||
zf.writestr('docProps/app.xml', self.appproperties)
|
||||
zf.writestr('word/webSettings.xml', self.websettings)
|
||||
# TODO: Write document and document relationships
|
||||
zf.writestr('word/document.xml', xml2str(self.document))
|
||||
zf.writestr('word/styles.xml', xml2str(self.styles))
|
||||
zf.writestr('word/_rels/document.xml.rels', self.document_relationships.serialize())
|
||||
|
||||
if __name__ == '__main__':
|
||||
d = DOCX(None, None)
|
||||
|
@ -9,7 +9,9 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
import re
|
||||
|
||||
from lxml import etree
|
||||
from lxml.builder import ElementMaker
|
||||
|
||||
from calibre.ebooks.docx.names import namespaces
|
||||
from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St
|
||||
from calibre.ebooks.oeb.base import XPath, barename
|
||||
@ -42,7 +44,7 @@ class TextStyle(object):
|
||||
|
||||
ALL_PROPS = ('font_family', 'font_size', 'bold', 'italic', 'color',
|
||||
'background_color', 'underline', 'strike', 'dstrike', 'caps',
|
||||
'shadow', 'small_caps', 'spacing', 'vertical-align')
|
||||
'shadow', 'small_caps', 'spacing', 'vertical_align')
|
||||
|
||||
def __init__(self, css):
|
||||
self.font_family = css['font-family'] # TODO: Resolve multiple font families and generic font family names
|
||||
@ -113,6 +115,16 @@ class TextRun(object):
|
||||
def add_break(self, clear='none'):
    """Queue a line break at the current end of this run.

    ``clear`` is passed straight through to ``LineBreak``.
    """
    line_break = LineBreak(clear=clear)
    self.texts.append(line_break)
|
||||
|
||||
def serialize(self, p):
    """Append this run to the paragraph element ``p`` as a ``w:r`` element.

    ``self.texts`` holds two kinds of entries: ``(text, preserve_whitespace)``
    pairs, which become ``w:t`` children, and ``LineBreak`` objects queued by
    ``add_break()``, which become ``w:br`` children.
    """
    w = namespaces['w']
    r = p.makeelement('{%s}r' % w)
    p.append(r)
    for item in self.texts:
        if isinstance(item, LineBreak):
            # A LineBreak is not a (text, preserve_whitespace) pair, so it
            # must be handled before the tuple unpacking below.
            br = r.makeelement('{%s}br' % w)
            # NOTE(review): assumes LineBreak exposes the value it was built
            # with as a ``clear`` attribute -- confirm against its definition.
            clear = getattr(item, 'clear', None)
            if clear and clear != 'none':
                br.set('{%s}clear' % w, clear)
            r.append(br)
            continue
        text, preserve_whitespace = item
        t = r.makeelement('{%s}t' % w)
        r.append(t)
        t.text = text or ''
        if preserve_whitespace:
            # xml:space="preserve" tells consumers to keep the whitespace
            # in this text node.
            t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
|
||||
|
||||
style_cache = {}
|
||||
|
||||
class Block(object):
    """A block of content built up as an ordered list of text runs.

    ``serialize()`` emits the block as a single ``w:p`` element.
    """

    def __init__(self):
        self.runs = []

    def add_text(self, text, style, ignore_leading_whitespace=False):
        """Append ``text`` to this block, formatted according to ``style``.

        The text joins the last run when that run's text style compares
        equal to the one derived from ``style``; otherwise a new run is
        started. ``style['white-space']`` selects the whitespace handling:

        * ``pre`` / ``pre-wrap``: the text is added with whitespace preserved
          and leading whitespace is never stripped.
        * ``pre-line``: each line is added separately, followed by a break.
        * anything else: the text is added as is.

        If ``ignore_leading_whitespace`` is true and whitespace is not being
        preserved, leading whitespace is stripped first. Text that is empty
        after that step is dropped without creating a run.
        """
        ws = style['white-space']
        preserve_whitespace = ws in {'pre', 'pre-wrap'}
        if ignore_leading_whitespace and not preserve_whitespace:
            text = text.lstrip()
        if not text:
            # Nothing left to add; avoid creating an empty run.
            return

        ts = TextStyle(style)
        if self.runs and ts == self.runs[-1].style:
            run = self.runs[-1]
        else:
            run = TextRun(ts)
            self.runs.append(run)

        if ws == 'pre-line':
            for line in text.splitlines():
                run.add_text(line, False)
                run.add_break()
        else:
            run.add_text(text, preserve_whitespace)

    def serialize(self, body):
        """Append this block to ``body`` as a ``w:p`` element holding its runs."""
        p = body.makeelement('{%s}p' % namespaces['w'])
        body.append(p)
        for run in self.runs:
            run.serialize(p)
|
||||
|
||||
class Convert(object):
|
||||
|
||||
@ -149,6 +171,8 @@ class Convert(object):
|
||||
for item in self.oeb.spine:
|
||||
self.process_item(item)
|
||||
|
||||
self.write()
|
||||
|
||||
def process_item(self, item):
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, self.opts.output_profile)
|
||||
|
||||
@ -159,7 +183,7 @@ class Convert(object):
|
||||
|
||||
def process_block(self, html_block, docx_block, stylizer, ignore_tail=False):
|
||||
if html_block.text:
|
||||
docx_block.add_text(html_block.text, stylizer.style(html_block))
|
||||
docx_block.add_text(html_block.text, stylizer.style(html_block), ignore_leading_whitespace=True)
|
||||
|
||||
for child in html_block.iterchildren(etree.Element):
|
||||
tag = barename(child.tag)
|
||||
@ -174,7 +198,7 @@ class Convert(object):
|
||||
else:
|
||||
self.process_inline(child, self.blocks[-1], stylizer)
|
||||
|
||||
if ignore_tail is False and html_block.tail:
|
||||
if ignore_tail is False and html_block.tail and html_block.tail.strip():
|
||||
b = docx_block
|
||||
if b is not self.blocks[-1]:
|
||||
b = Block()
|
||||
@ -200,3 +224,28 @@ class Convert(object):
|
||||
|
||||
if html_child.tail:
|
||||
docx_block.add_text(html_child.tail, stylizer.style(html_child.getparent()))
|
||||
|
||||
def write(self):
    """Build the document and styles XML trees and store them on ``self.docx``.

    ``self.docx.document`` becomes a ``w:document`` element whose ``w:body``
    contains every block in ``self.blocks``, serialized in order.
    ``self.docx.styles`` becomes a ``w:styles`` element holding only the
    ``w:docDefaults`` skeleton.
    """
    doc_prefixes = {'w', 'r', 'm', 've', 'o', 'wp', 'w10', 'wne'}
    dn = {k: v for k, v in namespaces.items() if k in doc_prefixes}
    E = ElementMaker(namespace=dn['w'], nsmap=dn)
    self.docx.document = doc = E.document()
    body = E.body()
    doc.append(body)
    for block in self.blocks:
        block.serialize(body)

    # Membership in a set, not in the string 'wr': the string form is a
    # substring test and would also accept the keys '' and 'wr'.
    style_prefixes = {'w', 'r'}
    dn = {k: v for k, v in namespaces.items() if k in style_prefixes}
    E = ElementMaker(namespace=dn['w'], nsmap=dn)
    self.docx.styles = E.styles(
        E.docDefaults(
            E.rPrDefault(
                E.rPr(
                    E.rFonts(),
                )
            ),
            E.pPrDefault(
                E.pPr(
                )
            )
        )
    )
|
||||
|
Loading…
x
Reference in New Issue
Block a user