Implement creation of empty DOCX documents

This commit is contained in:
Kovid Goyal 2015-03-29 10:21:33 +05:30
parent 2168bb9102
commit 3536596399
4 changed files with 83 additions and 53 deletions

View File

@ -34,12 +34,22 @@ class DOCXOutput(OutputFormatPlugin):
'deleted, so be careful.') % 'DOCX'),
}
def convert_metadata(self, oeb):
from lxml import etree
from calibre.ebooks.oeb.base import OPF, OPF2_NS
from calibre.ebooks.metadata.opf2 import OPF as ReadOPF
from io import BytesIO
package = etree.Element(OPF('package'), attrib={'version': '2.0'}, nsmap={None: OPF2_NS})
oeb.metadata.to_opf2(package)
self.mi = ReadOPF(BytesIO(etree.tostring(package, encoding='utf-8')), populate_spine=False, try_to_guess_cover=False).to_book_metadata()
def convert(self, oeb, output_path, input_plugin, opts, log):
from calibre.ebooks.docx.writer.container import DOCX
from calibre.ebooks.docx.writer.from_html import Convert
docx = DOCX(opts, log)
self.convert_metadata(oeb)
Convert(oeb, docx)()
docx.write(output_path, oeb)
docx.write(output_path, self.mi)
if opts.extract_to:
from calibre.ebooks.docx.dump import do_dump
do_dump(output_path, opts.extract_to)

View File

@ -7,7 +7,6 @@ __license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import textwrap, os
from io import BytesIO
from lxml import etree
from lxml.builder import ElementMaker
@ -16,11 +15,10 @@ from calibre import guess_type
from calibre.constants import numeric_version, __appname__
from calibre.ebooks.docx.names import namespaces, STYLES, WEB_SETTINGS, IMAGES, FONTS
from calibre.ebooks.metadata import authors_to_string
from calibre.ebooks.metadata.opf2 import OPF as ReadOPF
from calibre.ebooks.oeb.base import OPF, OPF2_NS
from calibre.utils.date import utcnow
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
from calibre.utils.zipfile import ZipFile
from calibre.ebooks.pdf.render.common import PAPER_SIZES
def xml2str(root, pretty_print=False, with_tail=False):
if hasattr(etree, 'cleanup_namespaces'):
@ -29,6 +27,49 @@ def xml2str(root, pretty_print=False, with_tail=False):
pretty_print=pretty_print, with_tail=with_tail)
return ans
def create_skeleton(opts):
def w(x):
return '{%s}%s' % (namespaces['w'], x)
dn = {k:v for k, v in namespaces.iteritems() if k in {'w', 'r', 'm', 've', 'o', 'wp', 'w10', 'wne', 'a', 'pic'}}
E = ElementMaker(namespace=dn['w'], nsmap=dn)
doc = E.document()
body = E.body()
doc.append(body)
width, height = PAPER_SIZES[opts.docx_page_size]
if opts.docx_custom_page_size is not None:
width, height = map(float, opts.docx_custom_page_size.partition('x')[0::2])
width, height = int(20 * width), int(20 * height)
def margin(which):
return w(which), str(int(getattr(opts, 'margin_'+which) * 20))
body.append(E.sectPr(
E.pgSz(**{w('w'):str(width), w('h'):str(height)}),
E.pgMar(**dict(map(margin, 'left top right bottom'.split()))),
E.cols(**{w('space'):'720'}),
E.docGrid(**{w('linePitch'):"360"}),
))
dn = {k:v for k, v in namespaces.iteritems() if k in tuple('wra') + ('wp',)}
E = ElementMaker(namespace=dn['w'], nsmap=dn)
styles = E.styles(
E.docDefaults(
E.rPrDefault(
E.rPr(
E.rFonts(**{w('asciiTheme'):"minorHAnsi", w('eastAsiaTheme'):"minorEastAsia", w('hAnsiTheme'):"minorHAnsi", w('cstheme'):"minorBidi"}),
E.sz(**{w('val'):'22'}),
E.szCs(**{w('val'):'22'}),
E.lang(**{w('val'):'en-US', w('eastAsia'):"en-US", w('bidi'):"ar-SA"})
)
),
E.pPrDefault(
E.pPr(
E.spacing(**{w('after'):"0", w('line'):"276", w('lineRule'):"auto"})
)
)
)
)
return doc, styles, body
def update_doc_props(root, mi):
def setm(name, text=None, ns='dc'):
ans = root.makeelement('{%s}%s' % (namespaces[ns], name))
@ -92,6 +133,7 @@ class DOCX(object):
E = ElementMaker(namespace=namespaces['pr'], nsmap={None:namespaces['pr']})
self.embedded_fonts = E.Relationships()
self.fonts = {}
self.images = {}
# Boilerplate {{{
@property
@ -165,7 +207,7 @@ class DOCX(object):
# }}}
def convert_metadata(self, oeb):
def convert_metadata(self, mi):
E = ElementMaker(namespace=namespaces['cp'], nsmap={x:namespaces[x] for x in 'cp dc dcterms xsi'.split()})
cp = E.coreProperties(E.revision("1"), E.lastModifiedBy('calibre'))
ts = utcnow().isoformat(str('T')).rpartition('.')[0] + 'Z'
@ -173,17 +215,20 @@ class DOCX(object):
x = cp.makeelement('{%s}%s' % (namespaces['dcterms'], x), **{'{%s}type' % namespaces['xsi']:'dcterms:W3CDTF'})
x.text = ts
cp.append(x)
package = etree.Element(OPF('package'), attrib={'version': '2.0'}, nsmap={None: OPF2_NS})
oeb.metadata.to_opf2(package)
self.mi = ReadOPF(BytesIO(xml2str(package)), populate_spine=False, try_to_guess_cover=False).to_book_metadata()
self.mi = mi
update_doc_props(cp, self.mi)
return xml2str(cp)
def write(self, path_or_stream, oeb):
def create_empty_document(self, mi):
self.document, self.styles = create_skeleton(self.opts)[:2]
def write(self, path_or_stream, mi, create_empty_document=False):
if create_empty_document:
self.create_empty_document(mi)
with ZipFile(path_or_stream, 'w') as zf:
zf.writestr('[Content_Types].xml', self.contenttypes)
zf.writestr('_rels/.rels', self.containerrels)
zf.writestr('docProps/core.xml', self.convert_metadata(oeb))
zf.writestr('docProps/core.xml', self.convert_metadata(mi))
zf.writestr('docProps/app.xml', self.appproperties)
zf.writestr('word/webSettings.xml', self.websettings)
zf.writestr('word/document.xml', xml2str(self.document))

View File

@ -9,15 +9,13 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import re
from lxml import etree
from lxml.builder import ElementMaker
from calibre.ebooks.docx.names import namespaces
from calibre.ebooks.docx.writer.container import create_skeleton
from calibre.ebooks.docx.writer.styles import w, StylesManager
from calibre.ebooks.docx.writer.images import ImagesManager
from calibre.ebooks.docx.writer.fonts import FontsManager
from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St
from calibre.ebooks.oeb.base import XPath, barename
from calibre.ebooks.pdf.render.common import PAPER_SIZES
class Style(St):
@ -256,46 +254,9 @@ class Convert(object):
self.blocks[-1].add_text(html_child.tail, stylizer.style(html_child.getparent()), html_parent=html_child.getparent(), is_parent_style=True)
def write(self):
dn = {k:v for k, v in namespaces.iteritems() if k in {'w', 'r', 'm', 've', 'o', 'wp', 'w10', 'wne', 'a', 'pic'}}
E = ElementMaker(namespace=dn['w'], nsmap=dn)
self.docx.document = doc = E.document()
body = E.body()
doc.append(body)
self.docx.document, self.docx.styles, body = create_skeleton(self.opts)
for block in self.blocks:
block.serialize(body)
width, height = PAPER_SIZES[self.opts.docx_page_size]
if self.opts.docx_custom_page_size is not None:
width, height = map(float, self.opts.docx_custom_page_size.partition('x')[0::2])
width, height = int(20 * width), int(20 * height)
def margin(which):
return w(which), str(int(getattr(self.opts, 'margin_'+which) * 20))
body.append(E.sectPr(
E.pgSz(**{w('w'):str(width), w('h'):str(height)}),
E.pgMar(**dict(map(margin, 'left top right bottom'.split()))),
E.cols(**{w('space'):'720'}),
E.docGrid(**{w('linePitch'):"360"}),
))
dn = {k:v for k, v in namespaces.iteritems() if k in tuple('wra') + ('wp',)}
E = ElementMaker(namespace=dn['w'], nsmap=dn)
self.docx.styles = E.styles(
E.docDefaults(
E.rPrDefault(
E.rPr(
E.rFonts(**{w('asciiTheme'):"minorHAnsi", w('eastAsiaTheme'):"minorEastAsia", w('hAnsiTheme'):"minorHAnsi", w('cstheme'):"minorBidi"}),
E.sz(**{w('val'):'22'}),
E.szCs(**{w('val'):'22'}),
E.lang(**{w('val'):'en-US', w('eastAsia'):"en-US", w('bidi'):"ar-SA"})
)
),
E.pPrDefault(
E.pPr(
E.spacing(**{w('after'):"0", w('line'):"276", w('lineRule'):"auto"})
)
)
)
)
self.docx.images = {}
self.styles_manager.serialize(self.docx.styles)
self.images_manager.serialize(self.docx.images)
self.fonts_manager.serialize(self.styles_manager.text_styles, self.docx.font_table, self.docx.embedded_fonts, self.docx.fonts)

View File

@ -38,7 +38,21 @@ def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.
''' Create an empty book in the specified format at the specified location. '''
if fmt not in valid_empty_formats:
raise ValueError('Cannot create empty book in the %s format' % fmt)
if fmt not in {'epub', 'azw3'}:
if fmt == 'txt':
with open(path, 'wb') as f:
if not mi.is_null('title'):
f.write(mi.title)
return
if fmt == 'docx':
from calibre.ebooks.conversion.plumber import Plumber
from calibre.ebooks.docx.writer.container import DOCX
from calibre.utils.logging import default_log
p = Plumber('a.docx', 'b.docx', default_log)
p.setup_options()
# Use the word default of one inch page margins
for x in 'left right top bottom'.split():
setattr(p.opts, 'margin_' + x, 72)
DOCX(p.opts, default_log).write(path, mi, create_empty_document=True)
return
path = os.path.abspath(path)
lang = 'und'
@ -104,7 +118,7 @@ if __name__ == '__main__':
mi = Metadata('Test book', authors=('Kovid Goyal',))
path = sys.argv[-1]
ext = path.rpartition('.')[-1].lower()
if ext not in ('epub', 'azw3'):
if ext not in valid_empty_formats:
print ('Unsupported format:', ext)
raise SystemExit(1)
create_book(mi, path, fmt=ext)