diff --git a/src/calibre/ebooks/conversion/plugins/docx_output.py b/src/calibre/ebooks/conversion/plugins/docx_output.py index de391817c8..e66488483a 100644 --- a/src/calibre/ebooks/conversion/plugins/docx_output.py +++ b/src/calibre/ebooks/conversion/plugins/docx_output.py @@ -34,12 +34,22 @@ class DOCXOutput(OutputFormatPlugin): 'deleted, so be careful.') % 'DOCX'), } + def convert_metadata(self, oeb): + from lxml import etree + from calibre.ebooks.oeb.base import OPF, OPF2_NS + from calibre.ebooks.metadata.opf2 import OPF as ReadOPF + from io import BytesIO + package = etree.Element(OPF('package'), attrib={'version': '2.0'}, nsmap={None: OPF2_NS}) + oeb.metadata.to_opf2(package) + self.mi = ReadOPF(BytesIO(etree.tostring(package, encoding='utf-8')), populate_spine=False, try_to_guess_cover=False).to_book_metadata() + def convert(self, oeb, output_path, input_plugin, opts, log): from calibre.ebooks.docx.writer.container import DOCX from calibre.ebooks.docx.writer.from_html import Convert docx = DOCX(opts, log) + self.convert_metadata(oeb) Convert(oeb, docx)() - docx.write(output_path, oeb) + docx.write(output_path, self.mi) if opts.extract_to: from calibre.ebooks.docx.dump import do_dump do_dump(output_path, opts.extract_to) diff --git a/src/calibre/ebooks/docx/writer/container.py b/src/calibre/ebooks/docx/writer/container.py index c935173bed..30f6dbb6ab 100644 --- a/src/calibre/ebooks/docx/writer/container.py +++ b/src/calibre/ebooks/docx/writer/container.py @@ -7,7 +7,6 @@ __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal ' import textwrap, os -from io import BytesIO from lxml import etree from lxml.builder import ElementMaker @@ -16,11 +15,10 @@ from calibre import guess_type from calibre.constants import numeric_version, __appname__ from calibre.ebooks.docx.names import namespaces, STYLES, WEB_SETTINGS, IMAGES, FONTS from calibre.ebooks.metadata import authors_to_string -from calibre.ebooks.metadata.opf2 import OPF as ReadOPF -from 
calibre.ebooks.oeb.base import OPF, OPF2_NS from calibre.utils.date import utcnow from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1 from calibre.utils.zipfile import ZipFile +from calibre.ebooks.pdf.render.common import PAPER_SIZES def xml2str(root, pretty_print=False, with_tail=False): if hasattr(etree, 'cleanup_namespaces'): @@ -29,6 +27,49 @@ def xml2str(root, pretty_print=False, with_tail=False): pretty_print=pretty_print, with_tail=with_tail) return ans +def create_skeleton(opts): + def w(x): + return '{%s}%s' % (namespaces['w'], x) + dn = {k:v for k, v in namespaces.iteritems() if k in {'w', 'r', 'm', 've', 'o', 'wp', 'w10', 'wne', 'a', 'pic'}} + E = ElementMaker(namespace=dn['w'], nsmap=dn) + doc = E.document() + body = E.body() + doc.append(body) + width, height = PAPER_SIZES[opts.docx_page_size] + if opts.docx_custom_page_size is not None: + width, height = map(float, opts.docx_custom_page_size.partition('x')[0::2]) + width, height = int(20 * width), int(20 * height) + def margin(which): + return w(which), str(int(getattr(opts, 'margin_'+which) * 20)) + body.append(E.sectPr( + E.pgSz(**{w('w'):str(width), w('h'):str(height)}), + E.pgMar(**dict(map(margin, 'left top right bottom'.split()))), + E.cols(**{w('space'):'720'}), + E.docGrid(**{w('linePitch'):"360"}), + )) + + dn = {k:v for k, v in namespaces.iteritems() if k in tuple('wra') + ('wp',)} + E = ElementMaker(namespace=dn['w'], nsmap=dn) + styles = E.styles( + E.docDefaults( + E.rPrDefault( + E.rPr( + E.rFonts(**{w('asciiTheme'):"minorHAnsi", w('eastAsiaTheme'):"minorEastAsia", w('hAnsiTheme'):"minorHAnsi", w('cstheme'):"minorBidi"}), + E.sz(**{w('val'):'22'}), + E.szCs(**{w('val'):'22'}), + E.lang(**{w('val'):'en-US', w('eastAsia'):"en-US", w('bidi'):"ar-SA"}) + ) + ), + E.pPrDefault( + E.pPr( + E.spacing(**{w('after'):"0", w('line'):"276", w('lineRule'):"auto"}) + ) + ) + ) + ) + return doc, styles, body + + def update_doc_props(root, mi): def setm(name, text=None, ns='dc'): 
ans = root.makeelement('{%s}%s' % (namespaces[ns], name)) @@ -92,6 +133,7 @@ class DOCX(object): E = ElementMaker(namespace=namespaces['pr'], nsmap={None:namespaces['pr']}) self.embedded_fonts = E.Relationships() self.fonts = {} + self.images = {} # Boilerplate {{{ @property @@ -165,7 +207,7 @@ class DOCX(object): # }}} - def convert_metadata(self, oeb): + def convert_metadata(self, mi): E = ElementMaker(namespace=namespaces['cp'], nsmap={x:namespaces[x] for x in 'cp dc dcterms xsi'.split()}) cp = E.coreProperties(E.revision("1"), E.lastModifiedBy('calibre')) ts = utcnow().isoformat(str('T')).rpartition('.')[0] + 'Z' @@ -173,17 +215,20 @@ class DOCX(object): x = cp.makeelement('{%s}%s' % (namespaces['dcterms'], x), **{'{%s}type' % namespaces['xsi']:'dcterms:W3CDTF'}) x.text = ts cp.append(x) - package = etree.Element(OPF('package'), attrib={'version': '2.0'}, nsmap={None: OPF2_NS}) - oeb.metadata.to_opf2(package) - self.mi = ReadOPF(BytesIO(xml2str(package)), populate_spine=False, try_to_guess_cover=False).to_book_metadata() + self.mi = mi update_doc_props(cp, self.mi) return xml2str(cp) - def write(self, path_or_stream, oeb): + def create_empty_document(self, mi): + self.document, self.styles = create_skeleton(self.opts)[:2] + + def write(self, path_or_stream, mi, create_empty_document=False): + if create_empty_document: + self.create_empty_document(mi) with ZipFile(path_or_stream, 'w') as zf: zf.writestr('[Content_Types].xml', self.contenttypes) zf.writestr('_rels/.rels', self.containerrels) - zf.writestr('docProps/core.xml', self.convert_metadata(oeb)) + zf.writestr('docProps/core.xml', self.convert_metadata(mi)) zf.writestr('docProps/app.xml', self.appproperties) zf.writestr('word/webSettings.xml', self.websettings) zf.writestr('word/document.xml', xml2str(self.document)) diff --git a/src/calibre/ebooks/docx/writer/from_html.py b/src/calibre/ebooks/docx/writer/from_html.py index 52921bd403..ec06b8eb20 100644 --- a/src/calibre/ebooks/docx/writer/from_html.py +++ 
b/src/calibre/ebooks/docx/writer/from_html.py @@ -9,15 +9,13 @@ __copyright__ = '2013, Kovid Goyal ' import re from lxml import etree -from lxml.builder import ElementMaker -from calibre.ebooks.docx.names import namespaces +from calibre.ebooks.docx.writer.container import create_skeleton from calibre.ebooks.docx.writer.styles import w, StylesManager from calibre.ebooks.docx.writer.images import ImagesManager from calibre.ebooks.docx.writer.fonts import FontsManager from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St from calibre.ebooks.oeb.base import XPath, barename -from calibre.ebooks.pdf.render.common import PAPER_SIZES class Style(St): @@ -256,46 +254,9 @@ class Convert(object): self.blocks[-1].add_text(html_child.tail, stylizer.style(html_child.getparent()), html_parent=html_child.getparent(), is_parent_style=True) def write(self): - dn = {k:v for k, v in namespaces.iteritems() if k in {'w', 'r', 'm', 've', 'o', 'wp', 'w10', 'wne', 'a', 'pic'}} - E = ElementMaker(namespace=dn['w'], nsmap=dn) - self.docx.document = doc = E.document() - body = E.body() - doc.append(body) + self.docx.document, self.docx.styles, body = create_skeleton(self.opts) for block in self.blocks: block.serialize(body) - width, height = PAPER_SIZES[self.opts.docx_page_size] - if self.opts.docx_custom_page_size is not None: - width, height = map(float, self.opts.docx_custom_page_size.partition('x')[0::2]) - width, height = int(20 * width), int(20 * height) - def margin(which): - return w(which), str(int(getattr(self.opts, 'margin_'+which) * 20)) - body.append(E.sectPr( - E.pgSz(**{w('w'):str(width), w('h'):str(height)}), - E.pgMar(**dict(map(margin, 'left top right bottom'.split()))), - E.cols(**{w('space'):'720'}), - E.docGrid(**{w('linePitch'):"360"}), - )) - - dn = {k:v for k, v in namespaces.iteritems() if k in tuple('wra') + ('wp',)} - E = ElementMaker(namespace=dn['w'], nsmap=dn) - self.docx.styles = E.styles( - E.docDefaults( - E.rPrDefault( - E.rPr( - 
E.rFonts(**{w('asciiTheme'):"minorHAnsi", w('eastAsiaTheme'):"minorEastAsia", w('hAnsiTheme'):"minorHAnsi", w('cstheme'):"minorBidi"}), - E.sz(**{w('val'):'22'}), - E.szCs(**{w('val'):'22'}), - E.lang(**{w('val'):'en-US', w('eastAsia'):"en-US", w('bidi'):"ar-SA"}) - ) - ), - E.pPrDefault( - E.pPr( - E.spacing(**{w('after'):"0", w('line'):"276", w('lineRule'):"auto"}) - ) - ) - ) - ) - self.docx.images = {} self.styles_manager.serialize(self.docx.styles) self.images_manager.serialize(self.docx.images) self.fonts_manager.serialize(self.styles_manager.text_styles, self.docx.font_table, self.docx.embedded_fonts, self.docx.fonts) diff --git a/src/calibre/ebooks/oeb/polish/create.py b/src/calibre/ebooks/oeb/polish/create.py index 982a4e97b2..792372b5bf 100644 --- a/src/calibre/ebooks/oeb/polish/create.py +++ b/src/calibre/ebooks/oeb/polish/create.py @@ -38,7 +38,21 @@ def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start. ''' Create an empty book in the specified format at the specified location. 
''' if fmt not in valid_empty_formats: raise ValueError('Cannot create empty book in the %s format' % fmt) - if fmt not in {'epub', 'azw3'}: + if fmt == 'txt': + with open(path, 'wb') as f: + if not mi.is_null('title'): + f.write(mi.title) + return + if fmt == 'docx': + from calibre.ebooks.conversion.plumber import Plumber + from calibre.ebooks.docx.writer.container import DOCX + from calibre.utils.logging import default_log + p = Plumber('a.docx', 'b.docx', default_log) + p.setup_options() + # Use the Microsoft Word default of one-inch page margins + for x in 'left right top bottom'.split(): + setattr(p.opts, 'margin_' + x, 72) + DOCX(p.opts, default_log).write(path, mi, create_empty_document=True) return path = os.path.abspath(path) lang = 'und' @@ -104,7 +118,7 @@ if __name__ == '__main__': mi = Metadata('Test book', authors=('Kovid Goyal',)) path = sys.argv[-1] ext = path.rpartition('.')[-1].lower() - if ext not in ('epub', 'azw3'): + if ext not in valid_empty_formats: print ('Unsupported format:', ext) raise SystemExit(1) create_book(mi, path, fmt=ext)