diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 041f91f8fb..2e77337eb3 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -533,6 +533,17 @@ class TXTZMetadataWriter(MetadataWriterPlugin):
         from calibre.ebooks.metadata.extz import set_metadata
         set_metadata(stream, mi)
 
+class DocXMetadataWriter(MetadataWriterPlugin):
+
+    name = 'Set DOCX metadata'
+    file_types = set(['docx'])
+    description = _('Set metadata in %s files')%'DOCX'
+
+    def set_metadata(self, stream, mi, type):
+        from calibre.ebooks.metadata.docx import set_metadata
+        return set_metadata(stream, mi)
+
+
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and
                                         x.__name__.endswith('MetadataWriter')]
 
diff --git a/src/calibre/ebooks/docx/container.py b/src/calibre/ebooks/docx/container.py
index 7a1b80a9a4..cb3201e4f7 100644
--- a/src/calibre/ebooks/docx/container.py
+++ b/src/calibre/ebooks/docx/container.py
@@ -200,17 +200,33 @@ class DOCX(object):
 
         return by_id, by_type
 
-    @property
-    def metadata(self):
-        mi = Metadata(_('Unknown'))
+    def get_document_properties_names(self):
+        '''
+        Yield the name of the core properties part, then the name of the app
+        properties part. Each name is looked up in self.relationships first,
+        then by a case-insensitive match on docprops/core.xml or
+        docprops/app.xml in self.names; None is yielded if neither succeeds.
+        '''
         name = self.relationships.get(DOCPROPS, None)
         if name is None:
             names = tuple(n for n in self.names if n.lower() == 'docprops/core.xml')
             if names:
                 name = names[0]
-        if name:
+        yield name
+        name = self.relationships.get(APPPROPS, None)
+        if name is None:
+            names = tuple(n for n in self.names if n.lower() == 'docprops/app.xml')
+            if names:
+                name = names[0]
+        yield name
+
+    @property
+    def metadata(self):
+        mi = Metadata(_('Unknown'))
+        dp_name, ap_name = self.get_document_properties_names()
+        if dp_name:
             try:
-                raw = self.read(name)
+                raw = self.read(dp_name)
             except KeyError:
                 pass
             else:
@@ -223,14 +239,10 @@ class DOCX(object):
             else:
                 read_default_style_language(raw, mi)
 
-        name = self.relationships.get(APPPROPS, None)
-        if name is None:
-            names = tuple(n for n in self.names if n.lower() == 'docprops/app.xml')
-            if names:
-                name = names[0]
-        if name:
+        # ap_name already carries the docprops/app.xml fallback; do not re-read it
+        if ap_name:
             try:
-                raw = self.read(name)
+                raw = self.read(ap_name)
             except KeyError:
                 pass
             else:
diff --git a/src/calibre/ebooks/docx/writer/container.py b/src/calibre/ebooks/docx/writer/container.py
index c980dfe853..27a5e0b9ba 100644
--- a/src/calibre/ebooks/docx/writer/container.py
+++ b/src/calibre/ebooks/docx/writer/container.py
@@ -30,7 +30,7 @@ def xml2str(root, pretty_print=False, with_tail=False):
 def update_doc_props(root, mi):
     def setm(name, text=None, ns='dc'):
         ans = root.makeelement('{%s}%s' % (namespaces[ns], name))
-        for child in root:
+        for child in tuple(root):
             if child.tag == ans.tag:
                 root.remove(child)
         ans.text = text
diff --git a/src/calibre/ebooks/metadata/docx.py b/src/calibre/ebooks/metadata/docx.py
index 5d84471e08..19a0249195 100644
--- a/src/calibre/ebooks/metadata/docx.py
+++ b/src/calibre/ebooks/metadata/docx.py
@@ -7,9 +7,13 @@
 __license__ = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal '
 __docformat__ = 'restructuredtext en'
 
-from calibre.ebooks.docx.container import DOCX
-from calibre.ebooks.docx.names import XPath, get
+from io import BytesIO
+from lxml import etree
+
+from calibre.ebooks.docx.container import DOCX
+from calibre.ebooks.docx.writer.container import update_doc_props, xml2str, namespaces
+from calibre.ebooks.docx.names import XPath, get
 from calibre.utils.magick.draw import identify_data
 
 images = XPath('//*[name()="w:drawing" or name()="w:pict"]/descendant::*[(name()="a:blip" and @r:embed) or (name()="v:imagedata" and @r:id)][1]')
@@ -44,6 +48,37 @@ def get_metadata(stream):
 
     return mi
 
+def set_metadata(stream, mi):
+    '''
+    Update the document properties stored in the DOCX file in stream from mi.
+
+    The core properties part is rewritten with update_doc_props(). If the app
+    properties part can be read, its Company element is replaced with
+    mi.publisher. The changed parts are written back with safe_replace().
+    '''
+    from calibre.utils.zipfile import safe_replace
+    c = DOCX(stream, extract=False)
+    dp_name, ap_name = c.get_document_properties_names()
+    dp_raw = c.read(dp_name)
+    try:
+        ap_raw = c.read(ap_name)
+    except Exception:
+        ap_raw = None
+    cp = etree.fromstring(dp_raw)
+    update_doc_props(cp, mi)
+    replacements = {}
+    if ap_raw is not None:
+        ap = etree.fromstring(ap_raw)
+        comp = ap.makeelement('{%s}Company' % namespaces['ep'])
+        for child in tuple(ap):
+            if child.tag == comp.tag:
+                ap.remove(child)
+        comp.text = mi.publisher
+        ap.append(comp)
+        replacements[ap_name] = BytesIO(xml2str(ap))
+    stream.seek(0)
+    safe_replace(stream, dp_name, BytesIO(xml2str(cp)), extra_replacements=replacements)
+
 if __name__ == '__main__':
     import sys
     with open(sys.argv[-1], 'rb') as stream: