mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
Implement updating metadata in DOCX files
This commit is contained in:
parent
ca1143d043
commit
2a0ee491e9
@ -533,6 +533,17 @@ class TXTZMetadataWriter(MetadataWriterPlugin):
|
||||
from calibre.ebooks.metadata.extz import set_metadata
|
||||
set_metadata(stream, mi)
|
||||
|
||||
class DocXMetadataWriter(MetadataWriterPlugin):
    '''
    Metadata writer plugin for DOCX files.

    The work is delegated to ``set_metadata`` from
    ``calibre.ebooks.metadata.docx``.
    '''

    name = 'Set DOCX metadata'
    file_types = set(['docx'])
    # This plugin writes metadata, so the description must say "Set", not
    # "Read" (the latter is the wording for reader plugins).
    description = _('Set metadata in %s files')%'DOCX'

    def set_metadata(self, stream, mi, type):
        '''
        Write the metadata in ``mi`` into the DOCX file ``stream``.

        :param stream: The DOCX file, passed unchanged to the underlying
                       ``set_metadata`` function.
        :param mi: The metadata to write.
        :param type: Not used by this method; kept because it is part of
                     the method signature.
        :return: Whatever the underlying ``set_metadata`` returns.
        '''
        # Imported inside the method rather than at module level
        from calibre.ebooks.metadata.docx import set_metadata
        return set_metadata(stream, mi)
|
||||
|
||||
|
||||
plugins += [x for x in list(locals().values()) if isinstance(x, type) and
|
||||
x.__name__.endswith('MetadataWriter')]
|
||||
|
||||
|
@ -200,17 +200,27 @@ class DOCX(object):
|
||||
|
||||
return by_id, by_type
|
||||
|
||||
@property
|
||||
def metadata(self):
|
||||
mi = Metadata(_('Unknown'))
|
||||
def get_document_properties_names(self):
    '''
    Yield the names of the two document properties parts, always in the
    same order: first the part for ``DOCPROPS``, then the part for
    ``APPPROPS``.

    Each name is looked up in ``self.relationships``. If it is not
    present there, the first entry of ``self.names`` that matches the
    conventional path (compared case-insensitively) is used instead.
    ``None`` is yielded for a part that cannot be found, so exactly two
    values are always produced and callers can safely unpack the result
    as ``dp_name, ap_name``.
    '''
    lookups = (
        (DOCPROPS, 'docprops/core.xml'),
        (APPPROPS, 'docprops/app.xml'),
    )
    for rel_type, conventional_path in lookups:
        name = self.relationships.get(rel_type, None)
        if name is None:
            # No relationship entry: fall back to the conventional path
            for candidate in self.names:
                if candidate.lower() == conventional_path:
                    name = candidate
                    break
        # Yield unconditionally (possibly None) so that two-value
        # unpacking by callers never fails.
        yield name
|
||||
|
||||
@property
|
||||
def metadata(self):
|
||||
mi = Metadata(_('Unknown'))
|
||||
dp_name, ap_name = self.get_document_properties_names()
|
||||
if dp_name:
|
||||
try:
|
||||
raw = self.read(name)
|
||||
raw = self.read(dp_name)
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
@ -223,14 +233,10 @@ class DOCX(object):
|
||||
else:
|
||||
read_default_style_language(raw, mi)
|
||||
|
||||
name = self.relationships.get(APPPROPS, None)
|
||||
if name is None:
|
||||
names = tuple(n for n in self.names if n.lower() == 'docprops/app.xml')
|
||||
if names:
|
||||
name = names[0]
|
||||
if name:
|
||||
ap_name = self.relationships.get(APPPROPS, None)
|
||||
if ap_name:
|
||||
try:
|
||||
raw = self.read(name)
|
||||
raw = self.read(ap_name)
|
||||
except KeyError:
|
||||
pass
|
||||
else:
|
||||
|
@ -30,7 +30,7 @@ def xml2str(root, pretty_print=False, with_tail=False):
|
||||
def update_doc_props(root, mi):
|
||||
def setm(name, text=None, ns='dc'):
|
||||
ans = root.makeelement('{%s}%s' % (namespaces[ns], name))
|
||||
for child in root:
|
||||
for child in tuple(root):
|
||||
if child.tag == ans.tag:
|
||||
root.remove(child)
|
||||
ans.text = text
|
||||
|
@ -7,9 +7,13 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre.ebooks.docx.container import DOCX
|
||||
from calibre.ebooks.docx.names import XPath, get
|
||||
from io import BytesIO
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre.ebooks.docx.container import DOCX
|
||||
from calibre.ebooks.docx.writer.container import update_doc_props, xml2str, namespaces
|
||||
from calibre.ebooks.docx.names import XPath, get
|
||||
from calibre.utils.magick.draw import identify_data
|
||||
|
||||
images = XPath('//*[name()="w:drawing" or name()="w:pict"]/descendant::*[(name()="a:blip" and @r:embed) or (name()="v:imagedata" and @r:id)][1]')
|
||||
@ -44,6 +48,30 @@ def get_metadata(stream):
|
||||
|
||||
return mi
|
||||
|
||||
def set_metadata(stream, mi):
    '''
    Update the document properties stored in the DOCX file ``stream``
    from the metadata object ``mi``.

    The core properties part is parsed and updated via
    ``update_doc_props``. If the app properties part can be read, any
    existing ``Company`` elements in it are replaced by a single one whose
    text is ``mi.publisher``. The re-serialized parts are then handed to
    ``safe_replace`` together with ``stream``.

    :param stream: The DOCX file; it is rewound to the start before
                   ``safe_replace`` is called.
    :param mi: The metadata to write.
    '''
    from calibre.utils.zipfile import safe_replace

    container = DOCX(stream, extract=False)
    core_name, app_name = container.get_document_properties_names()

    core_raw = container.read(core_name)
    try:
        app_raw = container.read(app_name)
    except Exception:
        # Failure to read the app properties is tolerated: that part is
        # then simply left untouched.
        app_raw = None

    core_root = etree.fromstring(core_raw)
    update_doc_props(core_root, mi)

    extra = {}
    if app_raw is not None:
        app_root = etree.fromstring(app_raw)
        company = app_root.makeelement('{%s}Company' % namespaces['ep'])
        # Collect the stale elements first so the tree is not mutated
        # while it is being iterated.
        stale = [el for el in app_root if el.tag == company.tag]
        for el in stale:
            app_root.remove(el)
        company.text = mi.publisher
        app_root.append(company)
        extra[app_name] = BytesIO(xml2str(app_root))

    stream.seek(0)
    safe_replace(stream, core_name, BytesIO(xml2str(core_root)), extra_replacements=extra)
|
||||
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
with open(sys.argv[-1], 'rb') as stream:
|
||||
|
Loading…
x
Reference in New Issue
Block a user