From 90374d24c4a59759dff8dde2ddb3e0cdf39d350c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 11 May 2013 13:01:07 +0530 Subject: [PATCH] Dump docx files with prettified xml --- src/calibre/ebooks/docx/dump.py | 37 +++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 src/calibre/ebooks/docx/dump.py diff --git a/src/calibre/ebooks/docx/dump.py b/src/calibre/ebooks/docx/dump.py new file mode 100644 index 0000000000..f6432125c5 --- /dev/null +++ b/src/calibre/ebooks/docx/dump.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Kovid Goyal ' + +import sys, os, shutil + +from lxml import etree + +from calibre import walk +from calibre.utils.zipfile import ZipFile + +def dump(path): + dest = os.path.splitext(os.path.basename(path))[0] + dest += '_extracted' + if os.path.exists(dest): + shutil.rmtree(dest) + with ZipFile(path) as zf: + zf.extractall(dest) + + for f in walk(dest): + if f.endswith('.xml'): + with open(f, 'r+b') as stream: + raw = stream.read() + root = etree.fromstring(raw) + stream.seek(0) + stream.truncate() + stream.write(etree.tostring(root, pretty_print=True, encoding='utf-8', xml_declaration=True)) + + print (path, 'dumped to', dest) + +if __name__ == '__main__': + dump(sys.argv[-1]) +