Summary (commentary only; patch tools skip text before the first "diff --git" header):

  * src/calibre/ebooks/docx/dump.py: the loop in dump() that pretty-prints
    every .xml/.rels file is split out into pretty_all_xml_in_dir(path);
    dump() now extracts the archive and then calls that helper.
  * src/calibre/ebooks/tweak.py: adds docx_exploder(), which extracts the
    book with zipextract(), pretty-prints its XML with
    pretty_all_xml_in_dir(), and returns the path of the first file named
    document.xml (raising Error if there is none). get_tools() now maps the
    'docx' format to (docx_exploder, zip_rebuilder).

NOTE(review): the line structure below was restored from a copy of this patch
that had been collapsed onto a single line. Line breaks were checked against
the hunk line counts. Indentation of added/removed lines follows from Python
block structure, but the indentation of four context lines could not be
recovered and is a best guess: stream.truncate() and stream.write(...) in
dump.py, and zfn = ... and zf.write(...) in tweak.py. Confirm against the
target files, or apply with "git apply --ignore-whitespace" / "patch -l".

diff --git a/src/calibre/ebooks/docx/dump.py b/src/calibre/ebooks/docx/dump.py
index d111bc5a83..d5bda75405 100644
--- a/src/calibre/ebooks/docx/dump.py
+++ b/src/calibre/ebooks/docx/dump.py
@@ -13,15 +13,8 @@ from lxml import etree
 from calibre import walk
 from calibre.utils.zipfile import ZipFile
 
-def dump(path):
-    dest = os.path.splitext(os.path.basename(path))[0]
-    dest += '-dumped'
-    if os.path.exists(dest):
-        shutil.rmtree(dest)
-    with ZipFile(path) as zf:
-        zf.extractall(dest)
-
-    for f in walk(dest):
+def pretty_all_xml_in_dir(path):
+    for f in walk(path):
         if f.endswith('.xml') or f.endswith('.rels'):
             with open(f, 'r+b') as stream:
                 raw = stream.read()
@@ -31,6 +24,15 @@ def dump(path):
                     stream.truncate()
                     stream.write(etree.tostring(root, pretty_print=True, encoding='utf-8', xml_declaration=True))
 
+def dump(path):
+    dest = os.path.splitext(os.path.basename(path))[0]
+    dest += '-dumped'
+    if os.path.exists(dest):
+        shutil.rmtree(dest)
+    with ZipFile(path) as zf:
+        zf.extractall(dest)
+    pretty_all_xml_in_dir(dest)
+
     print (path, 'dumped to', dest)
 
 if __name__ == '__main__':
diff --git a/src/calibre/ebooks/tweak.py b/src/calibre/ebooks/tweak.py
index 16301607a5..27f1475da6 100644
--- a/src/calibre/ebooks/tweak.py
+++ b/src/calibre/ebooks/tweak.py
@@ -70,6 +70,15 @@ def zip_rebuilder(tdir, path):
                 zfn = unicodedata.normalize('NFC', os.path.relpath(absfn, tdir).replace(os.sep, '/'))
                 zf.write(absfn, zfn)
 
+def docx_exploder(path, tdir, question=lambda x:True):
+    zipextract(path, tdir)
+    from calibre.ebooks.docx.dump import pretty_all_xml_in_dir
+    pretty_all_xml_in_dir(tdir)
+    for f in walk(tdir):
+        if os.path.basename(f) == 'document.xml':
+            return f
+    raise Error('Invalid book: Could not find document.xml')
+
 def get_tools(fmt):
     fmt = fmt.lower()
 
@@ -78,6 +87,8 @@ def get_tools(fmt):
         ans = mobi_exploder, rebuild
     elif fmt in {'epub', 'htmlz'}:
         ans = zip_exploder, zip_rebuilder
+    elif fmt == 'docx':
+        ans = docx_exploder, zip_rebuilder
     else:
         ans = None, None
 