mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Dump docx files with prettified xml
This commit is contained in:
parent
9d748f7e91
commit
90374d24c4
37
src/calibre/ebooks/docx/dump.py
Normal file
37
src/calibre/ebooks/docx/dump.py
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
import sys, os, shutil
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
from calibre import walk
|
||||||
|
from calibre.utils.zipfile import ZipFile
|
||||||
|
|
||||||
|
def dump(path):
|
||||||
|
dest = os.path.splitext(os.path.basename(path))[0]
|
||||||
|
dest += '_extracted'
|
||||||
|
if os.path.exists(dest):
|
||||||
|
shutil.rmtree(dest)
|
||||||
|
with ZipFile(path) as zf:
|
||||||
|
zf.extractall(dest)
|
||||||
|
|
||||||
|
for f in walk(dest):
|
||||||
|
if f.endswith('.xml'):
|
||||||
|
with open(f, 'r+b') as stream:
|
||||||
|
raw = stream.read()
|
||||||
|
root = etree.fromstring(raw)
|
||||||
|
stream.seek(0)
|
||||||
|
stream.truncate()
|
||||||
|
stream.write(etree.tostring(root, pretty_print=True, encoding='utf-8', xml_declaration=True))
|
||||||
|
|
||||||
|
print (path, 'dumped to', dest)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
dump(sys.argv[-1])
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user