Allow tweaking of DOCX files using calibre-debug --explode-book

This commit is contained in:
Kovid Goyal 2015-02-03 09:50:37 +05:30
parent 0b92b95637
commit 28e6946de4
2 changed files with 22 additions and 9 deletions

View File

@ -13,15 +13,8 @@ from lxml import etree
from calibre import walk
from calibre.utils.zipfile import ZipFile
def dump(path):
dest = os.path.splitext(os.path.basename(path))[0]
dest += '-dumped'
if os.path.exists(dest):
shutil.rmtree(dest)
with ZipFile(path) as zf:
zf.extractall(dest)
for f in walk(dest):
def pretty_all_xml_in_dir(path):
for f in walk(path):
if f.endswith('.xml') or f.endswith('.rels'):
with open(f, 'r+b') as stream:
raw = stream.read()
@ -31,6 +24,15 @@ def dump(path):
stream.truncate()
stream.write(etree.tostring(root, pretty_print=True, encoding='utf-8', xml_declaration=True))
def dump(path):
    """Unpack the ZIP container at *path* and pretty-print its XML parts.

    The output directory is named after the input file: its base name with
    the extension dropped and ``-dumped`` appended. Because the directory
    part of *path* is discarded, the output lands relative to the current
    working directory. Anything already present at that location is removed
    first.
    """
    stem, _ext = os.path.splitext(os.path.basename(path))
    dest = stem + '-dumped'
    # Start from a clean slate so stale files from an earlier dump do not
    # get mixed in with the fresh extraction.
    if os.path.exists(dest):
        shutil.rmtree(dest)
    with ZipFile(path) as archive:
        archive.extractall(dest)
    pretty_all_xml_in_dir(dest)
    print(path, 'dumped to', dest)
if __name__ == '__main__':

View File

@ -70,6 +70,15 @@ def zip_rebuilder(tdir, path):
zfn = unicodedata.normalize('NFC', os.path.relpath(absfn, tdir).replace(os.sep, '/'))
zf.write(absfn, zfn)
def docx_exploder(path, tdir, question=lambda x:True):
    """Explode the DOCX file at *path* into *tdir* and locate its main document.

    The archive is extracted with ``zipextract`` and the extracted tree is
    then passed to ``pretty_all_xml_in_dir``. *question* is accepted but not
    used by this function.

    Returns the first path yielded by ``walk(tdir)`` whose base name is
    ``document.xml``. Raises ``Error`` if no such file exists.
    """
    # Imported lazily, as in the original, so the DOCX code is only loaded
    # when a DOCX file is actually exploded.
    from calibre.ebooks.docx.dump import pretty_all_xml_in_dir

    zipextract(path, tdir)
    pretty_all_xml_in_dir(tdir)

    candidates = (
        entry for entry in walk(tdir)
        if os.path.basename(entry) == 'document.xml'
    )
    document = next(candidates, None)
    if document is None:
        raise Error('Invalid book: Could not find document.xml')
    return document
def get_tools(fmt):
fmt = fmt.lower()
@ -78,6 +87,8 @@ def get_tools(fmt):
ans = mobi_exploder, rebuild
elif fmt in {'epub', 'htmlz'}:
ans = zip_exploder, zip_rebuilder
elif fmt == 'docx':
ans = docx_exploder, zip_rebuilder
else:
ans = None, None