From 0fc7ae64ca1113367fa23050afb1f9fc4d6a3c04 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 25 Apr 2015 19:18:16 +0530
Subject: [PATCH] Make extraction of ebook from zip/rar archive a little smarter

---
 src/calibre/ebooks/metadata/archive.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/calibre/ebooks/metadata/archive.py b/src/calibre/ebooks/metadata/archive.py
index adbea86521..c0734fd78f 100644
--- a/src/calibre/ebooks/metadata/archive.py
+++ b/src/calibre/ebooks/metadata/archive.py
@@ -60,7 +60,19 @@ class ArchiveExtract(FileTypePlugin):
         else:
             fnames = zf.namelist()
 
-        fnames = [x for x in fnames if '.' in x and x.lower().rpartition('/')[-1] != 'thumbs.db']
+        def fname_ok(fname):
+            bn = os.path.basename(fname).lower()
+            if bn == 'thumbs.db':
+                return False
+            if '.' not in bn:
+                return False
+            if bn.rpartition('.')[-1] in {'diz', 'nfo'}:
+                return False
+            if '__MACOSX' in fname.split('/'):
+                return False
+            return True
+
+        fnames = list(filter(fname_ok, fnames))
         if is_comic(fnames):
             ext = '.cbr' if is_rar else '.cbz'
             of = self.temporary_file('_archive_extract'+ext)
@@ -72,8 +84,9 @@ class ArchiveExtract(FileTypePlugin):
             return archive
         fname = fnames[0]
         ext = os.path.splitext(fname)[1][1:]
-        if ext.lower() not in ('lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf',
-                'mp3', 'pdb', 'azw', 'azw1', 'azw3', 'fb2'):
+        if ext.lower() not in {
+                'lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf', 'mp3', 'pdb',
+                'azw', 'azw1', 'azw3', 'fb2', 'docx', 'doc', 'odt'}:
             return archive
 
         of = self.temporary_file('_archive_extract.'+ext)