diff --git a/src/calibre/ebooks/metadata/archive.py b/src/calibre/ebooks/metadata/archive.py index adbea86521..c0734fd78f 100644 --- a/src/calibre/ebooks/metadata/archive.py +++ b/src/calibre/ebooks/metadata/archive.py @@ -60,7 +60,19 @@ class ArchiveExtract(FileTypePlugin): else: fnames = zf.namelist() - fnames = [x for x in fnames if '.' in x and x.lower().rpartition('/')[-1] != 'thumbs.db'] + def fname_ok(fname): + bn = os.path.basename(fname).lower() + if bn == 'thumbs.db': + return False + if '.' not in bn: + return False + if bn.rpartition('.')[-1] in {'diz', 'nfo'}: + return False + if '__MACOSX' in fname.split('/'): + return False + return True + + fnames = list(filter(fname_ok, fnames)) if is_comic(fnames): ext = '.cbr' if is_rar else '.cbz' of = self.temporary_file('_archive_extract'+ext) @@ -72,8 +84,9 @@ class ArchiveExtract(FileTypePlugin): return archive fname = fnames[0] ext = os.path.splitext(fname)[1][1:] - if ext.lower() not in ('lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf', - 'mp3', 'pdb', 'azw', 'azw1', 'azw3', 'fb2'): + if ext.lower() not in { + 'lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf', 'mp3', 'pdb', + 'azw', 'azw1', 'azw3', 'fb2', 'docx', 'doc', 'odt'}: return archive of = self.temporary_file('_archive_extract.'+ext)