Make extraction of ebook from zip/rar archive a little smarter

2025-07-09 03:04:10 -04:00 · 2015-04-25 19:18:16 +05:30 · 2015-04-25 19:18:16 +05:30 · 0fc7ae64ca
commit 0fc7ae64ca
parent b0e51635f5
1 changed file with 16 additions and 3 deletions
--- a/src/calibre/ebooks/metadata/archive.py
+++ b/src/calibre/ebooks/metadata/archive.py
@@ -60,7 +60,19 @@ class ArchiveExtract(FileTypePlugin):
        else:
            fnames = zf.namelist()
-        fnames = [x for x in fnames if '.' in x and x.lower().rpartition('/')[-1] != 'thumbs.db']
+        def fname_ok(fname):
            bn = os.path.basename(fname).lower()
            if bn == 'thumbs.db':
                return False
            if '.' not in bn:
                return False
            if bn.rpartition('.')[-1] in {'diz', 'nfo'}:
                return False
            if '__MACOSX' in fname.split('/'):
                return False
            return True
        fnames = list(filter(fname_ok, fnames))
        if is_comic(fnames):
            ext = '.cbr' if is_rar else '.cbz'
            of = self.temporary_file('_archive_extract'+ext)
@@ -72,8 +84,9 @@ class ArchiveExtract(FileTypePlugin):
            return archive
        fname = fnames[0]
        ext = os.path.splitext(fname)[1][1:]
-        if ext.lower() not in ('lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf',
+        if ext.lower() not in {
-                'mp3', 'pdb', 'azw', 'azw1', 'azw3', 'fb2'):
+                'lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf', 'mp3', 'pdb',
                'azw', 'azw1', 'azw3', 'fb2', 'docx', 'doc', 'odt'}:
            return archive
        of = self.temporary_file('_archive_extract.'+ext)