Make extraction of ebook from zip/rar archive a little smarter

This commit is contained in:
Kovid Goyal 2015-04-25 19:18:16 +05:30
parent b0e51635f5
commit 0fc7ae64ca

View File

@@ -60,7 +60,19 @@ class ArchiveExtract(FileTypePlugin):
else: else:
fnames = zf.namelist() fnames = zf.namelist()
fnames = [x for x in fnames if '.' in x and x.lower().rpartition('/')[-1] != 'thumbs.db'] def fname_ok(fname):
bn = os.path.basename(fname).lower()
if bn == 'thumbs.db':
return False
if '.' not in bn:
return False
if bn.rpartition('.')[-1] in {'diz', 'nfo'}:
return False
if '__MACOSX' in fname.split('/'):
return False
return True
fnames = list(filter(fname_ok, fnames))
if is_comic(fnames): if is_comic(fnames):
ext = '.cbr' if is_rar else '.cbz' ext = '.cbr' if is_rar else '.cbz'
of = self.temporary_file('_archive_extract'+ext) of = self.temporary_file('_archive_extract'+ext)
@@ -72,8 +84,9 @@ class ArchiveExtract(FileTypePlugin):
return archive return archive
fname = fnames[0] fname = fnames[0]
ext = os.path.splitext(fname)[1][1:] ext = os.path.splitext(fname)[1][1:]
if ext.lower() not in ('lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf', if ext.lower() not in {
'mp3', 'pdb', 'azw', 'azw1', 'azw3', 'fb2'): 'lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf', 'mp3', 'pdb',
'azw', 'azw1', 'azw3', 'fb2', 'docx', 'doc', 'odt'}:
return archive return archive
of = self.temporary_file('_archive_extract.'+ext) of = self.temporary_file('_archive_extract.'+ext)