Support 7z archives in addition to ZIP and RAR. Fixes #2036266 [[Enhancement] support 7zip archives](https://bugs.launchpad.net/calibre/+bug/2036266)

This commit is contained in:
Kovid Goyal 2023-09-16 10:48:04 +05:30
parent 5507e42308
commit 7c8a20ac30
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 65 additions and 36 deletions

View File

@ -60,29 +60,61 @@ class KPFExtract(FileTypePlugin):
of.write(zf.read(candidates[0])) of.write(zf.read(candidates[0]))
return of.name return of.name
class RAR:
    """Adapter giving a RAR archive the ``namelist``/``read``/``close`` trio.

    The methods delegate to ``calibre.utils.unrar``; the imports are done
    lazily inside each method, so constructing an instance does not touch
    the archive or load the unrar helpers.
    """

    def __init__(self, archive):
        # Only remember what to operate on; nothing is opened here.
        self.archive = archive

    def close(self):
        """Do nothing: this adapter holds no open handle."""

    def namelist(self):
        """Return the entries reported by ``unrar.names`` as a list."""
        from calibre.utils.unrar import names
        member_names = names(self.archive)
        return list(member_names)

    def read(self, fname):
        """Return the data of the member called *fname*.

        ``extract_member`` is looked up by exact name (``match=None``) and
        the element at index 1 of its result is handed back.
        """
        from calibre.utils.unrar import extract_member
        extracted = extract_member(self.archive, match=None, name=fname)
        return extracted[1]
class SevenZip:
    """Adapter giving a 7z archive the ``namelist``/``read``/``close`` trio.

    Wraps ``py7zr.SevenZipFile`` opened in read mode. The py7zr import is
    done lazily in ``__init__`` so merely importing this module does not
    require py7zr.
    """

    def __init__(self, archive):
        from py7zr import SevenZipFile
        self.zf = SevenZipFile(archive, 'r')

    def namelist(self):
        """Return the archive's member names as a list."""
        return list(self.zf.getnames())

    def close(self):
        """Close the underlying ``SevenZipFile``."""
        self.zf.close()

    def read(self, fname):
        """Return the bytes of the member called *fname*.

        Raises ``KeyError`` when the archive yields no entry for *fname*.
        """
        # Per the py7zr documentation, a read()/extract() call leaves the
        # archive object exhausted and reset() must be called before the
        # next one. Rewinding up front keeps repeated read() calls working.
        self.zf.reset()
        # read() may hand back nothing at all; normalise that to an empty
        # mapping so a missing member is reported as KeyError.
        extracted = self.zf.read((fname,)) or {}
        return extracted[fname].read()
class ArchiveExtract(FileTypePlugin): class ArchiveExtract(FileTypePlugin):
name = 'Archive Extract' name = 'Archive Extract'
author = 'Kovid Goyal' author = 'Kovid Goyal'
description = _('Extract common e-book formats from archive files ' description = _('Extract common e-book formats from archive files '
'(ZIP/RAR). Also try to autodetect if they are actually ' '(ZIP/RAR/7z). Also try to autodetect if they are actually '
'CBZ/CBR files.') 'CBZ/CBR/CB7 files.')
file_types = {'zip', 'rar'} file_types = {'zip', 'rar', '7z'}
supported_platforms = ['windows', 'osx', 'linux'] supported_platforms = ['windows', 'osx', 'linux']
on_import = True on_import = True
def run(self, archive): def run(self, archive):
from calibre.utils.zipfile import ZipFile q = archive.lower()
is_rar = archive.lower().endswith('.rar') if q.endswith('.rar'):
if is_rar: comic_ext = 'cbr'
from calibre.utils.unrar import extract_member, names zf = RAR(archive)
elif q.endswith('.7z'):
comic_ext = 'cb7'
zf = SevenZip(archive)
else: else:
from calibre.utils.zipfile import ZipFile
zf = ZipFile(archive, 'r') zf = ZipFile(archive, 'r')
comic_ext = 'cbz'
if is_rar:
fnames = list(names(archive))
else:
fnames = zf.namelist()
def fname_ok(fname): def fname_ok(fname):
bn = os.path.basename(fname).lower() bn = os.path.basename(fname).lower()
@ -96,31 +128,28 @@ class ArchiveExtract(FileTypePlugin):
return False return False
return True return True
fnames = list(filter(fname_ok, fnames)) with closing(zf):
if is_comic(fnames): fnames = zf.namelist()
ext = '.cbr' if is_rar else '.cbz' fnames = list(filter(fname_ok, fnames))
of = self.temporary_file('_archive_extract'+ext) if is_comic(fnames):
with open(archive, 'rb') as f: ext = comic_ext
of.write(f.read()) of = self.temporary_file('_archive_extract'+ext)
of.close() with closing(of), open(archive, 'rb') as f:
return of.name of.write(f.read())
if len(fnames) > 1 or not fnames: return of.name
return archive if len(fnames) > 1 or not fnames:
fname = fnames[0] return archive
ext = os.path.splitext(fname)[1][1:] fname = fnames[0]
if ext.lower() not in { ext = os.path.splitext(fname)[1][1:]
'lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf', 'mp3', 'pdb', if ext.lower() not in {
'azw', 'azw1', 'azw3', 'fb2', 'docx', 'doc', 'odt'}: 'lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf', 'mp3', 'pdb',
return archive 'azw', 'azw1', 'azw3', 'fb2', 'docx', 'doc', 'odt'}:
return archive
of = self.temporary_file('_archive_extract.'+ext) of = self.temporary_file('_archive_extract.'+ext)
with closing(of): with closing(of):
if is_rar:
data = extract_member(archive, match=None, name=fname)[1]
of.write(data)
else:
of.write(zf.read(fname)) of.write(zf.read(fname))
return of.name return of.name
def get_comic_book_info(d, mi, series_index='volume'): def get_comic_book_info(d, mi, series_index='volume'):

View File

@ -31,7 +31,7 @@ from polyglot.builtins import iteritems, string_or_bytes
def get_filters(): def get_filters():
archives = ['zip', 'rar'] archives = ['zip', 'rar', '7z']
return [ return [
(_('Books'), [x for x in BOOK_EXTENSIONS if x not in archives]), (_('Books'), [x for x in BOOK_EXTENSIONS if x not in archives]),
(_('EPUB books'), ['epub', 'kepub']), (_('EPUB books'), ['epub', 'kepub']),