Add support for the CB7 comic file format

Fixes #1912212 [Add support for CB7 and CBA file format](https://bugs.launchpad.net/calibre/+bug/1912212)
This commit is contained in:
Kovid Goyal 2021-01-19 13:33:12 +05:30
parent eae5ff6d88
commit 54d57c6748
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
8 changed files with 137 additions and 6 deletions

View File

@ -770,6 +770,25 @@
} }
}, },
{
"name": "texttable",
"comment": "needed for py7zr",
"unix": {
"filename": "texttable-1.6.3.tar.gz",
"hash": "sha256:ce0faf21aa77d806bbff22b107cc22cce68dc9438f97a2df32c93e9afa4ce436",
"urls": ["pypi"]
}
},
{
"name": "py7zr",
"unix": {
"filename": "py7zr-0.11.1.tar.gz",
"hash": "sha256:29a427f61c1be1907406fde59ae912dd7d44730771e58d643d5021f17fedba8e",
"urls": ["pypi"]
}
},
{ {
"name": "zeroconf", "name": "zeroconf",
"python": 3, "python": 3,

View File

@ -18,7 +18,7 @@ What formats does calibre support conversion to/from?
calibre supports the conversion of many input formats to many output formats. calibre supports the conversion of many input formats to many output formats.
It can convert every input format in the following list, to every output format. It can convert every input format in the following list, to every output format.
*Input Formats:* AZW, AZW3, AZW4, CBZ, CBR, CBC, CHM, DJVU, DOCX, EPUB, FB2, FBZ, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ *Input Formats:* AZW, AZW3, AZW4, CBZ, CBR, CB7, CBC, CHM, DJVU, DOCX, EPUB, FB2, FBZ, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ
*Output Formats:* AZW3, EPUB, DOCX, FB2, HTMLZ, OEB, LIT, LRF, MOBI, PDB, PMLZ, RB, PDF, RTF, SNB, TCR, TXT, TXTZ, ZIP *Output Formats:* AZW3, EPUB, DOCX, FB2, HTMLZ, OEB, LIT, LRF, MOBI, PDB, PMLZ, RB, PDF, RTF, SNB, TCR, TXT, TXTZ, ZIP

View File

@ -187,15 +187,21 @@ def extract(path, dir):
elif id_.startswith(b'PK'): elif id_.startswith(b'PK'):
from calibre.libunzip import extract as zipextract from calibre.libunzip import extract as zipextract
extractor = zipextract extractor = zipextract
elif id_.startswith(b'7z'):
from calibre.utils.seven_zip import extract as seven_extract
extractor = seven_extract
if extractor is None: if extractor is None:
# Fallback to file extension # Fallback to file extension
ext = os.path.splitext(path)[1][1:].lower() ext = os.path.splitext(path)[1][1:].lower()
if ext in ['zip', 'cbz', 'epub', 'oebzip']: if ext in ('zip', 'cbz', 'epub', 'oebzip'):
from calibre.libunzip import extract as zipextract from calibre.libunzip import extract as zipextract
extractor = zipextract extractor = zipextract
elif ext in ['cbr', 'rar']: elif ext in ('cbr', 'rar'):
from calibre.utils.unrar import extract as rarextract from calibre.utils.unrar import extract as rarextract
extractor = rarextract extractor = rarextract
elif ext in ('cb7', '7z'):
from calibre.utils.seven_zip import extract as seven_extract
extractor = seven_extract
if extractor is None: if extractor is None:
raise Exception('Unknown archive type') raise Exception('Unknown archive type')
extractor(path, dir) extractor(path, dir)

View File

@ -133,7 +133,7 @@ plugins += [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, KPFExtract]
class ComicMetadataReader(MetadataReaderPlugin): class ComicMetadataReader(MetadataReaderPlugin):
name = 'Read comic metadata' name = 'Read comic metadata'
file_types = {'cbr', 'cbz'} file_types = {'cbr', 'cbz', 'cb7'}
description = _('Extract cover from comic files') description = _('Extract cover from comic files')
def customization_help(self, gui=False): def customization_help(self, gui=False):
@ -148,8 +148,12 @@ class ComicMetadataReader(MetadataReaderPlugin):
ftype = 'cbr' ftype = 'cbr'
elif id_.startswith(b'PK'): elif id_.startswith(b'PK'):
ftype = 'cbz' ftype = 'cbz'
elif id_.startswith(b'7z'):
ftype = 'cb7'
if ftype == 'cbr': if ftype == 'cbr':
from calibre.utils.unrar import extract_cover_image from calibre.utils.unrar import extract_cover_image
elif ftype == 'cb7':
from calibre.utils.seven_zip import extract_cover_image
else: else:
from calibre.libunzip import extract_cover_image from calibre.libunzip import extract_cover_image
from calibre.ebooks.metadata import MetaInformation from calibre.ebooks.metadata import MetaInformation

View File

@ -35,7 +35,7 @@ class ParserError(ValueError):
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm', BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm',
'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'updb', 'pdr', 'prc', 'mobi', 'azw', 'doc', 'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'updb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
'epub', 'fb2', 'fbz', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip', 'epub', 'fb2', 'fbz', 'djv', 'djvu', 'lrx', 'cbr', 'cb7', 'cbz', 'cbc', 'oebzip',
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb', 'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx', 'docm', 'md', 'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx', 'docm', 'md',
'textile', 'markdown', 'ibook', 'ibooks', 'iba', 'azw3', 'ps', 'kepub', 'kfx', 'kpf'] 'textile', 'markdown', 'ibook', 'ibooks', 'iba', 'azw3', 'ps', 'kepub', 'kfx', 'kpf']

View File

@ -21,7 +21,7 @@ class ComicInput(InputFormatPlugin):
name = 'Comic Input' name = 'Comic Input'
author = 'Kovid Goyal' author = 'Kovid Goyal'
description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices' description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices'
file_types = {'cbz', 'cbr', 'cbc'} file_types = {'cbz', 'cbr', 'cb7', 'cbc'}
is_image_collection = True is_image_collection = True
commit_name = 'comic_input' commit_name = 'comic_input'
core_usage = -1 core_usage = -1

View File

@ -373,6 +373,10 @@ class BuildTest(unittest.TestCase):
from calibre.utils.unrar import test_basic from calibre.utils.unrar import test_basic
test_basic() test_basic()
def test_7z(self):
from calibre.utils.seven_zip import test_basic
test_basic()
@unittest.skipUnless(iswindows, 'WPD is windows only') @unittest.skipUnless(iswindows, 'WPD is windows only')
def test_wpd(self): def test_wpd(self):
from calibre_extensions import wpd from calibre_extensions import wpd

View File

@ -0,0 +1,98 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2021, Kovid Goyal <kovid at kovidgoyal.net>
import os
import re
from calibre.constants import iswindows
def open_archive(path_or_stream, mode='r'):
    """Open a 7z archive and return the py7zr ``SevenZipFile`` object.

    ``path_or_stream`` and ``mode`` are handed unchanged to
    ``py7zr.SevenZipFile``. py7zr is imported inside the function, so it is
    only loaded when an archive is actually opened.
    """
    import py7zr
    return py7zr.SevenZipFile(path_or_stream, mode=mode)
def names(path_or_stream):
    """Return the member names of the 7z archive as a tuple."""
    with open_archive(path_or_stream) as archive:
        member_names = tuple(archive.getnames())
    return member_names
def extract_member(path_or_stream, match=None, name=None):
    """Read a single member out of a 7z archive.

    :param path_or_stream: Archive path or file-like object, passed to
        ``open_archive``.
    :param match: Compiled regular expression searched against member names.
        When ``None`` a pattern matching common image extensions
        (jpg, jpeg, gif, png) is used.
    :param name: Exact member name to look for. On Windows, ``os.sep`` is
        normalised to ``/`` in both ``name`` and the archive's member names
        before comparing.
    :return: ``(member_name, data_bytes)`` for the selected member, or
        ``None`` when nothing in the archive matches.

    A member whose name equals ``name`` always wins. Only when no such member
    exists is the first member (in archive order) matching ``match`` used.
    Previously the first member matching *either* test was returned, so an
    image stored earlier in the archive could be returned instead of the
    member that was explicitly requested by name.
    """
    if iswindows and name is not None:
        name = name.replace(os.sep, '/')
    if match is None:
        match = re.compile(r'\.(jpg|jpeg|gif|png)\s*$', re.I)

    def normalized(fname):
        # Member names are compared using forward slashes on Windows
        return fname.replace(os.sep, '/') if iswindows else fname

    with open_archive(path_or_stream) as ar:
        chosen = None
        for fname in ar.getnames():
            candidate = normalized(fname)
            if name is not None and candidate == name:
                # Exact hit: overrides any earlier pattern-based fallback
                chosen = fname
                break
            if chosen is None and match.search(candidate) is not None:
                chosen = fname
                if name is None:
                    # No exact name to keep looking for
                    break
        if chosen is not None:
            # read() returns a mapping of member name -> file-like object
            return chosen, ar.read([chosen])[chosen].read()
    return None
def extract_cover_image(stream):
    """Extract the cover image from a 7z comic archive held in ``stream``.

    The archive's member names are ordered with ``sort_key`` and the first
    one accepted by ``name_ok`` (both from ``calibre.libunzip``) is handed to
    ``extract_member``. The stream is rewound to its starting position after
    the names have been listed. Returns whatever ``extract_member`` returns,
    or ``None`` when no member name is accepted.
    """
    start = stream.tell()
    from calibre.libunzip import name_ok, sort_key
    ordered = sorted(names(stream), key=sort_key)
    stream.seek(start)
    cover = next((candidate for candidate in ordered if name_ok(candidate)), None)
    if cover is None:
        return None
    return extract_member(stream, name=cover, match=None)
def extract(path_or_stream, location):
    """Extract the contents of the 7z archive into ``location``."""
    with open_archive(path_or_stream) as archive:
        archive.extract(location)
# Test {{{
def test_basic():
    """Round-trip a small set of files through a 7z archive.

    Inside a temporary working directory the fixture files are written,
    packed into ``a.7z``, and then checked three ways: the archive's member
    names, the data read back in memory via ``readall()``, and the files
    produced on disk by ``extract()``. Raises ``ValueError`` on any mismatch.

    The on-disk check reads from a separate output directory. Previously it
    re-read the fixture files the test had just written, so extraction was
    never actually exercised.
    """
    from tempfile import TemporaryDirectory
    from calibre import CurrentDir

    tdata = {
        '1/sub-one': b'sub-one\n',
        '2/sub-two.txt': b'sub-two\n',
        'F\xfc\xdfe.txt': b'unicode\n',
        'max-compressed': b'max\n',
        'one.txt': b'one\n',
        'symlink': b'2/sub-two.txt',
        'uncompressed': b'uncompressed\n',
        '\u8bf6\u6bd4\u5c41.txt': b'chinese unicode\n'}

    def do_test():
        # Create the fixture files relative to the current directory
        for name, data in tdata.items():
            if '/' in name:
                os.makedirs(os.path.dirname(name), exist_ok=True)
            with open(name, 'wb') as f:
                f.write(data)
        with open_archive('a.7z', mode='w') as zf:
            for name in tdata:
                zf.write(name)
        with open_archive('a.7z') as zf:
            if set(zf.getnames()) != set(tdata):
                raise ValueError('names not equal')
            read_data = {name: af.read() for name, af in zf.readall().items()}
            if read_data != tdata:
                raise ValueError('data not equal')

        # Extract into a fresh directory so the files compared below can only
        # have come from the archive, not from the fixtures written above
        out_dir = 'extracted'
        os.mkdir(out_dir)
        extract('a.7z', out_dir)
        for name, expected in tdata.items():
            with open(os.path.join(out_dir, name), 'rb') as s:
                if s.read() != expected:
                    raise ValueError('Did not extract %s properly' % name)

    with TemporaryDirectory('test-7z') as tdir, CurrentDir(tdir):
        do_test()
# Run the self-test when this module is executed directly as a script
if __name__ == '__main__':
    test_basic()