Add support for the CB7 comic file format

Fixes #1912212 [Add support for CB7 and CBA file format](https://bugs.launchpad.net/calibre/+bug/1912212)
This commit is contained in:
Kovid Goyal 2021-01-19 13:33:12 +05:30
parent eae5ff6d88
commit 54d57c6748
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
8 changed files with 137 additions and 6 deletions

View File

@ -770,6 +770,25 @@
}
},
{
"name": "texttable",
"comment": "needed for py7zr",
"unix": {
"filename": "texttable-1.6.3.tar.gz",
"hash": "sha256:ce0faf21aa77d806bbff22b107cc22cce68dc9438f97a2df32c93e9afa4ce436",
"urls": ["pypi"]
}
},
{
"name": "py7zr",
"unix": {
"filename": "py7zr-0.11.1.tar.gz",
"hash": "sha256:29a427f61c1be1907406fde59ae912dd7d44730771e58d643d5021f17fedba8e",
"urls": ["pypi"]
}
},
{
"name": "zeroconf",
"python": 3,

View File

@ -18,7 +18,7 @@ What formats does calibre support conversion to/from?
calibre supports the conversion of many input formats to many output formats.
It can convert every input format in the following list, to every output format.
*Input Formats:* AZW, AZW3, AZW4, CBZ, CBR, CBC, CHM, DJVU, DOCX, EPUB, FB2, FBZ, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ
*Input Formats:* AZW, AZW3, AZW4, CBZ, CBR, CB7, CBC, CHM, DJVU, DOCX, EPUB, FB2, FBZ, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ
*Output Formats:* AZW3, EPUB, DOCX, FB2, HTMLZ, OEB, LIT, LRF, MOBI, PDB, PMLZ, RB, PDF, RTF, SNB, TCR, TXT, TXTZ, ZIP

View File

@ -187,15 +187,21 @@ def extract(path, dir):
elif id_.startswith(b'PK'):
from calibre.libunzip import extract as zipextract
extractor = zipextract
elif id_.startswith(b'7z'):
from calibre.utils.seven_zip import extract as seven_extract
extractor = seven_extract
if extractor is None:
# Fallback to file extension
ext = os.path.splitext(path)[1][1:].lower()
if ext in ['zip', 'cbz', 'epub', 'oebzip']:
if ext in ('zip', 'cbz', 'epub', 'oebzip'):
from calibre.libunzip import extract as zipextract
extractor = zipextract
elif ext in ['cbr', 'rar']:
elif ext in ('cbr', 'rar'):
from calibre.utils.unrar import extract as rarextract
extractor = rarextract
elif ext in ('cb7', '7z'):
from calibre.utils.seven_zip import extract as seven_extract
extractor = seven_extract
if extractor is None:
raise Exception('Unknown archive type')
extractor(path, dir)

View File

@ -133,7 +133,7 @@ plugins += [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, KPFExtract]
class ComicMetadataReader(MetadataReaderPlugin):
name = 'Read comic metadata'
file_types = {'cbr', 'cbz'}
file_types = {'cbr', 'cbz', 'cb7'}
description = _('Extract cover from comic files')
def customization_help(self, gui=False):
@ -148,8 +148,12 @@ class ComicMetadataReader(MetadataReaderPlugin):
ftype = 'cbr'
elif id_.startswith(b'PK'):
ftype = 'cbz'
elif id_.startswith(b'7z'):
ftype = 'cb7'
if ftype == 'cbr':
from calibre.utils.unrar import extract_cover_image
elif ftype == 'cb7':
from calibre.utils.seven_zip import extract_cover_image
else:
from calibre.libunzip import extract_cover_image
from calibre.ebooks.metadata import MetaInformation

View File

@ -35,7 +35,7 @@ class ParserError(ValueError):
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm',
'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'updb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
'epub', 'fb2', 'fbz', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
'epub', 'fb2', 'fbz', 'djv', 'djvu', 'lrx', 'cbr', 'cb7', 'cbz', 'cbc', 'oebzip',
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx', 'docm', 'md',
'textile', 'markdown', 'ibook', 'ibooks', 'iba', 'azw3', 'ps', 'kepub', 'kfx', 'kpf']

View File

@ -21,7 +21,7 @@ class ComicInput(InputFormatPlugin):
name = 'Comic Input'
author = 'Kovid Goyal'
description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices'
file_types = {'cbz', 'cbr', 'cbc'}
file_types = {'cbz', 'cbr', 'cb7', 'cbc'}
is_image_collection = True
commit_name = 'comic_input'
core_usage = -1

View File

@ -373,6 +373,10 @@ class BuildTest(unittest.TestCase):
from calibre.utils.unrar import test_basic
test_basic()
def test_7z(self):
from calibre.utils.seven_zip import test_basic
test_basic()
@unittest.skipUnless(iswindows, 'WPD is windows only')
def test_wpd(self):
from calibre_extensions import wpd

View File

@ -0,0 +1,98 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2021, Kovid Goyal <kovid at kovidgoyal.net>
import os
import re
from calibre.constants import iswindows
def open_archive(path_or_stream, mode='r'):
    '''
    Create a py7zr ``SevenZipFile`` for the given archive.

    :param path_or_stream: forwarded unchanged as the first argument of
        ``SevenZipFile``.
    :param mode: forwarded as the ``mode`` keyword; ``'r'`` by default.
    :return: the newly constructed ``SevenZipFile`` object.
    '''
    # py7zr is imported at call time rather than at module import time.
    import py7zr
    return py7zr.SevenZipFile(path_or_stream, mode=mode)
def names(path_or_stream):
    '''
    Return the member names of the archive as a tuple.

    The archive is opened with ``open_archive`` and closed again before
    returning.
    '''
    with open_archive(path_or_stream) as archive:
        member_names = archive.getnames()
        return tuple(member_names)
def extract_member(path_or_stream, match=None, name=None):
    '''
    Return ``(member_name, data)`` for the first matching archive member.

    Members are tested in the order reported by the archive's ``getnames()``.
    A member is selected when its name equals ``name`` or when ``match``
    finds a hit in its name.

    :param path_or_stream: archive to read; handed to ``open_archive``.
    :param match: compiled regular expression searched against each member
        name. When both ``match`` and ``name`` are omitted, a
        case-insensitive pattern for the extensions jpg, jpeg, gif and png is
        used.
    :param name: exact member name to look for. When given without ``match``
        only that member is eligible.
    :return: a ``(name, contents)`` tuple, or ``None`` when nothing matched.
    '''
    if iswindows and name is not None:
        name = name.replace(os.sep, '/')
    # Fall back to the image pattern only when the caller supplied no
    # selector at all. Substituting it whenever ``match`` is None would make
    # a request for one specific name also accept every image in the archive,
    # so a call such as extract_member(stream, name=x, match=None) could
    # return an unrelated image that merely appears earlier in the archive.
    if match is None and name is None:
        match = re.compile(r'\.(jpg|jpeg|gif|png)\s*$', re.I)

    def is_match(fname):
        # Compare using forward slashes on Windows, mirroring the
        # normalisation applied to ``name`` above.
        if iswindows:
            fname = fname.replace(os.sep, '/')
        if name is not None and fname == name:
            return True
        return match is not None and match.search(fname) is not None

    with open_archive(path_or_stream) as ar:
        for candidate in ar.getnames():
            if is_match(candidate):
                # Read just this member, using the name exactly as the
                # archive reported it.
                return candidate, ar.read([candidate])[candidate].read()
    return None
def extract_cover_image(stream):
    '''
    Pick a cover candidate from the archive readable from ``stream``.

    The member names are sorted with ``sort_key`` and the first one accepted
    by ``name_ok`` (both from ``calibre.libunzip``) is requested from
    ``extract_member`` with ``name`` set to it and ``match=None``.

    :return: whatever ``extract_member`` returns for the chosen name, or
        ``None`` when no name is accepted by ``name_ok``.
    '''
    start = stream.tell()
    from calibre.libunzip import name_ok, sort_key
    ordered = sorted(names(stream), key=sort_key)
    # Go back to the position recorded before the names were listed, so the
    # archive can be opened from the same offset a second time.
    stream.seek(start)
    chosen = next((entry for entry in ordered if name_ok(entry)), None)
    if chosen is None:
        return None
    return extract_member(stream, name=chosen, match=None)
def extract(path_or_stream, location):
    '''
    Open the archive and call its ``extract()`` method with ``location``.

    :param path_or_stream: archive to read; handed to ``open_archive``.
    :param location: passed as the sole argument to the archive's
        ``extract()``.
    '''
    with open_archive(path_or_stream) as archive:
        archive.extract(location)
# Test {{{
def test_basic():
    '''
    Self-test: write a small archive, read it back and extract it.

    Works inside a temporary directory that is also made the current
    directory. Raises ``ValueError`` when the member names, the data read
    back from the archive, or the extracted files differ from what was
    written.
    '''
    from tempfile import TemporaryDirectory
    from calibre import CurrentDir
    tdata = {
        '1/sub-one': b'sub-one\n',
        '2/sub-two.txt': b'sub-two\n',
        'F\xfc\xdfe.txt': b'unicode\n',
        'max-compressed': b'max\n',
        'one.txt': b'one\n',
        'symlink': b'2/sub-two.txt',
        'uncompressed': b'uncompressed\n',
        '\u8bf6\u6bd4\u5c41.txt': b'chinese unicode\n'}

    def do_test():
        # Create the source files relative to the current (temporary)
        # directory.
        for name, data in tdata.items():
            if '/' in name:
                os.makedirs(os.path.dirname(name), exist_ok=True)
            with open(name, 'wb') as f:
                f.write(data)
        with open_archive('a.7z', mode='w') as zf:
            for name in tdata:
                zf.write(name)
        with open_archive('a.7z') as zf:
            if set(zf.getnames()) != set(tdata):
                raise ValueError('names not equal')
            read_data = {name: af.read() for name, af in zf.readall().items()}
            if read_data != tdata:
                raise ValueError('data not equal')

        # Extract into a separate directory and verify those files. Checking
        # the files under tdir itself would only re-read what this test wrote
        # above and would never exercise extract().
        dest = os.path.join(tdir, 'extracted')
        os.mkdir(dest)
        extract('a.7z', dest)
        for name in tdata:
            # NOTE(review): this exclusion list is kept from the original
            # check; confirm whether 'symlink' still needs to be skipped.
            if name not in '1 2 symlink'.split():
                with open(os.path.join(dest, name), 'rb') as s:
                    if s.read() != tdata[name]:
                        raise ValueError('Did not extract %s properly' % name)

    with TemporaryDirectory('test-7z') as tdir, CurrentDir(tdir):
        do_test()


# Running this module as a script executes the self-test.
if __name__ == '__main__':
    test_basic()