Add support for the CB7 comic file format

Fixes #1912212 [Add support for CB7 and CBA file format](https://bugs.launchpad.net/calibre/+bug/1912212)
2025-07-09 03:04:10 -04:00 · 2021-01-19 13:33:12 +05:30 · 2021-01-19 13:33:12 +05:30 · 54d57c6748
commit 54d57c6748
parent eae5ff6d88
8 changed files with 137 additions and 6 deletions
--- a/bypy/sources.json
+++ b/bypy/sources.json
@ -770,6 +770,25 @@
        }
    },
    {
        "name": "texttable",
 		"comment": "needed for py7zr",
        "unix": {
            "filename": "texttable-1.6.3.tar.gz",
            "hash": "sha256:ce0faf21aa77d806bbff22b107cc22cce68dc9438f97a2df32c93e9afa4ce436",
            "urls": ["pypi"]
        }
    },
    {
        "name": "py7zr",
        "unix": {
            "filename": "py7zr-0.11.1.tar.gz",
            "hash": "sha256:29a427f61c1be1907406fde59ae912dd7d44730771e58d643d5021f17fedba8e",
            "urls": ["pypi"]
        }
    },
    {
        "name": "zeroconf",
 		"python": 3,
--- a/manual/faq.rst
+++ b/manual/faq.rst
@ -18,7 +18,7 @@ What formats does calibre support conversion to/from?
 calibre supports the conversion of many input formats to many output formats.
 It can convert every input format in the following list, to every output format.
-*Input Formats:* AZW, AZW3, AZW4, CBZ, CBR, CBC, CHM, DJVU, DOCX, EPUB, FB2, FBZ, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ
+*Input Formats:* AZW, AZW3, AZW4, CBZ, CBR, CB7, CBC, CHM, DJVU, DOCX, EPUB, FB2, FBZ, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ
 *Output Formats:* AZW3, EPUB, DOCX, FB2, HTMLZ, OEB, LIT, LRF, MOBI, PDB, PMLZ, RB, PDF, RTF, SNB, TCR, TXT, TXTZ, ZIP
--- a/src/calibre/init.py
+++ b/src/calibre/init.py
@ -187,15 +187,21 @@ def extract(path, dir):
    elif id_.startswith(b'PK'):
        from calibre.libunzip import extract as zipextract
        extractor = zipextract
    elif id_.startswith(b'7z'):
        from calibre.utils.seven_zip import extract as seven_extract
        extractor = seven_extract
    if extractor is None:
        # Fallback to file extension
        ext = os.path.splitext(path)[1][1:].lower()
-        if ext in ['zip', 'cbz', 'epub', 'oebzip']:
+        if ext in ('zip', 'cbz', 'epub', 'oebzip'):
            from calibre.libunzip import extract as zipextract
            extractor = zipextract
-        elif ext in ['cbr', 'rar']:
+        elif ext in ('cbr', 'rar'):
            from calibre.utils.unrar import extract as rarextract
            extractor = rarextract
        elif ext in ('cb7', '7z'):
            from calibre.utils.seven_zip import extract as seven_extract
            extractor = seven_extract
    if extractor is None:
        raise Exception('Unknown archive type')
    extractor(path, dir)
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -133,7 +133,7 @@ plugins += [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, KPFExtract]
 class ComicMetadataReader(MetadataReaderPlugin):
    name = 'Read comic metadata'
-    file_types = {'cbr', 'cbz'}
+    file_types = {'cbr', 'cbz', 'cb7'}
    description = _('Extract cover from comic files')
    def customization_help(self, gui=False):
@ -148,8 +148,12 @@ class ComicMetadataReader(MetadataReaderPlugin):
                ftype = 'cbr'
            elif id_.startswith(b'PK'):
                ftype = 'cbz'
            elif id_.startswith(b'7z'):
                ftype = 'cb7'
        if ftype == 'cbr':
            from calibre.utils.unrar import extract_cover_image
        elif ftype == 'cb7':
            from calibre.utils.seven_zip import extract_cover_image
        else:
            from calibre.libunzip import extract_cover_image
        from calibre.ebooks.metadata import MetaInformation
--- a/src/calibre/ebooks/init.py
+++ b/src/calibre/ebooks/init.py
@ -35,7 +35,7 @@ class ParserError(ValueError):
 BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm',
                   'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'updb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
-                   'epub', 'fb2', 'fbz', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
+                   'epub', 'fb2', 'fbz', 'djv', 'djvu', 'lrx', 'cbr', 'cb7', 'cbz', 'cbc', 'oebzip',
                   'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
                   'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx', 'docm', 'md',
                   'textile', 'markdown', 'ibook', 'ibooks', 'iba', 'azw3', 'ps', 'kepub', 'kfx', 'kpf']
--- a/src/calibre/ebooks/conversion/plugins/comic_input.py
+++ b/src/calibre/ebooks/conversion/plugins/comic_input.py
@ -21,7 +21,7 @@ class ComicInput(InputFormatPlugin):
    name        = 'Comic Input'
    author      = 'Kovid Goyal'
    description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices'
-    file_types  = {'cbz', 'cbr', 'cbc'}
+    file_types  = {'cbz', 'cbr', 'cb7', 'cbc'}
    is_image_collection = True
    commit_name = 'comic_input'
    core_usage = -1
--- a/src/calibre/test_build.py
+++ b/src/calibre/test_build.py
@ -373,6 +373,10 @@ class BuildTest(unittest.TestCase):
        from calibre.utils.unrar import test_basic
        test_basic()
    def test_7z(self):
        # Run the self-test that ships with the 7z support module; it raises
        # on failure, which unittest reports as an error for this test.
        from calibre.utils import seven_zip
        seven_zip.test_basic()
    @unittest.skipUnless(iswindows, 'WPD is windows only')
    def test_wpd(self):
        from calibre_extensions import wpd
--- a/src/calibre/utils/seven_zip.py
+++ b/src/calibre/utils/seven_zip.py
@ -0,0 +1,98 @@
 #!/usr/bin/env python
 # vim:fileencoding=utf-8
 # License: GPL v3 Copyright: 2021, Kovid Goyal <kovid at kovidgoyal.net>
 import os
 import re
 from calibre.constants import iswindows
 def open_archive(path_or_stream, mode='r'):
    '''
    Open a 7z archive and return the py7zr SevenZipFile object for it.

    :param path_or_stream: Handed unchanged to SevenZipFile as its first argument.
    :param mode: Handed unchanged to SevenZipFile as ``mode`` (defaults to ``'r'``).
    '''
    # py7zr is imported lazily so that merely importing this module does not
    # require it.
    import py7zr
    return py7zr.SevenZipFile(path_or_stream, mode=mode)
 def names(path_or_stream):
    '''
    Return a tuple with the names of all entries in the archive, in the order
    reported by the archive's getnames().
    '''
    with open_archive(path_or_stream) as archive:
        entry_names = archive.getnames()
        return tuple(entry_names)
 def extract_member(path_or_stream, match=None, name=None):
    '''
    Return ``(entry_name, data)`` for the first archive entry selected by
    ``name`` or ``match``. Returns None when no entry is selected.

    :param path_or_stream: Handed unchanged to open_archive().
    :param match: Compiled regular expression that is searched for in each
                  entry name. When neither ``match`` nor ``name`` is given,
                  entries ending in jpg/jpeg/gif/png are selected.
    :param name: Exact entry name to select. When given without ``match``,
                 only that entry is selected.
    '''
    if iswindows and name is not None:
        name = name.replace(os.sep, '/')
    if match is None and name is None:
        # Fall back to "any image" only when the caller did not ask for a
        # specific entry. Applying the fallback unconditionally made a request
        # for a named entry return whichever image came first in the archive.
        match = re.compile(r'\.(jpg|jpeg|gif|png)\s*$', re.I)

    def is_match(fname):
        if iswindows:
            fname = fname.replace(os.sep, '/')
        if name is not None and fname == name:
            return True
        return match is not None and match.search(fname) is not None

    with open_archive(path_or_stream) as ar:
        for fname in ar.getnames():
            if is_match(fname):
                # read() maps each requested name to a file-like object
                return fname, ar.read([fname])[fname].read()
    return None
 def extract_cover_image(stream):
    '''
    Pick a cover candidate from the archive in ``stream`` and fetch it.

    Entry names are sorted with calibre.libunzip.sort_key. The first one
    accepted by calibre.libunzip.name_ok is passed to extract_member() as
    ``name``, and that call's result is returned. Returns None when no name
    is accepted.
    '''
    start = stream.tell()
    from calibre.libunzip import name_ok, sort_key
    candidates = sorted(names(stream), key=sort_key)
    # Listing the names consumed the stream; rewind to where the caller left it
    stream.seek(start)
    chosen = next((entry for entry in candidates if name_ok(entry)), None)
    if chosen is not None:
        return extract_member(stream, name=chosen, match=None)
 def extract(path_or_stream, location):
    '''
    Open the archive at ``path_or_stream`` and call its extract() method with
    ``location`` as the destination.
    '''
    with open_archive(path_or_stream) as archive:
        archive.extract(location)
 # Test {{{
 def test_basic():
    '''
    Round-trip a small set of files through a 7z archive inside a temporary
    directory: write them to disk, archive them, then check the entry names,
    the in-memory contents and the contents extracted to disk.

    Raises ValueError on the first mismatch.
    '''
    from tempfile import TemporaryDirectory
    from calibre import CurrentDir
    tdata = {
        '1/sub-one': b'sub-one\n',
        '2/sub-two.txt': b'sub-two\n',
        'F\xfc\xdfe.txt': b'unicode\n',
        'max-compressed': b'max\n',
        'one.txt': b'one\n',
        'symlink': b'2/sub-two.txt',
        'uncompressed': b'uncompressed\n',
        '\u8bf6\u6bd4\u5c41.txt': b'chinese unicode\n'}

    def do_test():
        # Create the input files relative to the current (temporary) directory
        for name, data in tdata.items():
            if '/' in name:
                os.makedirs(os.path.dirname(name), exist_ok=True)
            with open(name, 'wb') as f:
                f.write(data)
        with open_archive('a.7z', mode='w') as zf:
            for name in tdata:
                zf.write(name)
        with open_archive('a.7z') as zf:
            if set(zf.getnames()) != set(tdata):
                raise ValueError('names not equal')
            read_data = {name:af.read() for name, af in zf.readall().items()}
            if read_data != tdata:
                raise ValueError('data not equal')
        # Extract into a separate directory so this check reads what came out
        # of the archive rather than the input files written above.
        out_dir = os.path.join(tdir, 'extracted')
        os.makedirs(out_dir, exist_ok=True)
        extract(os.path.join(tdir, 'a.7z'), out_dir)
        for name, data in tdata.items():
            with open(os.path.join(out_dir, name), 'rb') as s:
                if s.read() != data:
                    raise ValueError('Did not extract %s properly' % name)

    with TemporaryDirectory('test-7z') as tdir, CurrentDir(tdir):
        do_test()
 # Run the self-test when this file is executed directly as a script
 if __name__ == '__main__':
    test_basic()