Add support for the CB7 comic file format

Fixes #1912212 [Add support for CB7 and CBA file format](https://bugs.launchpad.net/calibre/+bug/1912212)
2025-07-09 03:04:10 -04:00 · 2021-01-19 13:33:12 +05:30 · 2021-01-19 13:33:12 +05:30 · 54d57c6748
commit 54d57c6748
parent eae5ff6d88
8 changed files with 137 additions and 6 deletions
--- a/bypy/sources.json
+++ b/bypy/sources.json
@ -770,6 +770,25 @@
        }
    },

+    {
+        "name": "texttable",
+		"comment": "needed for py7zr",
+        "unix": {
+            "filename": "texttable-1.6.3.tar.gz",
+            "hash": "sha256:ce0faf21aa77d806bbff22b107cc22cce68dc9438f97a2df32c93e9afa4ce436",
+            "urls": ["pypi"]
+        }
+    },
+
+    {
+        "name": "py7zr",
+        "unix": {
+            "filename": "py7zr-0.11.1.tar.gz",
+            "hash": "sha256:29a427f61c1be1907406fde59ae912dd7d44730771e58d643d5021f17fedba8e",
+            "urls": ["pypi"]
+        }
+    },
+
    {
        "name": "zeroconf",
 		"python": 3,
--- a/manual/faq.rst
+++ b/manual/faq.rst
@ -18,7 +18,7 @@ What formats does calibre support conversion to/from?
 calibre supports the conversion of many input formats to many output formats.
 It can convert every input format in the following list, to every output format.

-*Input Formats:* AZW, AZW3, AZW4, CBZ, CBR, CBC, CHM, DJVU, DOCX, EPUB, FB2, FBZ, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ
+*Input Formats:* AZW, AZW3, AZW4, CBZ, CBR, CB7, CBC, CHM, DJVU, DOCX, EPUB, FB2, FBZ, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ

 *Output Formats:* AZW3, EPUB, DOCX, FB2, HTMLZ, OEB, LIT, LRF, MOBI, PDB, PMLZ, RB, PDF, RTF, SNB, TCR, TXT, TXTZ, ZIP

--- a/src/calibre/init.py
+++ b/src/calibre/init.py
@ -187,15 +187,21 @@ def extract(path, dir):
    elif id_.startswith(b'PK'):
        from calibre.libunzip import extract as zipextract
        extractor = zipextract
+    elif id_.startswith(b'7z'):
+        from calibre.utils.seven_zip import extract as seven_extract
+        extractor = seven_extract
    if extractor is None:
        # Fallback to file extension
        ext = os.path.splitext(path)[1][1:].lower()
-        if ext in ['zip', 'cbz', 'epub', 'oebzip']:
+        if ext in ('zip', 'cbz', 'epub', 'oebzip'):
            from calibre.libunzip import extract as zipextract
            extractor = zipextract
-        elif ext in ['cbr', 'rar']:
+        elif ext in ('cbr', 'rar'):
            from calibre.utils.unrar import extract as rarextract
            extractor = rarextract
+        elif ext in ('cb7', '7z'):
+            from calibre.utils.seven_zip import extract as seven_extract
+            extractor = seven_extract
    if extractor is None:
        raise Exception('Unknown archive type')
    extractor(path, dir)
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -133,7 +133,7 @@ plugins += [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, KPFExtract]
 class ComicMetadataReader(MetadataReaderPlugin):

    name = 'Read comic metadata'
-    file_types = {'cbr', 'cbz'}
+    file_types = {'cbr', 'cbz', 'cb7'}
    description = _('Extract cover from comic files')

    def customization_help(self, gui=False):
@ -148,8 +148,12 @@ class ComicMetadataReader(MetadataReaderPlugin):
                ftype = 'cbr'
            elif id_.startswith(b'PK'):
                ftype = 'cbz'
+            elif id_.startswith(b'7z'):
+                ftype = 'cb7'
        if ftype == 'cbr':
            from calibre.utils.unrar import extract_cover_image
+        elif ftype == 'cb7':
+            from calibre.utils.seven_zip import extract_cover_image
        else:
            from calibre.libunzip import extract_cover_image
        from calibre.ebooks.metadata import MetaInformation
--- a/src/calibre/ebooks/init.py
+++ b/src/calibre/ebooks/init.py
@ -35,7 +35,7 @@ class ParserError(ValueError):

 BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm',
                   'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'updb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
-                   'epub', 'fb2', 'fbz', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
+                   'epub', 'fb2', 'fbz', 'djv', 'djvu', 'lrx', 'cbr', 'cb7', 'cbz', 'cbc', 'oebzip',
                   'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
                   'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx', 'docm', 'md',
                   'textile', 'markdown', 'ibook', 'ibooks', 'iba', 'azw3', 'ps', 'kepub', 'kfx', 'kpf']
--- a/src/calibre/ebooks/conversion/plugins/comic_input.py
+++ b/src/calibre/ebooks/conversion/plugins/comic_input.py
@ -21,7 +21,7 @@ class ComicInput(InputFormatPlugin):
    name        = 'Comic Input'
    author      = 'Kovid Goyal'
    description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices'
-    file_types  = {'cbz', 'cbr', 'cbc'}
+    file_types  = {'cbz', 'cbr', 'cb7', 'cbc'}
    is_image_collection = True
    commit_name = 'comic_input'
    core_usage = -1
--- a/src/calibre/test_build.py
+++ b/src/calibre/test_build.py
@ -373,6 +373,10 @@ class BuildTest(unittest.TestCase):
        from calibre.utils.unrar import test_basic
        test_basic()

+    def test_7z(self):
+        # Run the self-test that ships with the 7z wrapper module; it raises
+        # on failure, which unittest reports as an error for this test.
+        from calibre.utils import seven_zip
+        seven_zip.test_basic()
+
    @unittest.skipUnless(iswindows, 'WPD is windows only')
    def test_wpd(self):
        from calibre_extensions import wpd
--- a/src/calibre/utils/seven_zip.py
+++ b/src/calibre/utils/seven_zip.py
@ -0,0 +1,98 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+# License: GPL v3 Copyright: 2021, Kovid Goyal <kovid at kovidgoyal.net>
+
+import os
+import re
+
+from calibre.constants import iswindows
+
+
+def open_archive(path_or_stream, mode='r'):
+    '''
+    Open a 7z archive and return the py7zr ``SevenZipFile`` object for it.
+
+    :param path_or_stream: Passed through unchanged as the first argument of
+        ``SevenZipFile``.
+    :param mode: Passed through as the ``mode`` keyword, ``'r'`` by default.
+    '''
+    # py7zr is imported lazily so that merely importing this module does not
+    # require it.
+    import py7zr
+    return py7zr.SevenZipFile(path_or_stream, mode=mode)
+
+
+def names(path_or_stream):
+    ''' Return the member names of the archive, in archive order, as a tuple. '''
+    archive = open_archive(path_or_stream)
+    with archive:
+        member_names = archive.getnames()
+        return tuple(member_names)
+
+
+def extract_member(path_or_stream, match=None, name=None):
+    '''
+    Read a single member out of a 7z archive.
+
+    :param path_or_stream: Handed to :func:`open_archive`.
+    :param match: Optional compiled regular expression. A member is a
+        candidate if ``match.search(member_name)`` succeeds.
+    :param name: Optional exact member name. A member is a candidate if its
+        name equals this value.
+    :return: A ``(member_name, contents)`` tuple for the first candidate in
+        archive order, or ``None`` if no member qualifies.
+
+    If neither ``match`` nor ``name`` is given, members with a common image
+    extension (jpg, jpeg, gif, png) are the candidates.
+    '''
+    if iswindows and name is not None:
+        name = name.replace(os.sep, '/')
+    # Only fall back to the image pattern when the caller supplied no
+    # criterion at all. Installing it unconditionally would make a name-only
+    # lookup (name=..., match=None) accept every image in the archive and
+    # return the first one in archive order instead of the requested member.
+    if match is None and name is None:
+        match = re.compile(r'\.(jpg|jpeg|gif|png)\s*$', re.I)
+
+    def is_match(fname):
+        # Compare using forward slashes on Windows, mirroring the
+        # normalization applied to ``name`` above.
+        if iswindows:
+            fname = fname.replace(os.sep, '/')
+        return (name is not None and fname == name) or \
+               (match is not None and match.search(fname) is not None)
+
+    with open_archive(path_or_stream) as ar:
+        for fname in ar.getnames():
+            if is_match(fname):
+                # read() returns a mapping of member name -> file-like object
+                return fname, ar.read([fname])[fname].read()
+
+
+def extract_cover_image(stream):
+    '''
+    Pick a cover image from the archive in ``stream``.
+
+    The member names are sorted with ``sort_key`` and the first one accepted
+    by ``name_ok`` (both from calibre.libunzip) is requested from
+    :func:`extract_member`, whose result is returned. Returns ``None`` when no
+    name is accepted. The stream is rewound to its starting position after
+    the names have been listed.
+    '''
+    start = stream.tell()
+    from calibre.libunzip import name_ok, sort_key
+    candidates = list(names(stream))
+    candidates.sort(key=sort_key)
+    stream.seek(start)
+    chosen = next((candidate for candidate in candidates if name_ok(candidate)), None)
+    if chosen is not None:
+        return extract_member(stream, name=chosen, match=None)
+
+
+def extract(path_or_stream, location):
+    ''' Extract the archive at ``path_or_stream`` into the directory ``location``. '''
+    archive = open_archive(path_or_stream)
+    with archive:
+        archive.extract(location)
+
+
+# Test {{{
+
+
+def test_basic():
+    '''
+    Round-trip a small set of files through a 7z archive.
+
+    The files are written into a temporary directory, packed into ``a.7z``,
+    and then checked three ways: the archive's name list, the data returned
+    by reading the archive in memory, and the files produced by
+    :func:`extract` into a separate directory. Raises ``ValueError`` on the
+    first mismatch.
+    '''
+    from tempfile import TemporaryDirectory
+    from calibre import CurrentDir
+
+    tdata = {
+        '1/sub-one': b'sub-one\n',
+        '2/sub-two.txt': b'sub-two\n',
+        'F\xfc\xdfe.txt': b'unicode\n',
+        'max-compressed': b'max\n',
+        'one.txt': b'one\n',
+        'symlink': b'2/sub-two.txt',
+        'uncompressed': b'uncompressed\n',
+        '\u8bf6\u6bd4\u5c41.txt': b'chinese unicode\n'}
+
+    def do_test():
+        # Create the source files relative to the current (temporary) directory
+        for name, data in tdata.items():
+            if '/' in name:
+                os.makedirs(os.path.dirname(name), exist_ok=True)
+            with open(name, 'wb') as f:
+                f.write(data)
+        with open_archive('a.7z', mode='w') as zf:
+            for name in tdata:
+                zf.write(name)
+        with open_archive('a.7z') as zf:
+            if set(zf.getnames()) != set(tdata):
+                raise ValueError('names not equal')
+            read_data = {name:af.read() for name, af in zf.readall().items()}
+            if read_data != tdata:
+                raise ValueError('data not equal')
+
+        # Extract into a fresh directory so the files read back below are the
+        # ones produced by extract(), not the source files written above.
+        dest = os.path.join(tdir, 'extracted')
+        os.mkdir(dest)
+        extract('a.7z', dest)
+        for name in tdata:
+            with open(os.path.join(dest, name), 'rb') as s:
+                if s.read() != tdata[name]:
+                    raise ValueError('Did not extract %s properly' % name)
+
+    with TemporaryDirectory('test-7z') as tdir, CurrentDir(tdir):
+        do_test()
+
+
+if __name__ == '__main__':
+    test_basic()