diff --git a/bypy/sources.json b/bypy/sources.json
index 1e43d8422a..6d91dbf282 100644
--- a/bypy/sources.json
+++ b/bypy/sources.json
@@ -770,6 +770,25 @@
         }
     },
 
+    {
+        "name": "texttable",
+        "comment": "needed for py7zr",
+        "unix": {
+            "filename": "texttable-1.6.3.tar.gz",
+            "hash": "sha256:ce0faf21aa77d806bbff22b107cc22cce68dc9438f97a2df32c93e9afa4ce436",
+            "urls": ["pypi"]
+        }
+    },
+
+    {
+        "name": "py7zr",
+        "unix": {
+            "filename": "py7zr-0.11.1.tar.gz",
+            "hash": "sha256:29a427f61c1be1907406fde59ae912dd7d44730771e58d643d5021f17fedba8e",
+            "urls": ["pypi"]
+        }
+    },
+
     {
         "name": "zeroconf",
         "python": 3,
diff --git a/manual/faq.rst b/manual/faq.rst
index ac13f552cd..c37949100c 100644
--- a/manual/faq.rst
+++ b/manual/faq.rst
@@ -18,7 +18,7 @@ What formats does calibre support conversion to/from?
 calibre supports the conversion of many input formats to many output formats.
 It can convert every input format in the following list, to every output format.
 
-*Input Formats:* AZW, AZW3, AZW4, CBZ, CBR, CBC, CHM, DJVU, DOCX, EPUB, FB2, FBZ, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ
+*Input Formats:* AZW, AZW3, AZW4, CBZ, CBR, CB7, CBC, CHM, DJVU, DOCX, EPUB, FB2, FBZ, HTML, HTMLZ, LIT, LRF, MOBI, ODT, PDF, PRC, PDB, PML, RB, RTF, SNB, TCR, TXT, TXTZ
 
 *Output Formats:* AZW3, EPUB, DOCX, FB2, HTMLZ, OEB, LIT, LRF, MOBI, PDB, PMLZ, RB, PDF, RTF, SNB, TCR, TXT, TXTZ, ZIP
 
diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py
index 531589ec58..29e4cc5f68 100644
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -187,15 +187,21 @@ def extract(path, dir):
     elif id_.startswith(b'PK'):
         from calibre.libunzip import extract as zipextract
         extractor = zipextract
+    elif id_.startswith(b'7z'):
+        from calibre.utils.seven_zip import extract as seven_extract
+        extractor = seven_extract
     if extractor is None:
         # Fallback to file extension
         ext = os.path.splitext(path)[1][1:].lower()
-        if ext in ['zip', 'cbz', 'epub', 'oebzip']:
+        if ext in ('zip', 'cbz', 'epub', 'oebzip'):
             from calibre.libunzip import extract as zipextract
             extractor = zipextract
-        elif ext in ['cbr', 'rar']:
+        elif ext in ('cbr', 'rar'):
             from calibre.utils.unrar import extract as rarextract
             extractor = rarextract
+        elif ext in ('cb7', '7z'):
+            from calibre.utils.seven_zip import extract as seven_extract
+            extractor = seven_extract
     if extractor is None:
         raise Exception('Unknown archive type')
     extractor(path, dir)
diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 008aa8a106..cb5af24755 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -133,7 +133,7 @@ plugins += [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, KPFExtract]
 class ComicMetadataReader(MetadataReaderPlugin):
 
     name = 'Read comic metadata'
-    file_types = {'cbr', 'cbz'}
+    file_types = {'cbr', 'cbz', 'cb7'}
     description = _('Extract cover from comic files')
 
     def customization_help(self, gui=False):
@@ -148,8 +148,12 @@ class ComicMetadataReader(MetadataReaderPlugin):
                 ftype = 'cbr'
             elif id_.startswith(b'PK'):
                 ftype = 'cbz'
+            elif id_.startswith(b'7z'):
+                ftype = 'cb7'
         if ftype == 'cbr':
             from calibre.utils.unrar import extract_cover_image
+        elif ftype == 'cb7':
+            from calibre.utils.seven_zip import extract_cover_image
         else:
             from calibre.libunzip import extract_cover_image
         from calibre.ebooks.metadata import MetaInformation
diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py
index 6e5eac642b..ab2260345e 100644
--- a/src/calibre/ebooks/__init__.py
+++ b/src/calibre/ebooks/__init__.py
@@ -35,7 +35,7 @@ class ParserError(ValueError):
 
 BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm',
                    'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'updb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
-                   'epub', 'fb2', 'fbz', 'djv', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
+                   'epub', 'fb2', 'fbz', 'djv', 'djvu', 'lrx', 'cbr', 'cb7', 'cbz', 'cbc', 'oebzip',
                    'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb',
                    'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx', 'docm', 'md',
                    'textile', 'markdown', 'ibook', 'ibooks', 'iba', 'azw3', 'ps', 'kepub', 'kfx', 'kpf']
diff --git a/src/calibre/ebooks/conversion/plugins/comic_input.py b/src/calibre/ebooks/conversion/plugins/comic_input.py
index 4a373ba340..0241f73a9f 100644
--- a/src/calibre/ebooks/conversion/plugins/comic_input.py
+++ b/src/calibre/ebooks/conversion/plugins/comic_input.py
@@ -21,7 +21,7 @@ class ComicInput(InputFormatPlugin):
     name = 'Comic Input'
     author = 'Kovid Goyal'
     description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices'
-    file_types = {'cbz', 'cbr', 'cbc'}
+    file_types = {'cbz', 'cbr', 'cb7', 'cbc'}
     is_image_collection = True
     commit_name = 'comic_input'
     core_usage = -1
diff --git a/src/calibre/test_build.py b/src/calibre/test_build.py
index 21713cba03..b37fb1bcfb 100644
--- a/src/calibre/test_build.py
+++ b/src/calibre/test_build.py
@@ -373,6 +373,10 @@ class BuildTest(unittest.TestCase):
         from calibre.utils.unrar import test_basic
         test_basic()
 
+    def test_7z(self):
+        from calibre.utils.seven_zip import test_basic
+        test_basic()
+
     @unittest.skipUnless(iswindows, 'WPD is windows only')
     def test_wpd(self):
         from calibre_extensions import wpd
diff --git a/src/calibre/utils/seven_zip.py b/src/calibre/utils/seven_zip.py
new file mode 100644
index 0000000000..b5904765d8
--- /dev/null
+++ b/src/calibre/utils/seven_zip.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+# License: GPL v3 Copyright: 2021, Kovid Goyal
+
+import os
+import re
+
+from calibre.constants import iswindows
+
+
+def open_archive(path_or_stream, mode='r'):
+    '''Open a 7z archive with py7zr and return the SevenZipFile object.'''
+    # Imported here so that merely importing this module does not need py7zr
+    from py7zr import SevenZipFile
+    return SevenZipFile(path_or_stream, mode=mode)
+
+
+def names(path_or_stream):
+    '''Return the member names of the archive as a tuple.'''
+    with open_archive(path_or_stream) as zf:
+        return tuple(zf.getnames())
+
+
+def extract_member(path_or_stream, match=None, name=None):
+    '''
+    Return (member_name, data) for the first archive member that is named
+    name or whose name matches the compiled regular expression match.
+
+    When neither name nor match is given, match defaults to a pattern that
+    selects common image extensions. Returns None if no member qualifies.
+    '''
+    if iswindows and name is not None:
+        name = name.replace(os.sep, '/')
+    # Only fall back to the image pattern when no explicit name was requested,
+    # otherwise an image stored earlier in the archive would be returned
+    # instead of the requested member.
+    if match is None and name is None:
+        match = re.compile(r'\.(jpg|jpeg|gif|png)\s*$', re.I)
+
+    def is_match(fname):
+        if iswindows:
+            fname = fname.replace(os.sep, '/')
+        return (name is not None and fname == name) or \
+               (match is not None and match.search(fname) is not None)
+
+    with open_archive(path_or_stream) as ar:
+        found = next(filter(is_match, ar.getnames()), None)
+        if found is not None:
+            return found, ar.read([found])[found].read()
+
+
+def extract_cover_image(stream):
+    '''
+    Return (name, data) for the cover image of the archive in stream, or None
+    if no member is acceptable.
+
+    Member names are ordered with calibre.libunzip.sort_key and the first one
+    accepted by calibre.libunzip.name_ok is extracted.
+    '''
+    from calibre.libunzip import name_ok, sort_key
+    pos = stream.tell()
+    all_names = sorted(names(stream), key=sort_key)
+    # Rewind so the archive is read again from the same offset
+    stream.seek(pos)
+    for name in all_names:
+        if name_ok(name):
+            return extract_member(stream, name=name, match=None)
+
+
+def extract(path_or_stream, location):
+    '''Extract the archive into the directory location.'''
+    with open_archive(path_or_stream) as f:
+        f.extract(location)
+
+
+# Test {{{
+
+
+def test_basic():
+    '''Write a few files into a 7z archive and check they read back intact.'''
+    from tempfile import TemporaryDirectory
+    from calibre import CurrentDir
+
+    tdata = {
+        '1/sub-one': b'sub-one\n',
+        '2/sub-two.txt': b'sub-two\n',
+        'F\xfc\xdfe.txt': b'unicode\n',
+        'max-compressed': b'max\n',
+        'one.txt': b'one\n',
+        'symlink': b'2/sub-two.txt',
+        'uncompressed': b'uncompressed\n',
+        '\u8bf6\u6bd4\u5c41.txt': b'chinese unicode\n'}
+
+    def do_test():
+        for name, data in tdata.items():
+            if '/' in name:
+                os.makedirs(os.path.dirname(name), exist_ok=True)
+            with open(name, 'wb') as f:
+                f.write(data)
+        with open_archive(os.path.join('a.7z'), mode='w') as zf:
+            for name in tdata:
+                zf.write(name)
+        with open_archive(os.path.join('a.7z')) as zf:
+            if set(zf.getnames()) != set(tdata):
+                raise ValueError('names not equal')
+            read_data = {name:af.read() for name, af in zf.readall().items()}
+            if read_data != tdata:
+                raise ValueError('data not equal')
+
+        # NOTE(review): this re-reads the files written at the top of
+        # do_test() (assuming CurrentDir makes tdir the working directory),
+        # not files extracted from the archive -- extract() is not exercised.
+        for name in tdata:
+            if name not in '1 2 symlink'.split():
+                with open(os.path.join(tdir, name), 'rb') as s:
+                    if s.read() != tdata[name]:
+                        raise ValueError('Did not extract %s properly' % name)
+
+    with TemporaryDirectory('test-7z') as tdir, CurrentDir(tdir):
+        do_test()
+
+# }}}
+
+
+if __name__ == '__main__':
+    test_basic()