From a7b5f60f6fd8a6f97f26319575f409ca487cdd67 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 12 Feb 2010 10:27:45 -0700 Subject: [PATCH] Autodetect if a zip/rar file is actually a comic. Fixes #4880 (RAR/ZIP file Autodetection to treat them as CBR/CBZ) --- src/calibre/ebooks/metadata/archive.py | 19 +++++++++++++++---- src/calibre/ebooks/metadata/rar.py | 10 +++++++--- src/calibre/ebooks/metadata/zip.py | 10 ++++++++-- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/src/calibre/ebooks/metadata/archive.py b/src/calibre/ebooks/metadata/archive.py index 6b71f41a88..f9e78e5dfa 100644 --- a/src/calibre/ebooks/metadata/archive.py +++ b/src/calibre/ebooks/metadata/archive.py @@ -6,17 +6,21 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import textwrap, os +import os from contextlib import closing from calibre.customize import FileTypePlugin +def is_comic(list_of_names): + extensions = set([x.rpartition('.')[-1].lower() for x in list_of_names]) + return len(extensions) == 1 and iter(extensions).next() in ('jpg', 'jpeg', 'png') + class ArchiveExtract(FileTypePlugin): name = 'Archive Extract' author = 'Kovid Goyal' - description = textwrap.dedent(_('''\ - Extract common e-book formats from archives (zip/rar) files. - ''')) + description = _('Extract common e-book formats from archives ' + '(zip/rar) files. Also try to autodetect if they are actually ' + 'cbz/cbr files.') file_types = set(['zip', 'rar']) supported_platforms = ['windows', 'osx', 'linux'] on_import = True @@ -35,6 +39,13 @@ class ArchiveExtract(FileTypePlugin): fnames = zf.namelist() fnames = [x for x in fnames if '.' in x] + if is_comic(fnames): + ext = '.cbr' if is_rar else '.cbz' + of = self.temporary_file('_archive_extract'+ext) + with open(archive, 'rb') as f: + of.write(f.read()) + of.close() + return of.name if len(fnames) > 1 or not fnames: return archive fname = fnames[0] diff --git a/src/calibre/ebooks/metadata/rar.py b/src/calibre/ebooks/metadata/rar.py index 16f2c67af7..896e3d7777 100644 --- a/src/calibre/ebooks/metadata/rar.py +++ b/src/calibre/ebooks/metadata/rar.py @@ -13,6 +13,9 @@ from calibre.ptempfile import PersistentTemporaryFile from calibre.libunrar import extract_member, names def get_metadata(stream): + from calibre.ebooks.metadata.archive import is_comic + from calibre.ebooks.metadata.meta import get_metadata + path = getattr(stream, 'name', False) if not path: pt = PersistentTemporaryFile('_rar-meta.rar') @@ -21,6 +24,8 @@ def get_metadata(stream): path = pt.name path = os.path.abspath(path) file_names = list(names(path)) + if is_comic(file_names): + return get_metadata(stream, 'cbr') for f in file_names: stream_type = os.path.splitext(f)[1].lower() if stream_type: @@ -29,8 +34,7 @@ def get_metadata(stream): 'rb', 'imp', 'pdf', 'lrf'): data = extract_member(path, match=None, name=f)[1] stream = StringIO(data) - from calibre.ebooks.metadata.meta import get_metadata return get_metadata(stream, stream_type) - raise ValueError('No ebook found in RAR archive') - + raise ValueError('No ebook found in RAR archive') + diff --git a/src/calibre/ebooks/metadata/zip.py b/src/calibre/ebooks/metadata/zip.py index 624e0fe73c..db9d751f3a 100644 --- a/src/calibre/ebooks/metadata/zip.py +++ b/src/calibre/ebooks/metadata/zip.py @@ -8,15 +8,21 @@ from cStringIO import StringIO def get_metadata(stream): + from calibre.ebooks.metadata.meta import get_metadata + from calibre.ebooks.metadata.archive import is_comic stream_type = None zf = ZipFile(stream, 'r') - for f in zf.namelist(): + names = zf.namelist() + if is_comic(names): + # Is probably a comic + return get_metadata(stream, 'cbz') + + for f in names: stream_type = os.path.splitext(f)[1].lower() if stream_type: stream_type = stream_type[1:] if stream_type in ('lit', 'opf', 'prc', 'mobi', 'fb2', 'epub', 'rb', 'imp', 'pdf', 'lrf'): - from calibre.ebooks.metadata.meta import get_metadata stream = StringIO(zf.read(f)) return get_metadata(stream, stream_type) raise ValueError('No ebook found in ZIP archive')