From 504ef950568ab8fcdd0b04c7af5de78ffd4ab0a1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 6 Apr 2011 10:03:46 -0600 Subject: [PATCH] When dealing with ZIP/RAR archives, use the file header rather than the file extension to determine the file type, when possible. This fixes the common case of CBZ files being actually cbr files and vice versa --- src/calibre/__init__.py | 21 ++++++++++++++++----- src/calibre/customize/builtins.py | 8 ++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py index 1799072045..2f457bf2bc 100644 --- a/src/calibre/__init__.py +++ b/src/calibre/__init__.py @@ -217,14 +217,25 @@ def filename_to_utf8(name): return name.decode(codec, 'replace').encode('utf8') def extract(path, dir): - ext = os.path.splitext(path)[1][1:].lower() extractor = None - if ext in ['zip', 'cbz', 'epub', 'oebzip']: - from calibre.libunzip import extract as zipextract - extractor = zipextract - elif ext in ['cbr', 'rar']: + # First use the file header to identify its type + with open(path, 'rb') as f: + id_ = f.read(3) + if id_ == b'Rar': from calibre.libunrar import extract as rarextract extractor = rarextract + elif id_.startswith(b'PK'): + from calibre.libunzip import extract as zipextract + extractor = zipextract + if extractor is None: + # Fallback to file extension + ext = os.path.splitext(path)[1][1:].lower() + if ext in ['zip', 'cbz', 'epub', 'oebzip']: + from calibre.libunzip import extract as zipextract + extractor = zipextract + elif ext in ['cbr', 'rar']: + from calibre.libunrar import extract as rarextract + extractor = rarextract if extractor is None: raise Exception('Unknown archive type') extractor(path, dir) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 1e40a8e5ff..91abfacc95 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -166,6 +166,14 @@ class ComicMetadataReader(MetadataReaderPlugin): 
description = _('Extract cover from comic files') def get_metadata(self, stream, ftype): + if hasattr(stream, 'seek') and hasattr(stream, 'tell'): + pos = stream.tell() + id_ = stream.read(3) + stream.seek(pos) + if id_ == b'Rar': + ftype = 'cbr' + elif id_.startswith(b'PK'): + ftype = 'cbz' if ftype == 'cbr': from calibre.libunrar import extract_first_alphabetically as extract_first extract_first