When dealing with ZIP/RAR archives, use the file header rather than the file extension to detrmine the file type, when possible. This fixes the common case of CBZ files being actually cbr files and vice versa

This commit is contained in:
Kovid Goyal 2011-04-06 10:03:46 -06:00
parent 593f3aaf0a
commit 504ef95056
2 changed files with 24 additions and 5 deletions

View File

@ -217,14 +217,25 @@ def filename_to_utf8(name):
return name.decode(codec, 'replace').encode('utf8')
def extract(path, dir):
ext = os.path.splitext(path)[1][1:].lower()
extractor = None
if ext in ['zip', 'cbz', 'epub', 'oebzip']:
from calibre.libunzip import extract as zipextract
extractor = zipextract
elif ext in ['cbr', 'rar']:
# First use the file header to identify its type
with open(path, 'rb') as f:
id_ = f.read(3)
if id_ == b'Rar':
from calibre.libunrar import extract as rarextract
extractor = rarextract
elif id_.startswith(b'PK'):
from calibre.libunzip import extract as zipextract
extractor = zipextract
if extractor is None:
# Fallback to file extension
ext = os.path.splitext(path)[1][1:].lower()
if ext in ['zip', 'cbz', 'epub', 'oebzip']:
from calibre.libunzip import extract as zipextract
extractor = zipextract
elif ext in ['cbr', 'rar']:
from calibre.libunrar import extract as rarextract
extractor = rarextract
if extractor is None:
raise Exception('Unknown archive type')
extractor(path, dir)

View File

@ -166,6 +166,14 @@ class ComicMetadataReader(MetadataReaderPlugin):
description = _('Extract cover from comic files')
def get_metadata(self, stream, ftype):
if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
pos = stream.tell()
id_ = stream.read(3)
stream.seek(pos)
if id_ == b'Rar':
ftype = 'cbr'
elif id.startswith(b'PK'):
ftype = 'cbz'
if ftype == 'cbr':
from calibre.libunrar import extract_first_alphabetically as extract_first
extract_first