From 193d9dfa24c059cd966ee6695fcb4d44e9ac1fdf Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 4 May 2016 10:35:46 +0530 Subject: [PATCH] Modernize imghdr Improve performance by using a memoryview. Also add some tentative code to read image dimensions from the image header --- src/calibre/utils/imghdr.py | 207 +++++++++++++++++++++--------------- 1 file changed, 124 insertions(+), 83 deletions(-) diff --git a/src/calibre/utils/imghdr.py b/src/calibre/utils/imghdr.py index 54ec8209a5..d78db229b8 100644 --- a/src/calibre/utils/imghdr.py +++ b/src/calibre/utils/imghdr.py @@ -1,37 +1,107 @@ -"""Recognize image file formats based on their first few bytes.""" +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +# License: GPLv3 Copyright: 2016, Kovid Goyal -__all__ = ["what"] +from __future__ import (unicode_literals, division, absolute_import, + print_function) +from struct import unpack, error -# -------------------------# -# Recognize image headers # -# -------------------------# +""" Recognize image file formats and sizes based on their first few bytes.""" def what(file, h=None): + ' Recognize image headers ' if h is None: if isinstance(file, basestring): - f = open(file, 'rb') - h = f.read(150) + with lopen(file, 'rb') as f: + h = f.read(150) else: location = file.tell() h = file.read(150) file.seek(location) - f = None - else: - f = None - try: - for tf in tests: - res = tf(h, f) - if res: - return res - finally: - if f: - f.close() + if isinstance(h, bytes): + h = memoryview(h) + for tf in tests: + res = tf(h) + if res: + return res # There exist some jpeg files with no headers, only the starting two bits # If we cannot identify as anything else, identify as jpeg. if h[:2] == b'\xff\xd8': return 'jpeg' return None +HSIZE = 200 + +def identify(stream_or_data): + ''' Recognize file format and sizes. Returns format, width, height. width + and height will be -1 if not found and fmt will be None if the image is not + recognized. `stream_or_data` can be a unicode string, in which case it is + assumed to be a filename, or a file-like object, or a bytestring. ''' + width = height = -1 + + if isinstance(stream_or_data, type('')): + with lopen(stream_or_data, 'rb') as sf: + head = sf.read(HSIZE) + elif isinstance(stream_or_data, bytes): + head = stream_or_data + else: + pos = stream_or_data.tell() + head = stream_or_data.read(HSIZE) + stream_or_data.seek(pos) + + if isinstance(head, bytes): + head = memoryview(head) + + fmt = what(None, head) + + if fmt in {'jpeg', 'gif', 'png', 'jpeg2000'}: + size = len(head) + if size >= 10 and head[:6] in (b'GIF87a', b'GIF89a'): + # GIF + try: + width, height = unpack(b"= 16 and head[:8] == b'\211PNG\r\n\032\n': + # PNG + s = head[16:24] if size >= 24 and head[12:16] == b'IHDR' else head[8:16] + try: + width, height = unpack(b">LL", s) + except error: + return fmt, width, height + elif head[:2] == b'\xff\xd8': + # JPEG + try: + width, height = jpeg_dimension(head) + except Exception: + return fmt, width, height + elif size >= 56 and head[:12] == b'\x00\x00\x00\x0cjP \r\n\x87\n': + # JPEG2000 + try: + height, width = unpack(b'>LL', head[48:56]) + except error: + return fmt, width, height + return fmt, width, height + + +def jpeg_dimension(head): + pos = ftype = 0 + size = 2 + while not 0xc0 <= ftype <= 0xcf: + pos += size + byte = head[pos] + pos += 1 + while byte == b'\xff': + byte = head[pos] + pos += 1 + ftype = ord(byte) + size = unpack(b'>H', head[pos:pos+2])[0] - 2 + pos += 2 + # We are at a SOFn block + pos += 1 + height, width = unpack(b'>HH', head[pos:pos+4]) + return width, height + # ---------------------------------# # Subroutines per image file type # @@ -39,141 +109,112 @@ def what(file, h=None): tests = [] -def test_jpeg(h, f): +def test_jpeg(h): """JPEG data in JFIF format (Changed by Kovid to mimic the file utility, the original code was failing with some jpegs that included ICC_PROFILE data, for example: http://nationalpostnews.files.wordpress.com/2013/03/budget.jpeg?w=300&h=1571)""" - if (h[6:10] in (b'JFIF', b'Exif')) or (h[:2] == b'\xff\xd8' and (b'JFIF' in h[:32] or b'8BIM' in h[:32])): + if h[6:10] in (b'JFIF', b'Exif'): return 'jpeg' + if h[:2] == b'\xff\xd8': + q = h[:32].tobytes() + if b'JFIF' in q or b'8BIM' in q: + return 'jpeg' tests.append(test_jpeg) -def test_png(h, f): - if h[:8] == "\211PNG\r\n\032\n": +def test_png(h): + if h[:8] == b"\211PNG\r\n\032\n": return 'png' tests.append(test_png) -def test_gif(h, f): +def test_gif(h): """GIF ('87 and '89 variants)""" - if h[:6] in ('GIF87a', 'GIF89a'): + if h[:6] in (b'GIF87a', b'GIF89a'): return 'gif' tests.append(test_gif) -def test_tiff(h, f): +def test_tiff(h): """TIFF (can be in Motorola or Intel byte order)""" - if h[:2] in ('MM', 'II'): + if h[:2] in (b'MM', b'II'): return 'tiff' tests.append(test_tiff) -def test_webp(h, f): +def test_webp(h): if h[:4] == b'RIFF' and h[8:12] == b'WEBP': return 'webp' tests.append(test_webp) -def test_rgb(h, f): +def test_rgb(h): """SGI image library""" - if h[:2] == '\001\332': + if h[:2] == b'\001\332': return 'rgb' tests.append(test_rgb) -def test_pbm(h, f): +def test_pbm(h): """PBM (portable bitmap)""" if len(h) >= 3 and \ - h[0] == 'P' and h[1] in '14' and h[2] in ' \t\n\r': + h[0] == b'P' and h[1] in b'14' and h[2] in b' \t\n\r': return 'pbm' tests.append(test_pbm) -def test_pgm(h, f): +def test_pgm(h): """PGM (portable graymap)""" if len(h) >= 3 and \ - h[0] == 'P' and h[1] in '25' and h[2] in ' \t\n\r': + h[0] == b'P' and h[1] in b'25' and h[2] in b' \t\n\r': return 'pgm' tests.append(test_pgm) -def test_ppm(h, f): +def test_ppm(h): """PPM (portable pixmap)""" if len(h) >= 3 and \ - h[0] == 'P' and h[1] in '36' and h[2] in ' \t\n\r': + h[0] == b'P' and h[1] in b'36' and h[2] in b' \t\n\r': return 'ppm' tests.append(test_ppm) -def test_rast(h, f): +def test_rast(h): """Sun raster file""" - if h[:4] == '\x59\xA6\x6A\x95': + if h[:4] == b'\x59\xA6\x6A\x95': return 'rast' tests.append(test_rast) -def test_xbm(h, f): +def test_xbm(h): """X bitmap (X10 or X11)""" - s = '#define ' + s = b'#define ' if h[:len(s)] == s: return 'xbm' tests.append(test_xbm) -def test_bmp(h, f): - if h[:2] == 'BM': +def test_bmp(h): + if h[:2] == b'BM': return 'bmp' tests.append(test_bmp) -def test_emf(h, f): +def test_emf(h): if h[:4] == b'\x01\0\0\0' and h[40:44] == b' EMF': return 'emf' tests.append(test_emf) -def test_svg(h, f): - if (h[:2] == b'