Finish up code to read image dimensions from headers

This commit is contained in:
Kovid Goyal 2016-05-04 13:37:22 +05:30
parent 193d9dfa24
commit 2c339c0b8f

View File

@ -5,18 +5,22 @@
from __future__ import (unicode_literals, division, absolute_import, from __future__ import (unicode_literals, division, absolute_import,
print_function) print_function)
from struct import unpack, error from struct import unpack, error
import os
from calibre.srv.utils import ReadOnlyFileBuffer
""" Recognize image file formats and sizes based on their first few bytes.""" """ Recognize image file formats and sizes based on their first few bytes."""
HSIZE = 120
def what(file, h=None): def what(file, h=None):
' Recognize image headers ' ' Recognize image headers '
if h is None: if h is None:
if isinstance(file, basestring): if isinstance(file, basestring):
with lopen(file, 'rb') as f: with lopen(file, 'rb') as f:
h = f.read(150) h = f.read(HSIZE)
else: else:
location = file.tell() location = file.tell()
h = file.read(150) h = file.read(HSIZE)
file.seek(location) file.seek(location)
if isinstance(h, bytes): if isinstance(h, bytes):
h = memoryview(h) h = memoryview(h)
@ -30,52 +34,49 @@ def what(file, h=None):
return 'jpeg' return 'jpeg'
return None return None
HSIZE = 200 def identify(src):
def identify(stream_or_data):
''' Recognize file format and sizes. Returns format, width, height. width ''' Recognize file format and sizes. Returns format, width, height. width
and height will be -1 if not found and fmt will be None if the image is not and height will be -1 if not found and fmt will be None if the image is not
recognized. `stream_or_data` can be a unicode string, in which case it is recognized. '''
assumed to be a filename, or a file-like object, or a bytestring. '''
width = height = -1 width = height = -1
if isinstance(stream_or_data, type('')): if isinstance(src, type('')):
with lopen(stream_or_data, 'rb') as sf: stream = lopen(src, 'rb')
head = sf.read(HSIZE) elif isinstance(src, bytes):
elif isinstance(stream_or_data, bytes): stream = ReadOnlyFileBuffer(src)
head = stream_or_data
else: else:
pos = stream_or_data.tell() stream = src
head = stream_or_data.read(HSIZE)
stream_or_data.seek(pos)
if isinstance(head, bytes):
head = memoryview(head)
pos = stream.tell()
head = stream.read(HSIZE)
stream.seek(pos)
fmt = what(None, head) fmt = what(None, head)
if fmt in {'jpeg', 'gif', 'png', 'jpeg2000'}: if fmt in {'jpeg', 'gif', 'png', 'jpeg2000'}:
size = len(head) size = len(head)
if size >= 10 and head[:6] in (b'GIF87a', b'GIF89a'): if fmt == 'png':
# GIF
try:
width, height = unpack(b"<hh", head[6:10])
except error:
return fmt, width, height
elif size >= 16 and head[:8] == b'\211PNG\r\n\032\n':
# PNG # PNG
s = head[16:24] if size >= 24 and head[12:16] == b'IHDR' else head[8:16] s = head[16:24] if size >= 24 and head[12:16] == b'IHDR' else head[8:16]
try: try:
width, height = unpack(b">LL", s) width, height = unpack(b">LL", s)
except error: except error:
return fmt, width, height return fmt, width, height
elif head[:2] == b'\xff\xd8': elif fmt == 'jpeg':
# JPEG # JPEG
pos = stream.tell()
try: try:
width, height = jpeg_dimension(head) height, width = jpeg_dimensions(stream)
except Exception: except Exception:
return fmt, width, height return fmt, width, height
elif size >= 56 and head[:12] == b'\x00\x00\x00\x0cjP \r\n\x87\n': finally:
stream.seek(pos)
elif fmt == 'gif':
# GIF
try:
width, height = unpack(b"<HH", head[6:10])
except error:
return fmt, width, height
elif size >= 56 and fmt == 'jpeg2000':
# JPEG2000 # JPEG2000
try: try:
height, width = unpack(b'>LL', head[48:56]) height, width = unpack(b'>LL', head[48:56])
@ -83,26 +84,6 @@ def identify(stream_or_data):
return fmt, width, height return fmt, width, height
return fmt, width, height return fmt, width, height
def jpeg_dimension(head):
    ''' Return (width, height) read from the first SOFn segment found in
    `head`, the leading bytes of a JPEG file (a bytestring or memoryview
    starting at the SOI marker).

    Raises IndexError or struct.error if `head` ends before a SOFn segment
    has been fully read. '''
    # Indexing a bytearray yields ints on both python 2 and python 3, unlike
    # indexing a bytestring or memoryview
    data = bytearray(head)
    pos = ftype = 0
    size = 2  # The first pass skips the two byte SOI marker
    # 0xc4 (DHT) and 0xcc (DAC) fall inside the SOFn range but are not frame
    # headers, so they must be skipped like any other segment
    while not 0xc0 <= ftype <= 0xcf or ftype in (0xc4, 0xcc):
        pos += size
        byte = data[pos]
        pos += 1
        # Soak up the 0xff marker prefix and any padding
        while byte == 0xff:
            byte = data[pos]
            pos += 1
        ftype = byte
        # The segment length counts its own two bytes
        size = unpack(b'>H', bytes(data[pos:pos + 2]))[0] - 2
        pos += 2
    # We are at a SOFn block, skip the one byte sample precision
    pos += 1
    height, width = unpack(b'>HH', bytes(data[pos:pos + 4]))
    return width, height
# ---------------------------------# # ---------------------------------#
# Subroutines per image file type # # Subroutines per image file type #
# ---------------------------------# # ---------------------------------#
@ -120,6 +101,47 @@ def test_jpeg(h):
if b'JFIF' in q or b'8BIM' in q: if b'JFIF' in q or b'8BIM' in q:
return 'jpeg' return 'jpeg'
def jpeg_dimensions(stream):
    ''' Return the (height, width) stored in the first SOFn segment of the
    JPEG data in `stream`, or (-1, -1) if the data is corrupted or no SOFn
    segment occurs before the start of scan.

    `stream` must be a seekable binary file-like object. The first two bytes
    from the current position are skipped (presumably the SOI marker, so the
    stream is expected to be positioned at the start of the JPEG data). The
    stream position is not restored.

    Raises ValueError if the stream ends in the middle of a read. '''
    # A JPEG marker is two bytes of the form 0xff x where 0 < x < 0xff
    # See section B.1.1.2 of https://www.w3.org/Graphics/JPEG/itu-t81.pdf
    # We read the dimensions from the first SOFn section we come across
    stream.seek(2, os.SEEK_CUR)

    def read(n):
        ans = stream.read(n)
        if len(ans) != n:
            raise ValueError('Truncated JPEG data')
        return ans

    def read_byte():
        # Indexing a bytearray yields an int on both python 2 and python 3
        # and also works if the stream returns a memoryview
        return bytearray(read(1))[0]

    while True:
        # Find next marker. The scan is restarted on every pass so that stray
        # bytes following a segment are never mistaken for a marker code
        while read_byte() != 0xff:
            pass
        # Soak up padding
        q = 0xff
        while q == 0xff:
            q = read_byte()
        if 0xc0 <= q <= 0xcf and q not in (0xc4, 0xcc):
            # SOFn marker, skip the two byte segment length and the one byte
            # sample precision to reach the height and width
            stream.seek(3, os.SEEK_CUR)
            return unpack(b'>HH', read(4))
        if 0xd8 <= q <= 0xda:
            break  # start of image, end of image, start of scan, no point
        if q == 0:
            return -1, -1  # Corrupted JPEG
        if q == 0x01 or 0xd0 <= q <= 0xd7:
            # Standalone marker, it has no length field, keep going
            continue
        # skip this section
        size = unpack(b'>H', read(2))[0]
        if size < 2:
            # The length includes its own two bytes, anything smaller would
            # make us seek backwards
            return -1, -1  # Corrupted JPEG
        stream.seek(size - 2, os.SEEK_CUR)

    return -1, -1
tests.append(test_jpeg) tests.append(test_jpeg)
def test_png(h): def test_png(h):