mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Finish up code to read image dimensions from headers
This commit is contained in:
parent
193d9dfa24
commit
2c339c0b8f
@ -5,18 +5,22 @@
|
|||||||
from __future__ import (unicode_literals, division, absolute_import,
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
print_function)
|
print_function)
|
||||||
from struct import unpack, error
|
from struct import unpack, error
|
||||||
|
import os
|
||||||
|
from calibre.srv.utils import ReadOnlyFileBuffer
|
||||||
|
|
||||||
""" Recognize image file formats and sizes based on their first few bytes."""
|
""" Recognize image file formats and sizes based on their first few bytes."""
|
||||||
|
|
||||||
|
HSIZE = 120
|
||||||
|
|
||||||
def what(file, h=None):
|
def what(file, h=None):
|
||||||
' Recognize image headers '
|
' Recognize image headers '
|
||||||
if h is None:
|
if h is None:
|
||||||
if isinstance(file, basestring):
|
if isinstance(file, basestring):
|
||||||
with lopen(file, 'rb') as f:
|
with lopen(file, 'rb') as f:
|
||||||
h = f.read(150)
|
h = f.read(HSIZE)
|
||||||
else:
|
else:
|
||||||
location = file.tell()
|
location = file.tell()
|
||||||
h = file.read(150)
|
h = file.read(HSIZE)
|
||||||
file.seek(location)
|
file.seek(location)
|
||||||
if isinstance(h, bytes):
|
if isinstance(h, bytes):
|
||||||
h = memoryview(h)
|
h = memoryview(h)
|
||||||
@ -30,52 +34,49 @@ def what(file, h=None):
|
|||||||
return 'jpeg'
|
return 'jpeg'
|
||||||
return None
|
return None
|
||||||
|
|
||||||
HSIZE = 200
|
def identify(src):
|
||||||
|
|
||||||
def identify(stream_or_data):
|
|
||||||
''' Recognize file format and sizes. Returns format, width, height. width
|
''' Recognize file format and sizes. Returns format, width, height. width
|
||||||
and height will be -1 if not found and fmt will be None if the image is not
|
and height will be -1 if not found and fmt will be None if the image is not
|
||||||
recognized. `stream_or_data` can be a unicode string, in which case it is
|
recognized. '''
|
||||||
assumed to be a filename, or a file-like object, or a bytestring. '''
|
|
||||||
width = height = -1
|
width = height = -1
|
||||||
|
|
||||||
if isinstance(stream_or_data, type('')):
|
if isinstance(src, type('')):
|
||||||
with lopen(stream_or_data, 'rb') as sf:
|
stream = lopen(src, 'rb')
|
||||||
head = sf.read(HSIZE)
|
elif isinstance(src, bytes):
|
||||||
elif isinstance(stream_or_data, bytes):
|
stream = ReadOnlyFileBuffer(src)
|
||||||
head = stream_or_data
|
|
||||||
else:
|
else:
|
||||||
pos = stream_or_data.tell()
|
stream = src
|
||||||
head = stream_or_data.read(HSIZE)
|
|
||||||
stream_or_data.seek(pos)
|
|
||||||
|
|
||||||
if isinstance(head, bytes):
|
|
||||||
head = memoryview(head)
|
|
||||||
|
|
||||||
|
pos = stream.tell()
|
||||||
|
head = stream.read(HSIZE)
|
||||||
|
stream.seek(pos)
|
||||||
fmt = what(None, head)
|
fmt = what(None, head)
|
||||||
|
|
||||||
if fmt in {'jpeg', 'gif', 'png', 'jpeg2000'}:
|
if fmt in {'jpeg', 'gif', 'png', 'jpeg2000'}:
|
||||||
size = len(head)
|
size = len(head)
|
||||||
if size >= 10 and head[:6] in (b'GIF87a', b'GIF89a'):
|
if fmt == 'png':
|
||||||
# GIF
|
|
||||||
try:
|
|
||||||
width, height = unpack(b"<hh", head[6:10])
|
|
||||||
except error:
|
|
||||||
return fmt, width, height
|
|
||||||
elif size >= 16 and head[:8] == b'\211PNG\r\n\032\n':
|
|
||||||
# PNG
|
# PNG
|
||||||
s = head[16:24] if size >= 24 and head[12:16] == b'IHDR' else head[8:16]
|
s = head[16:24] if size >= 24 and head[12:16] == b'IHDR' else head[8:16]
|
||||||
try:
|
try:
|
||||||
width, height = unpack(b">LL", s)
|
width, height = unpack(b">LL", s)
|
||||||
except error:
|
except error:
|
||||||
return fmt, width, height
|
return fmt, width, height
|
||||||
elif head[:2] == b'\xff\xd8':
|
elif fmt == 'jpeg':
|
||||||
# JPEG
|
# JPEG
|
||||||
|
pos = stream.tell()
|
||||||
try:
|
try:
|
||||||
width, height = jpeg_dimension(head)
|
height, width = jpeg_dimensions(stream)
|
||||||
except Exception:
|
except Exception:
|
||||||
return fmt, width, height
|
return fmt, width, height
|
||||||
elif size >= 56 and head[:12] == b'\x00\x00\x00\x0cjP \r\n\x87\n':
|
finally:
|
||||||
|
stream.seek(pos)
|
||||||
|
elif fmt == 'gif':
|
||||||
|
# GIF
|
||||||
|
try:
|
||||||
|
width, height = unpack(b"<HH", head[6:10])
|
||||||
|
except error:
|
||||||
|
return fmt, width, height
|
||||||
|
elif size >= 56 and fmt == 'jpeg2000':
|
||||||
# JPEG2000
|
# JPEG2000
|
||||||
try:
|
try:
|
||||||
height, width = unpack(b'>LL', head[48:56])
|
height, width = unpack(b'>LL', head[48:56])
|
||||||
@ -83,26 +84,6 @@ def identify(stream_or_data):
|
|||||||
return fmt, width, height
|
return fmt, width, height
|
||||||
return fmt, width, height
|
return fmt, width, height
|
||||||
|
|
||||||
|
|
||||||
def jpeg_dimension(head):
|
|
||||||
pos = ftype = 0
|
|
||||||
size = 2
|
|
||||||
while not 0xc0 <= ftype <= 0xcf:
|
|
||||||
pos += size
|
|
||||||
byte = head[pos]
|
|
||||||
pos += 1
|
|
||||||
while byte == b'\xff':
|
|
||||||
byte = head[pos]
|
|
||||||
pos += 1
|
|
||||||
ftype = ord(byte)
|
|
||||||
size = unpack(b'>H', head[pos:pos+2])[0] - 2
|
|
||||||
pos += 2
|
|
||||||
# We are at a SOFn block
|
|
||||||
pos += 1
|
|
||||||
height, width = unpack(b'>HH', head[pos:pos+4])
|
|
||||||
return width, height
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------#
|
# ---------------------------------#
|
||||||
# Subroutines per image file type #
|
# Subroutines per image file type #
|
||||||
# ---------------------------------#
|
# ---------------------------------#
|
||||||
@ -120,6 +101,47 @@ def test_jpeg(h):
|
|||||||
if b'JFIF' in q or b'8BIM' in q:
|
if b'JFIF' in q or b'8BIM' in q:
|
||||||
return 'jpeg'
|
return 'jpeg'
|
||||||
|
|
||||||
|
def jpeg_dimensions(stream):
    '''
    Read the image dimensions from the first SOFn section of a JPEG stream.

    `stream` must be a seekable, binary file-like object positioned at the
    start of the JPEG data (i.e. at the two byte SOI marker). The stream
    position is NOT restored, callers that care must save and restore it
    themselves.

    Returns the tuple (height, width), in that order. Returns (-1, -1) if a
    start of image, end of image or start of scan marker is seen before any
    SOFn section, or if the data is recognizably corrupted.

    Raises ValueError if the data ends before a SOFn section is found.
    '''
    # A JPEG marker is two bytes of the form 0xff x where 0 < x < 0xff
    # See section B.1.1.2 of https://www.w3.org/Graphics/JPEG/itu-t81.pdf
    # We read the dimensions from the first SOFn section we come across
    stream.seek(2, os.SEEK_CUR)  # skip the SOI marker

    def read(n):
        ans = stream.read(n)
        if len(ans) != n:
            raise ValueError('Truncated JPEG data')
        return ans

    def read_byte():
        # Going through bytearray gives an integer regardless of whether
        # read() returned a str, bytes or memoryview and regardless of the
        # python version (indexing bytes gives a str on python 2 but an int
        # on python 3, so ord(read(1)[0]) is not portable).
        return bytearray(read(1))[0]

    while True:
        # Find next marker. This scan is done afresh for every marker so that
        # we never interpret a byte that was not preceded by 0xff as a marker
        # code.
        while read_byte() != 0xff:
            pass
        # Soak up padding (a marker may be preceded by any number of 0xff
        # fill bytes)
        q = 0xff
        while q == 0xff:
            q = read_byte()
        if 0xc0 <= q <= 0xcf and q != 0xc4 and q != 0xcc:
            # SOFn marker (0xc4 is DHT and 0xcc is DAC, which share the range
            # but are not frame headers). Skip the two byte section length and
            # the one byte sample precision, then read height and width.
            stream.seek(3, os.SEEK_CUR)
            return unpack(b'>HH', read(4))
        elif 0xd8 <= q <= 0xda:
            # start of image, end of image, start of scan, no point
            return -1, -1
        elif q == 0:
            return -1, -1  # Corrupted JPEG
        elif q == 0x01 or 0xd0 <= q <= 0xd7:
            # Standalone marker, it has no section to skip, keep going
            continue
        else:
            # skip this section
            size = unpack(b'>H', read(2))[0]
            if size < 2:
                # The length includes its own two bytes, anything smaller is
                # invalid and would make us seek backwards
                return -1, -1  # Corrupted JPEG
            stream.seek(size - 2, os.SEEK_CUR)
|
||||||
|
|
||||||
tests.append(test_jpeg)
|
tests.append(test_jpeg)
|
||||||
|
|
||||||
def test_png(h):
|
def test_png(h):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user