mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix bug in imghdr that caused some JPEG files to not be identified
This commit is contained in:
parent
9d90ba326d
commit
79c8ede0a8
@ -7,7 +7,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re, tempfile, os, imghdr
|
import re, tempfile, os
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from itertools import izip
|
from itertools import izip
|
||||||
from urllib import quote
|
from urllib import quote
|
||||||
@ -17,6 +17,7 @@ from calibre.customize.conversion import (InputFormatPlugin,
|
|||||||
OptionRecommendation)
|
OptionRecommendation)
|
||||||
from calibre.utils.localization import get_lang
|
from calibre.utils.localization import get_lang
|
||||||
from calibre.utils.filenames import ascii_filename
|
from calibre.utils.filenames import ascii_filename
|
||||||
|
from calibre.utils.imghdr import what
|
||||||
|
|
||||||
|
|
||||||
class HTMLInput(InputFormatPlugin):
|
class HTMLInput(InputFormatPlugin):
|
||||||
@ -250,7 +251,7 @@ class HTMLInput(InputFormatPlugin):
|
|||||||
if media_type == self.BINARY_MIME:
|
if media_type == self.BINARY_MIME:
|
||||||
# Check for the common case, images
|
# Check for the common case, images
|
||||||
try:
|
try:
|
||||||
img = imghdr.what(link)
|
img = what(link)
|
||||||
except EnvironmentError:
|
except EnvironmentError:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
|
@ -105,7 +105,7 @@ class RTFInput(InputFormatPlugin):
|
|||||||
return f.read()
|
return f.read()
|
||||||
|
|
||||||
def extract_images(self, picts):
|
def extract_images(self, picts):
|
||||||
import imghdr
|
from calibre.utils.imghdr import what
|
||||||
self.log('Extracting images...')
|
self.log('Extracting images...')
|
||||||
|
|
||||||
with open(picts, 'rb') as f:
|
with open(picts, 'rb') as f:
|
||||||
@ -120,7 +120,7 @@ class RTFInput(InputFormatPlugin):
|
|||||||
if len(enc) % 2 == 1:
|
if len(enc) % 2 == 1:
|
||||||
enc = enc[:-1]
|
enc = enc[:-1]
|
||||||
data = enc.decode('hex')
|
data = enc.decode('hex')
|
||||||
fmt = imghdr.what(None, data)
|
fmt = what(None, data)
|
||||||
if fmt is None:
|
if fmt is None:
|
||||||
fmt = 'wmf'
|
fmt = 'wmf'
|
||||||
count += 1
|
count += 1
|
||||||
|
@ -9,7 +9,7 @@ __copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net and ' \
|
|||||||
'Marshall T. Vandegrift <llasram@gmail.com>'
|
'Marshall T. Vandegrift <llasram@gmail.com>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os, cStringIO, imghdr
|
import os, cStringIO
|
||||||
from struct import pack, unpack
|
from struct import pack, unpack
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
|
|
||||||
@ -18,12 +18,13 @@ from calibre.ebooks.mobi import MobiError, MAX_THUMB_DIMEN
|
|||||||
from calibre.ebooks.mobi.utils import rescale_image
|
from calibre.ebooks.mobi.utils import rescale_image
|
||||||
from calibre.ebooks.mobi.langcodes import iana2mobi
|
from calibre.ebooks.mobi.langcodes import iana2mobi
|
||||||
from calibre.utils.date import now as nowf
|
from calibre.utils.date import now as nowf
|
||||||
|
from calibre.utils.imghdr import what
|
||||||
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
|
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
|
||||||
|
|
||||||
def is_image(ss):
|
def is_image(ss):
|
||||||
if ss is None:
|
if ss is None:
|
||||||
return False
|
return False
|
||||||
return imghdr.what(None, ss[:200]) is not None
|
return what(None, ss[:200]) is not None
|
||||||
|
|
||||||
class StreamSlicer(object):
|
class StreamSlicer(object):
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import sys, os, imghdr, struct, textwrap
|
import sys, os, struct, textwrap
|
||||||
from itertools import izip
|
from itertools import izip
|
||||||
|
|
||||||
from calibre import CurrentDir
|
from calibre import CurrentDir
|
||||||
@ -18,6 +18,7 @@ from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex,
|
|||||||
from calibre.ebooks.mobi.utils import read_font_record, decode_tbs, RECORD_SIZE
|
from calibre.ebooks.mobi.utils import read_font_record, decode_tbs, RECORD_SIZE
|
||||||
from calibre.ebooks.mobi.debug import format_bytes
|
from calibre.ebooks.mobi.debug import format_bytes
|
||||||
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
|
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
|
||||||
|
from calibre.utils.imghdr import what
|
||||||
|
|
||||||
class FDST(object):
|
class FDST(object):
|
||||||
|
|
||||||
@ -173,7 +174,7 @@ class MOBIFile(object):
|
|||||||
font['raw_data'])
|
font['raw_data'])
|
||||||
prefix, ext = 'fonts', font['ext']
|
prefix, ext = 'fonts', font['ext']
|
||||||
elif sig not in known_types:
|
elif sig not in known_types:
|
||||||
q = imghdr.what(None, rec.raw)
|
q = what(None, rec.raw)
|
||||||
if q:
|
if q:
|
||||||
prefix, ext = 'images', q
|
prefix, ext = 'images', q
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import struct, re, os, imghdr
|
import struct, re, os
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
from itertools import repeat, izip
|
from itertools import repeat, izip
|
||||||
from urlparse import urldefrag
|
from urlparse import urldefrag
|
||||||
@ -23,6 +23,7 @@ from calibre.ebooks.metadata.toc import TOC
|
|||||||
from calibre.ebooks.mobi.utils import read_font_record
|
from calibre.ebooks.mobi.utils import read_font_record
|
||||||
from calibre.ebooks.oeb.parse_utils import parse_html
|
from calibre.ebooks.oeb.parse_utils import parse_html
|
||||||
from calibre.ebooks.oeb.base import XPath, XHTML, xml2text
|
from calibre.ebooks.oeb.base import XPath, XHTML, xml2text
|
||||||
|
from calibre.utils.imghdr import what
|
||||||
|
|
||||||
Part = namedtuple('Part',
|
Part = namedtuple('Part',
|
||||||
'num type filename start end aid')
|
'num type filename start end aid')
|
||||||
@ -403,7 +404,7 @@ class Mobi8Reader(object):
|
|||||||
if font['encrypted']:
|
if font['encrypted']:
|
||||||
self.encrypted_fonts.append(href)
|
self.encrypted_fonts.append(href)
|
||||||
else:
|
else:
|
||||||
imgtype = imghdr.what(None, data)
|
imgtype = what(None, data)
|
||||||
if imgtype is None:
|
if imgtype is None:
|
||||||
imgtype = 'unknown'
|
imgtype = 'unknown'
|
||||||
href = 'images/%05d.%s'%(fname_idx, imgtype)
|
href = 'images/%05d.%s'%(fname_idx, imgtype)
|
||||||
|
@ -7,11 +7,12 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import struct, string, imghdr, zlib, os
|
import struct, string, zlib, os
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
|
||||||
from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
|
from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
|
||||||
|
from calibre.utils.imghdr import what
|
||||||
from calibre.ebooks import normalize
|
from calibre.ebooks import normalize
|
||||||
|
|
||||||
IMAGE_MAX_SIZE = 10 * 1024 * 1024
|
IMAGE_MAX_SIZE = 10 * 1024 * 1024
|
||||||
@ -384,9 +385,9 @@ def to_base(num, base=32, min_num_digits=None):
|
|||||||
|
|
||||||
def mobify_image(data):
|
def mobify_image(data):
|
||||||
'Convert PNG images to GIF as the idiotic Kindle cannot display some PNG'
|
'Convert PNG images to GIF as the idiotic Kindle cannot display some PNG'
|
||||||
what = imghdr.what(None, data)
|
fmt = what(None, data)
|
||||||
|
|
||||||
if what == 'png':
|
if fmt == 'png':
|
||||||
im = Image()
|
im = Image()
|
||||||
im.load(data)
|
im.load(data)
|
||||||
data = im.export('gif')
|
data = im.export('gif')
|
||||||
|
@ -7,13 +7,12 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import imghdr
|
|
||||||
|
|
||||||
from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MAX_THUMB_SIZE
|
from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MAX_THUMB_SIZE
|
||||||
from calibre.ebooks.mobi.utils import (rescale_image, mobify_image,
|
from calibre.ebooks.mobi.utils import (rescale_image, mobify_image,
|
||||||
write_font_record)
|
write_font_record)
|
||||||
from calibre.ebooks import generate_masthead
|
from calibre.ebooks import generate_masthead
|
||||||
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
|
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
|
||||||
|
from calibre.utils.imghdr import what
|
||||||
|
|
||||||
PLACEHOLDER_GIF = b'GIF89a\x01\x00\x01\x00\x80\x00\x00\x00\x00\x00\xff\xff\xff!\xf9\x04\x01\x00\x00\x00\x00,\x00\x00\x00\x00\x01\x00\x01\x00@\x02\x01D\x00;'
|
PLACEHOLDER_GIF = b'GIF89a\x01\x00\x01\x00\x80\x00\x00\x00\x00\x00\xff\xff\xff!\xf9\x04\x01\x00\x00\x00\x00,\x00\x00\x00\x00\x01\x00\x01\x00@\x02\x01D\x00;'
|
||||||
|
|
||||||
@ -84,7 +83,7 @@ class Resources(object):
|
|||||||
self.image_indices.add(len(self.records))
|
self.image_indices.add(len(self.records))
|
||||||
self.records.append(data)
|
self.records.append(data)
|
||||||
self.item_map[item.href] = index
|
self.item_map[item.href] = index
|
||||||
self.mime_map[item.href] = 'image/%s'%imghdr.what(None, data)
|
self.mime_map[item.href] = 'image/%s'%what(None, data)
|
||||||
index += 1
|
index += 1
|
||||||
|
|
||||||
if cover_href and item.href == cover_href:
|
if cover_href and item.href == cover_href:
|
||||||
|
156
src/calibre/utils/imghdr.py
Normal file
156
src/calibre/utils/imghdr.py
Normal file
@ -0,0 +1,156 @@
|
|||||||
|
"""Recognize image file formats based on their first few bytes."""
|
||||||
|
|
||||||
|
__all__ = ["what"]
|
||||||
|
|
||||||
|
#-------------------------#
|
||||||
|
# Recognize image headers #
|
||||||
|
#-------------------------#
|
||||||
|
|
||||||
|
def what(file, h=None):
    """Identify the image format of a file or of raw header bytes.

    :param file: Either a path (a ``basestring``) or an already open
        file-like object providing ``tell``, ``read`` and ``seek``. It is
        ignored entirely when ``h`` is supplied.
    :param h: Optional string holding the start of the image data. When it
        is given nothing is read from ``file``.
    :return: The first truthy value returned by one of the functions in the
        module-level ``tests`` list (for example ``'jpeg'``), or ``None``
        when no test recognises the header.
    """
    f = None
    try:
        if h is None:
            if isinstance(file, basestring):
                # The open/read pair lives inside the try block so that the
                # handle is closed even if reading the header fails.
                f = open(file, 'rb')
                h = f.read(32)
            else:
                # Caller owns this object: peek at the first 32 bytes and
                # put the read position back where it was.
                location = file.tell()
                h = file.read(32)
                file.seek(location)
        for tf in tests:
            # f is only non-None when this function opened the file itself.
            res = tf(h, f)
            if res:
                return res
    finally:
        if f is not None:
            f.close()
    return None
|
||||||
|
|
||||||
|
|
||||||
|
#---------------------------------#
|
||||||
|
# Subroutines per image file type #
|
||||||
|
#---------------------------------#
|
||||||
|
|
||||||
|
tests = []
|
||||||
|
|
||||||
|
def test_jpeg(h, f):
|
||||||
|
"""JPEG data in JFIF format (Changed by Kovid to mimic the file utility,
|
||||||
|
the original code was failing with some jpegs that included ICC_PROFILE
|
||||||
|
data, for example: http://nationalpostnews.files.wordpress.com/2013/03/budget.jpeg?w=300&h=1571)"""
|
||||||
|
if (h[6:10] in (b'JFIF', b'Exif')) or (h[:2] == b'\xff\xd8' and b'JFIF' in h[:32]):
|
||||||
|
return 'jpeg'
|
||||||
|
|
||||||
|
tests.append(test_jpeg)
|
||||||
|
|
||||||
|
def test_png(h, f):
|
||||||
|
if h[:8] == "\211PNG\r\n\032\n":
|
||||||
|
return 'png'
|
||||||
|
|
||||||
|
tests.append(test_png)
|
||||||
|
|
||||||
|
def test_gif(h, f):
|
||||||
|
"""GIF ('87 and '89 variants)"""
|
||||||
|
if h[:6] in ('GIF87a', 'GIF89a'):
|
||||||
|
return 'gif'
|
||||||
|
|
||||||
|
tests.append(test_gif)
|
||||||
|
|
||||||
|
def test_tiff(h, f):
|
||||||
|
"""TIFF (can be in Motorola or Intel byte order)"""
|
||||||
|
if h[:2] in ('MM', 'II'):
|
||||||
|
return 'tiff'
|
||||||
|
|
||||||
|
tests.append(test_tiff)
|
||||||
|
|
||||||
|
def test_rgb(h, f):
|
||||||
|
"""SGI image library"""
|
||||||
|
if h[:2] == '\001\332':
|
||||||
|
return 'rgb'
|
||||||
|
|
||||||
|
tests.append(test_rgb)
|
||||||
|
|
||||||
|
def test_pbm(h, f):
|
||||||
|
"""PBM (portable bitmap)"""
|
||||||
|
if len(h) >= 3 and \
|
||||||
|
h[0] == 'P' and h[1] in '14' and h[2] in ' \t\n\r':
|
||||||
|
return 'pbm'
|
||||||
|
|
||||||
|
tests.append(test_pbm)
|
||||||
|
|
||||||
|
def test_pgm(h, f):
|
||||||
|
"""PGM (portable graymap)"""
|
||||||
|
if len(h) >= 3 and \
|
||||||
|
h[0] == 'P' and h[1] in '25' and h[2] in ' \t\n\r':
|
||||||
|
return 'pgm'
|
||||||
|
|
||||||
|
tests.append(test_pgm)
|
||||||
|
|
||||||
|
def test_ppm(h, f):
|
||||||
|
"""PPM (portable pixmap)"""
|
||||||
|
if len(h) >= 3 and \
|
||||||
|
h[0] == 'P' and h[1] in '36' and h[2] in ' \t\n\r':
|
||||||
|
return 'ppm'
|
||||||
|
|
||||||
|
tests.append(test_ppm)
|
||||||
|
|
||||||
|
def test_rast(h, f):
|
||||||
|
"""Sun raster file"""
|
||||||
|
if h[:4] == '\x59\xA6\x6A\x95':
|
||||||
|
return 'rast'
|
||||||
|
|
||||||
|
tests.append(test_rast)
|
||||||
|
|
||||||
|
def test_xbm(h, f):
|
||||||
|
"""X bitmap (X10 or X11)"""
|
||||||
|
s = '#define '
|
||||||
|
if h[:len(s)] == s:
|
||||||
|
return 'xbm'
|
||||||
|
|
||||||
|
tests.append(test_xbm)
|
||||||
|
|
||||||
|
def test_bmp(h, f):
|
||||||
|
if h[:2] == 'BM':
|
||||||
|
return 'bmp'
|
||||||
|
|
||||||
|
tests.append(test_bmp)
|
||||||
|
|
||||||
|
#--------------------#
|
||||||
|
# Small test program #
|
||||||
|
#--------------------#
|
||||||
|
|
||||||
|
def test():
    """Command line driver: report the image type of each path argument.

    A leading '-r' argument is removed from sys.argv and switches on
    recursion into sub-directories. With no path arguments the current
    directory is examined. Ctrl-C prints a notice and exits with status 1.
    """
    import sys
    argv = sys.argv  # same list object, so the deletion below is visible globally
    recursive = 0
    if len(argv) > 1 and argv[1] == '-r':
        del argv[1:2]
        recursive = 1
    try:
        # Fall back to the current directory when no paths were given.
        testall(argv[1:] or ['.'], recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)
|
||||||
|
|
||||||
|
def testall(list, recursive, toplevel):
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
for filename in list:
|
||||||
|
if os.path.isdir(filename):
|
||||||
|
print filename + '/:',
|
||||||
|
if recursive or toplevel:
|
||||||
|
print 'recursing down:'
|
||||||
|
import glob
|
||||||
|
names = glob.glob(os.path.join(filename, '*'))
|
||||||
|
testall(names, recursive, 0)
|
||||||
|
else:
|
||||||
|
print '*** directory (use -r) ***'
|
||||||
|
else:
|
||||||
|
print filename + ':',
|
||||||
|
sys.stdout.flush()
|
||||||
|
try:
|
||||||
|
print what(filename)
|
||||||
|
except IOError:
|
||||||
|
print '*** not found ***'
|
@ -7,7 +7,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
Fetch a webpage and its links recursively. The webpages are saved to disk in
|
Fetch a webpage and its links recursively. The webpages are saved to disk in
|
||||||
UTF-8 encoding with any charset declarations removed.
|
UTF-8 encoding with any charset declarations removed.
|
||||||
'''
|
'''
|
||||||
import sys, socket, os, urlparse, re, time, copy, urllib2, threading, traceback, imghdr
|
import sys, socket, os, urlparse, re, time, copy, urllib2, threading, traceback
|
||||||
from urllib import url2pathname, quote
|
from urllib import url2pathname, quote
|
||||||
from httplib import responses
|
from httplib import responses
|
||||||
from base64 import b64decode
|
from base64 import b64decode
|
||||||
@ -21,6 +21,7 @@ from calibre.utils.config import OptionParser
|
|||||||
from calibre.utils.logging import Log
|
from calibre.utils.logging import Log
|
||||||
from calibre.utils.magick import Image
|
from calibre.utils.magick import Image
|
||||||
from calibre.utils.magick.draw import identify_data, thumbnail
|
from calibre.utils.magick.draw import identify_data, thumbnail
|
||||||
|
from calibre.utils.imghdr import what
|
||||||
|
|
||||||
class FetchError(Exception):
|
class FetchError(Exception):
|
||||||
pass
|
pass
|
||||||
@ -413,7 +414,7 @@ class RecursiveFetcher(object):
|
|||||||
fname = ascii_filename('img'+str(c))
|
fname = ascii_filename('img'+str(c))
|
||||||
if isinstance(fname, unicode):
|
if isinstance(fname, unicode):
|
||||||
fname = fname.encode('ascii', 'replace')
|
fname = fname.encode('ascii', 'replace')
|
||||||
itype = imghdr.what(None, data)
|
itype = what(None, data)
|
||||||
if itype is None and b'<svg' in data[:1024]:
|
if itype is None and b'<svg' in data[:1024]:
|
||||||
# SVG image
|
# SVG image
|
||||||
imgpath = os.path.join(diskpath, fname+'.svg')
|
imgpath = os.path.join(diskpath, fname+'.svg')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user