mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix bug in imghdr that caused some JPEG files to not be identified
This commit is contained in:
parent
9d90ba326d
commit
79c8ede0a8
@ -7,7 +7,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re, tempfile, os, imghdr
|
||||
import re, tempfile, os
|
||||
from functools import partial
|
||||
from itertools import izip
|
||||
from urllib import quote
|
||||
@ -17,6 +17,7 @@ from calibre.customize.conversion import (InputFormatPlugin,
|
||||
OptionRecommendation)
|
||||
from calibre.utils.localization import get_lang
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
from calibre.utils.imghdr import what
|
||||
|
||||
|
||||
class HTMLInput(InputFormatPlugin):
|
||||
@ -250,7 +251,7 @@ class HTMLInput(InputFormatPlugin):
|
||||
if media_type == self.BINARY_MIME:
|
||||
# Check for the common case, images
|
||||
try:
|
||||
img = imghdr.what(link)
|
||||
img = what(link)
|
||||
except EnvironmentError:
|
||||
pass
|
||||
else:
|
||||
|
@ -105,7 +105,7 @@ class RTFInput(InputFormatPlugin):
|
||||
return f.read()
|
||||
|
||||
def extract_images(self, picts):
|
||||
import imghdr
|
||||
from calibre.utils.imghdr import what
|
||||
self.log('Extracting images...')
|
||||
|
||||
with open(picts, 'rb') as f:
|
||||
@ -120,7 +120,7 @@ class RTFInput(InputFormatPlugin):
|
||||
if len(enc) % 2 == 1:
|
||||
enc = enc[:-1]
|
||||
data = enc.decode('hex')
|
||||
fmt = imghdr.what(None, data)
|
||||
fmt = what(None, data)
|
||||
if fmt is None:
|
||||
fmt = 'wmf'
|
||||
count += 1
|
||||
|
@ -9,7 +9,7 @@ __copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net and ' \
|
||||
'Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, cStringIO, imghdr
|
||||
import os, cStringIO
|
||||
from struct import pack, unpack
|
||||
from cStringIO import StringIO
|
||||
|
||||
@ -18,12 +18,13 @@ from calibre.ebooks.mobi import MobiError, MAX_THUMB_DIMEN
|
||||
from calibre.ebooks.mobi.utils import rescale_image
|
||||
from calibre.ebooks.mobi.langcodes import iana2mobi
|
||||
from calibre.utils.date import now as nowf
|
||||
from calibre.utils.imghdr import what
|
||||
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
|
||||
|
||||
def is_image(ss):
|
||||
if ss is None:
|
||||
return False
|
||||
return imghdr.what(None, ss[:200]) is not None
|
||||
return what(None, ss[:200]) is not None
|
||||
|
||||
class StreamSlicer(object):
|
||||
|
||||
|
@ -8,7 +8,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import sys, os, imghdr, struct, textwrap
|
||||
import sys, os, struct, textwrap
|
||||
from itertools import izip
|
||||
|
||||
from calibre import CurrentDir
|
||||
@ -18,6 +18,7 @@ from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex,
|
||||
from calibre.ebooks.mobi.utils import read_font_record, decode_tbs, RECORD_SIZE
|
||||
from calibre.ebooks.mobi.debug import format_bytes
|
||||
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
|
||||
from calibre.utils.imghdr import what
|
||||
|
||||
class FDST(object):
|
||||
|
||||
@ -173,7 +174,7 @@ class MOBIFile(object):
|
||||
font['raw_data'])
|
||||
prefix, ext = 'fonts', font['ext']
|
||||
elif sig not in known_types:
|
||||
q = imghdr.what(None, rec.raw)
|
||||
q = what(None, rec.raw)
|
||||
if q:
|
||||
prefix, ext = 'images', q
|
||||
|
||||
|
@ -7,7 +7,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import struct, re, os, imghdr
|
||||
import struct, re, os
|
||||
from collections import namedtuple
|
||||
from itertools import repeat, izip
|
||||
from urlparse import urldefrag
|
||||
@ -23,6 +23,7 @@ from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.mobi.utils import read_font_record
|
||||
from calibre.ebooks.oeb.parse_utils import parse_html
|
||||
from calibre.ebooks.oeb.base import XPath, XHTML, xml2text
|
||||
from calibre.utils.imghdr import what
|
||||
|
||||
Part = namedtuple('Part',
|
||||
'num type filename start end aid')
|
||||
@ -403,7 +404,7 @@ class Mobi8Reader(object):
|
||||
if font['encrypted']:
|
||||
self.encrypted_fonts.append(href)
|
||||
else:
|
||||
imgtype = imghdr.what(None, data)
|
||||
imgtype = what(None, data)
|
||||
if imgtype is None:
|
||||
imgtype = 'unknown'
|
||||
href = 'images/%05d.%s'%(fname_idx, imgtype)
|
||||
|
@ -7,11 +7,12 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import struct, string, imghdr, zlib, os
|
||||
import struct, string, zlib, os
|
||||
from collections import OrderedDict
|
||||
from io import BytesIO
|
||||
|
||||
from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
|
||||
from calibre.utils.imghdr import what
|
||||
from calibre.ebooks import normalize
|
||||
|
||||
IMAGE_MAX_SIZE = 10 * 1024 * 1024
|
||||
@ -384,9 +385,9 @@ def to_base(num, base=32, min_num_digits=None):
|
||||
|
||||
def mobify_image(data):
|
||||
'Convert PNG images to GIF as the idiotic Kindle cannot display some PNG'
|
||||
what = imghdr.what(None, data)
|
||||
fmt = what(None, data)
|
||||
|
||||
if what == 'png':
|
||||
if fmt == 'png':
|
||||
im = Image()
|
||||
im.load(data)
|
||||
data = im.export('gif')
|
||||
|
@ -7,13 +7,12 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import imghdr
|
||||
|
||||
from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MAX_THUMB_SIZE
|
||||
from calibre.ebooks.mobi.utils import (rescale_image, mobify_image,
|
||||
write_font_record)
|
||||
from calibre.ebooks import generate_masthead
|
||||
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
|
||||
from calibre.utils.imghdr import what
|
||||
|
||||
PLACEHOLDER_GIF = b'GIF89a\x01\x00\x01\x00\x80\x00\x00\x00\x00\x00\xff\xff\xff!\xf9\x04\x01\x00\x00\x00\x00,\x00\x00\x00\x00\x01\x00\x01\x00@\x02\x01D\x00;'
|
||||
|
||||
@ -84,7 +83,7 @@ class Resources(object):
|
||||
self.image_indices.add(len(self.records))
|
||||
self.records.append(data)
|
||||
self.item_map[item.href] = index
|
||||
self.mime_map[item.href] = 'image/%s'%imghdr.what(None, data)
|
||||
self.mime_map[item.href] = 'image/%s'%what(None, data)
|
||||
index += 1
|
||||
|
||||
if cover_href and item.href == cover_href:
|
||||
|
156
src/calibre/utils/imghdr.py
Normal file
156
src/calibre/utils/imghdr.py
Normal file
@ -0,0 +1,156 @@
|
||||
"""Recognize image file formats based on their first few bytes."""
|
||||
|
||||
__all__ = ["what"]
|
||||
|
||||
#-------------------------#
|
||||
# Recognize image headers #
|
||||
#-------------------------#
|
||||
|
||||
def what(file, h=None):
    """Identify the image format of a file or of raw header bytes.

    :param file: A path (string) or an open, seekable file-like object.
        Ignored when ``h`` is supplied.
    :param h: Optional header bytes. When given, nothing is read from
        ``file``.
    :return: The value returned by the first function in ``tests`` that
        yields a true result (for example ``'jpeg'`` or ``'png'``), or
        ``None`` when no test matches.

    Each test is called as ``tf(h, f)``, where ``f`` is the file object this
    function opened itself (path case) and ``None`` otherwise.
    """
    f = None
    try:
        if h is None:
            if isinstance(file, basestring):
                # We own this handle. Opening and reading inside the try
                # block guarantees it is closed even if read() raises.
                f = open(file, 'rb')
                h = f.read(32)
            else:
                # Caller-owned stream: peek at the header, then restore the
                # position so the caller's subsequent reads are unaffected.
                location = file.tell()
                h = file.read(32)
                file.seek(location)
        for tf in tests:
            res = tf(h, f)
            if res:
                return res
    finally:
        if f:
            f.close()
    return None
|
||||
|
||||
|
||||
#---------------------------------#
|
||||
# Subroutines per image file type #
|
||||
#---------------------------------#
|
||||
|
||||
tests = []
|
||||
|
||||
def test_jpeg(h, f):
|
||||
"""JPEG data in JFIF format (Changed by Kovid to mimic the file utility,
|
||||
the original code was failing with some jpegs that included ICC_PROFILE
|
||||
data, for example: http://nationalpostnews.files.wordpress.com/2013/03/budget.jpeg?w=300&h=1571)"""
|
||||
if (h[6:10] in (b'JFIF', b'Exif')) or (h[:2] == b'\xff\xd8' and b'JFIF' in h[:32]):
|
||||
return 'jpeg'
|
||||
|
||||
tests.append(test_jpeg)
|
||||
|
||||
def test_png(h, f):
|
||||
if h[:8] == "\211PNG\r\n\032\n":
|
||||
return 'png'
|
||||
|
||||
tests.append(test_png)
|
||||
|
||||
def test_gif(h, f):
|
||||
"""GIF ('87 and '89 variants)"""
|
||||
if h[:6] in ('GIF87a', 'GIF89a'):
|
||||
return 'gif'
|
||||
|
||||
tests.append(test_gif)
|
||||
|
||||
def test_tiff(h, f):
|
||||
"""TIFF (can be in Motorola or Intel byte order)"""
|
||||
if h[:2] in ('MM', 'II'):
|
||||
return 'tiff'
|
||||
|
||||
tests.append(test_tiff)
|
||||
|
||||
def test_rgb(h, f):
|
||||
"""SGI image library"""
|
||||
if h[:2] == '\001\332':
|
||||
return 'rgb'
|
||||
|
||||
tests.append(test_rgb)
|
||||
|
||||
def test_pbm(h, f):
|
||||
"""PBM (portable bitmap)"""
|
||||
if len(h) >= 3 and \
|
||||
h[0] == 'P' and h[1] in '14' and h[2] in ' \t\n\r':
|
||||
return 'pbm'
|
||||
|
||||
tests.append(test_pbm)
|
||||
|
||||
def test_pgm(h, f):
|
||||
"""PGM (portable graymap)"""
|
||||
if len(h) >= 3 and \
|
||||
h[0] == 'P' and h[1] in '25' and h[2] in ' \t\n\r':
|
||||
return 'pgm'
|
||||
|
||||
tests.append(test_pgm)
|
||||
|
||||
def test_ppm(h, f):
|
||||
"""PPM (portable pixmap)"""
|
||||
if len(h) >= 3 and \
|
||||
h[0] == 'P' and h[1] in '36' and h[2] in ' \t\n\r':
|
||||
return 'ppm'
|
||||
|
||||
tests.append(test_ppm)
|
||||
|
||||
def test_rast(h, f):
|
||||
"""Sun raster file"""
|
||||
if h[:4] == '\x59\xA6\x6A\x95':
|
||||
return 'rast'
|
||||
|
||||
tests.append(test_rast)
|
||||
|
||||
def test_xbm(h, f):
|
||||
"""X bitmap (X10 or X11)"""
|
||||
s = '#define '
|
||||
if h[:len(s)] == s:
|
||||
return 'xbm'
|
||||
|
||||
tests.append(test_xbm)
|
||||
|
||||
def test_bmp(h, f):
|
||||
if h[:2] == 'BM':
|
||||
return 'bmp'
|
||||
|
||||
tests.append(test_bmp)
|
||||
|
||||
#--------------------#
|
||||
# Small test program #
|
||||
#--------------------#
|
||||
|
||||
def test():
    """Command-line driver: report the detected type of each path argument.

    A leading ``-r`` argument is removed from ``sys.argv`` and turns on
    recursion into sub-directories. With no path arguments the current
    directory is examined. Ctrl-C prints a notice to stderr and exits with
    status 1.
    """
    import sys
    argv = sys.argv  # same list object, so the deletion below edits sys.argv
    recursive = 0
    if len(argv) > 1 and argv[1] == '-r':
        del argv[1:2]
        recursive = 1
    try:
        targets = argv[1:]
        if not targets:
            targets = ['.']
        testall(targets, recursive, 1)
    except KeyboardInterrupt:
        sys.stderr.write('\n[Interrupted]\n')
        sys.exit(1)
|
||||
|
||||
def testall(list, recursive, toplevel):
|
||||
import sys
|
||||
import os
|
||||
for filename in list:
|
||||
if os.path.isdir(filename):
|
||||
print filename + '/:',
|
||||
if recursive or toplevel:
|
||||
print 'recursing down:'
|
||||
import glob
|
||||
names = glob.glob(os.path.join(filename, '*'))
|
||||
testall(names, recursive, 0)
|
||||
else:
|
||||
print '*** directory (use -r) ***'
|
||||
else:
|
||||
print filename + ':',
|
||||
sys.stdout.flush()
|
||||
try:
|
||||
print what(filename)
|
||||
except IOError:
|
||||
print '*** not found ***'
|
@ -7,7 +7,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
Fetch a webpage and its links recursively. The webpages are saved to disk in
|
||||
UTF-8 encoding with any charset declarations removed.
|
||||
'''
|
||||
import sys, socket, os, urlparse, re, time, copy, urllib2, threading, traceback, imghdr
|
||||
import sys, socket, os, urlparse, re, time, copy, urllib2, threading, traceback
|
||||
from urllib import url2pathname, quote
|
||||
from httplib import responses
|
||||
from base64 import b64decode
|
||||
@ -21,6 +21,7 @@ from calibre.utils.config import OptionParser
|
||||
from calibre.utils.logging import Log
|
||||
from calibre.utils.magick import Image
|
||||
from calibre.utils.magick.draw import identify_data, thumbnail
|
||||
from calibre.utils.imghdr import what
|
||||
|
||||
class FetchError(Exception):
|
||||
pass
|
||||
@ -413,7 +414,7 @@ class RecursiveFetcher(object):
|
||||
fname = ascii_filename('img'+str(c))
|
||||
if isinstance(fname, unicode):
|
||||
fname = fname.encode('ascii', 'replace')
|
||||
itype = imghdr.what(None, data)
|
||||
itype = what(None, data)
|
||||
if itype is None and b'<svg' in data[:1024]:
|
||||
# SVG image
|
||||
imgpath = os.path.join(diskpath, fname+'.svg')
|
||||
|
Loading…
x
Reference in New Issue
Block a user