News download: Add support for <img> tags that link to SVG images. Fixes #1031553 (web2disk doesn't support SVG images)

This commit is contained in:
Kovid Goyal 2012-08-01 13:28:49 +05:30
parent 0d843a89ea
commit 2848314bf9

View File

@ -7,7 +7,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Fetch a webpage and its links recursively. The webpages are saved to disk in Fetch a webpage and its links recursively. The webpages are saved to disk in
UTF-8 encoding with any charset declarations removed. UTF-8 encoding with any charset declarations removed.
''' '''
import sys, socket, os, urlparse, re, time, copy, urllib2, threading, traceback import sys, socket, os, urlparse, re, time, copy, urllib2, threading, traceback, imghdr
from urllib import url2pathname, quote from urllib import url2pathname, quote
from httplib import responses from httplib import responses
from PIL import Image from PIL import Image
@ -375,16 +375,25 @@ class RecursiveFetcher(object):
if isinstance(fname, unicode): if isinstance(fname, unicode):
fname = fname.encode('ascii', 'replace') fname = fname.encode('ascii', 'replace')
imgpath = os.path.join(diskpath, fname+'.jpg') imgpath = os.path.join(diskpath, fname+'.jpg')
try: if (imghdr.what(None, data) is None and b'<svg' in data[:1024]):
im = Image.open(StringIO(data)).convert('RGBA') # SVG image
imgpath = os.path.join(diskpath, fname+'.svg')
with self.imagemap_lock: with self.imagemap_lock:
self.imagemap[iurl] = imgpath self.imagemap[iurl] = imgpath
with open(imgpath, 'wb') as x: with open(imgpath, 'wb') as x:
im.save(x, 'JPEG') x.write(data)
tag['src'] = imgpath tag['src'] = imgpath
except: else:
traceback.print_exc() try:
continue im = Image.open(StringIO(data)).convert('RGBA')
with self.imagemap_lock:
self.imagemap[iurl] = imgpath
with open(imgpath, 'wb') as x:
im.save(x, 'JPEG')
tag['src'] = imgpath
except:
traceback.print_exc()
continue
def absurl(self, baseurl, tag, key, filter=True): def absurl(self, baseurl, tag, key, filter=True):
iurl = tag[key] iurl = tag[key]