mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
News download: Add support for images embedded in the HTML
This commit is contained in:
parent
f187d5f7e8
commit
a7fe71a54c
@@ -12,6 +12,7 @@ from urllib import url2pathname, quote
|
||||
from httplib import responses
|
||||
from PIL import Image
|
||||
from cStringIO import StringIO
|
||||
from base64 import b64decode
|
||||
|
||||
from calibre import browser, relpath, unicode_path
|
||||
from calibre.constants import filesystem_encoding, iswindows
|
||||
@@ -346,6 +347,13 @@ class RecursiveFetcher(object):
|
||||
c = 0
|
||||
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
|
||||
iurl = tag['src']
|
||||
if iurl.startswith('data:image/'):
|
||||
try:
|
||||
data = b64decode(iurl.partition(',')[-1])
|
||||
except:
|
||||
self.log.exception('Failed to decode embedded image')
|
||||
continue
|
||||
else:
|
||||
if callable(self.image_url_processor):
|
||||
iurl = self.image_url_processor(baseurl, iurl)
|
||||
if not urlparse.urlsplit(iurl).scheme:
|
||||
|
Loading…
x
Reference in New Issue
Block a user