mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
News download: Add support for images embedded in the HTML
This commit is contained in:
parent
f187d5f7e8
commit
a7fe71a54c
@ -12,6 +12,7 @@ from urllib import url2pathname, quote
|
|||||||
from httplib import responses
|
from httplib import responses
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
|
from base64 import b64decode
|
||||||
|
|
||||||
from calibre import browser, relpath, unicode_path
|
from calibre import browser, relpath, unicode_path
|
||||||
from calibre.constants import filesystem_encoding, iswindows
|
from calibre.constants import filesystem_encoding, iswindows
|
||||||
@ -346,6 +347,13 @@ class RecursiveFetcher(object):
|
|||||||
c = 0
|
c = 0
|
||||||
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
|
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
|
||||||
iurl = tag['src']
|
iurl = tag['src']
|
||||||
|
if iurl.startswith('data:image/'):
|
||||||
|
try:
|
||||||
|
data = b64decode(iurl.partition(',')[-1])
|
||||||
|
except:
|
||||||
|
self.log.exception('Failed to decode embedded image')
|
||||||
|
continue
|
||||||
|
else:
|
||||||
if callable(self.image_url_processor):
|
if callable(self.image_url_processor):
|
||||||
iurl = self.image_url_processor(baseurl, iurl)
|
iurl = self.image_url_processor(baseurl, iurl)
|
||||||
if not urlparse.urlsplit(iurl).scheme:
|
if not urlparse.urlsplit(iurl).scheme:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user