mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix images not working for Guardian and Independent
Apparently both sites serve images in JPEG-XR format when the user agent identifies as Internet Explorer, so both recipes now send a Chrome user-agent string instead.
This commit is contained in:
parent
bcbac05d04
commit
d700523080
@ -52,6 +52,12 @@ class Guardian(BasicNewsRecipe):
|
|||||||
dict(attrs={'class': lambda x: x and 'content__article-body' in x.split()}),
|
dict(attrs={'class': lambda x: x and 'content__article-body' in x.split()}),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def get_browser(self, *a, **kw):
|
||||||
|
# This site returns images in JPEG-XR format if the user agent is IE
|
||||||
|
br = BasicNewsRecipe.get_browser(self, *a, **kw)
|
||||||
|
br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.111 Safari/537.36')]
|
||||||
|
return br
|
||||||
|
|
||||||
def preprocess_raw_html(self, raw, url):
|
def preprocess_raw_html(self, raw, url):
|
||||||
import html5lib
|
import html5lib
|
||||||
from lxml import html
|
from lxml import html
|
||||||
|
@ -36,6 +36,12 @@ class TheIndependentNew(BasicNewsRecipe):
|
|||||||
|
|
||||||
remove_attributes = ['style']
|
remove_attributes = ['style']
|
||||||
|
|
||||||
|
def get_browser(self, *a, **kw):
|
||||||
|
# This site returns images in JPEG-XR format if the user agent is IE
|
||||||
|
br = BasicNewsRecipe.get_browser(self, *a, **kw)
|
||||||
|
br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.111 Safari/537.36')]
|
||||||
|
return br
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for div in soup.findAll(attrs={'class': 'full-gallery'}):
|
for div in soup.findAll(attrs={'class': 'full-gallery'}):
|
||||||
imgs = {}
|
imgs = {}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user