mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix images not working for Guardian and Independent
Apparently they serve images in JPEG-XR format if the user agent is IE
This commit is contained in:
parent
bcbac05d04
commit
d700523080
@ -52,6 +52,12 @@ class Guardian(BasicNewsRecipe):
|
||||
dict(attrs={'class': lambda x: x and 'content__article-body' in x.split()}),
|
||||
]
|
||||
|
||||
def get_browser(self, *a, **kw):
    """Return the base recipe browser with a fixed Chrome User-agent.

    All positional and keyword arguments are forwarded unchanged to
    ``BasicNewsRecipe.get_browser``. The header list of the browser it
    returns is then replaced with a single ``User-agent`` entry.
    """
    # The site serves images as JPEG-XR when it sees an IE user agent,
    # so always identify as desktop Chrome instead.
    chrome_ua = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.111 Safari/537.36'
    browser = BasicNewsRecipe.get_browser(self, *a, **kw)
    browser.addheaders = [('User-agent', chrome_ua)]
    return browser
|
||||
|
||||
def preprocess_raw_html(self, raw, url):
|
||||
import html5lib
|
||||
from lxml import html
|
||||
|
@ -36,6 +36,12 @@ class TheIndependentNew(BasicNewsRecipe):
|
||||
|
||||
remove_attributes = ['style']
|
||||
|
||||
def get_browser(self, *a, **kw):
    """Build the recipe's browser and pin its User-agent to Chrome.

    Delegates to ``BasicNewsRecipe.get_browser`` with the caller's
    arguments, then overwrites the resulting browser's ``addheaders``
    with one ``User-agent`` header before returning it.
    """
    browser = BasicNewsRecipe.get_browser(self, *a, **kw)
    # An IE user agent makes this site return JPEG-XR images; a Chrome
    # user agent avoids that.
    user_agent_header = (
        'User-agent',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.111 Safari/537.36',
    )
    browser.addheaders = [user_agent_header]
    return browser
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for div in soup.findAll(attrs={'class': 'full-gallery'}):
|
||||
imgs = {}
|
||||
|
Loading…
x
Reference in New Issue
Block a user