# Patch summary
# -------------
# Adds an identical get_browser() override to two recipes:
#   * class Guardian           in recipes/guardian.recipe
#   * class TheIndependentNew  in recipes/independent.recipe
#
# Each override forwards its arguments to BasicNewsRecipe.get_browser(),
# then assigns br.addheaders a single-entry list containing a Chrome-on-Linux
# 'User-agent' string, and returns the browser. Per the in-code comment, the
# motivation is that these sites serve images as JPEG-XR when the user agent
# is IE.
#
# NOTE(review): br.addheaders is *assigned*, not appended to, so any entries
# already present in that list are dropped. Whether the base browser puts
# anything other than a user agent there is not visible in this patch --
# confirm before reusing the pattern elsewhere.
# NOTE(review): presumably the default user agent identifies as IE, which is
# what triggers the JPEG-XR responses -- confirm against the base browser.
# NOTE(review): the same user-agent literal is duplicated in both recipes;
# keep the two copies in sync if it is ever updated.
# NOTE(review): line breaks and indentation below were reconstructed from a
# whitespace-collapsed copy of this patch. In particular the indentation of
# context lines and the position of blank context lines are best guesses --
# verify against the target files (or apply with whitespace tolerance).
diff --git a/recipes/guardian.recipe b/recipes/guardian.recipe
index c6f814952f..d0619b052d 100644
--- a/recipes/guardian.recipe
+++ b/recipes/guardian.recipe
@@ -52,6 +52,12 @@ class Guardian(BasicNewsRecipe):
         dict(attrs={'class': lambda x: x and 'content__article-body' in x.split()}),
     ]
 
+    def get_browser(self, *a, **kw):
+        # This site returns images in JPEG-XR format if the user agent is IE
+        br = BasicNewsRecipe.get_browser(self, *a, **kw)
+        br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.111 Safari/537.36')]
+        return br
+
     def preprocess_raw_html(self, raw, url):
         import html5lib
         from lxml import html
diff --git a/recipes/independent.recipe b/recipes/independent.recipe
index 6194f05891..da97af0453 100644
--- a/recipes/independent.recipe
+++ b/recipes/independent.recipe
@@ -36,6 +36,12 @@ class TheIndependentNew(BasicNewsRecipe):
 
     remove_attributes = ['style']
 
+    def get_browser(self, *a, **kw):
+        # This site returns images in JPEG-XR format if the user agent is IE
+        br = BasicNewsRecipe.get_browser(self, *a, **kw)
+        br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.111 Safari/537.36')]
+        return br
+
     def preprocess_html(self, soup):
         for div in soup.findAll(attrs={'class': 'full-gallery'}):
             imgs = {}