From 484605449b0ebfc9e63a8263aadcc6b8e4f20e2c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 10 Dec 2023 19:51:14 +0530 Subject: [PATCH] Use a common words based UA for economist Seems to work as well as the fixed facebook one and will hopefully continue working for longer. Also add the accept-language header. --- recipes/economist.recipe | 4 +++- recipes/economist_espresso.recipe | 6 ++++-- recipes/economist_free.recipe | 4 +++- recipes/economist_world_ahead.recipe | 4 +++- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/recipes/economist.recipe b/recipes/economist.recipe index ff3fae9248..8cae56d15c 100644 --- a/recipes/economist.recipe +++ b/recipes/economist.recipe @@ -196,8 +196,10 @@ class Economist(BasicNewsRecipe): self.log.warn('Kindle Output profile being used, reducing image quality to keep file size below amazon email threshold') def get_browser(self, *args, **kwargs): - kwargs['user_agent'] = 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)' + # Needed to bypass cloudflare + kwargs['user_agent'] = 'common_words/based' br = BasicNewsRecipe.get_browser(self, *args, **kwargs) + br.addheaders += [('Accept-Language', 'en-GB,en-US;q=0.9,en;q=0.8')] return br def preprocess_raw_html(self, raw, url): diff --git a/recipes/economist_espresso.recipe b/recipes/economist_espresso.recipe index 5a5ac101d2..eace2a4977 100644 --- a/recipes/economist_espresso.recipe +++ b/recipes/economist_espresso.recipe @@ -63,8 +63,10 @@ class Espresso(BasicNewsRecipe): nt = new_tag(soup, 'hr') hr.append(nt) return soup - + def get_browser(self, *args, **kwargs): - kwargs['user_agent'] = 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)' + # Needed to bypass cloudflare + kwargs['user_agent'] = 'common_words/based' br = BasicNewsRecipe.get_browser(self, *args, **kwargs) + br.addheaders += [('Accept-Language', 'en-GB,en-US;q=0.9,en;q=0.8')] return br diff --git a/recipes/economist_free.recipe b/recipes/economist_free.recipe index ff3fae9248..8cae56d15c 100644 --- a/recipes/economist_free.recipe +++ b/recipes/economist_free.recipe @@ -196,8 +196,10 @@ class Economist(BasicNewsRecipe): self.log.warn('Kindle Output profile being used, reducing image quality to keep file size below amazon email threshold') def get_browser(self, *args, **kwargs): - kwargs['user_agent'] = 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)' + # Needed to bypass cloudflare + kwargs['user_agent'] = 'common_words/based' br = BasicNewsRecipe.get_browser(self, *args, **kwargs) + br.addheaders += [('Accept-Language', 'en-GB,en-US;q=0.9,en;q=0.8')] return br def preprocess_raw_html(self, raw, url): diff --git a/recipes/economist_world_ahead.recipe b/recipes/economist_world_ahead.recipe index 2554a910fd..f145240996 100644 --- a/recipes/economist_world_ahead.recipe +++ b/recipes/economist_world_ahead.recipe @@ -192,8 +192,10 @@ class Economist(BasicNewsRecipe): def get_browser(self, *args, **kwargs): - kwargs['user_agent'] = 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)' + # Needed to bypass cloudflare + kwargs['user_agent'] = 'common_words/based' br = BasicNewsRecipe.get_browser(self, *args, **kwargs) + br.addheaders += [('Accept-Language', 'en-GB,en-US;q=0.9,en;q=0.8')] return br def preprocess_raw_html(self, raw, url):