Use a common words based UA for economist

Seems to work as well as the fixed facebook one and will hopefully
continue working for longer. Also add the accept-language header.
This commit is contained in:
Kovid Goyal 2023-12-10 19:51:14 +05:30
parent 7871fc4021
commit 484605449b
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 13 additions and 5 deletions

View File

@ -196,8 +196,10 @@ class Economist(BasicNewsRecipe):
self.log.warn('Kindle Output profile being used, reducing image quality to keep file size below amazon email threshold') self.log.warn('Kindle Output profile being used, reducing image quality to keep file size below amazon email threshold')
def get_browser(self, *args, **kwargs): def get_browser(self, *args, **kwargs):
kwargs['user_agent'] = 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)' # Needed to bypass cloudflare
kwargs['user_agent'] = 'common_words/based'
br = BasicNewsRecipe.get_browser(self, *args, **kwargs) br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
br.addheaders += [('Accept-Language', 'en-GB,en-US;q=0.9,en;q=0.8')]
return br return br
def preprocess_raw_html(self, raw, url): def preprocess_raw_html(self, raw, url):

View File

@ -63,8 +63,10 @@ class Espresso(BasicNewsRecipe):
nt = new_tag(soup, 'hr') nt = new_tag(soup, 'hr')
hr.append(nt) hr.append(nt)
return soup return soup
def get_browser(self, *args, **kwargs): def get_browser(self, *args, **kwargs):
kwargs['user_agent'] = 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)' # Needed to bypass cloudflare
kwargs['user_agent'] = 'common_words/based'
br = BasicNewsRecipe.get_browser(self, *args, **kwargs) br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
br.addheaders += [('Accept-Language', 'en-GB,en-US;q=0.9,en;q=0.8')]
return br return br

View File

@ -196,8 +196,10 @@ class Economist(BasicNewsRecipe):
self.log.warn('Kindle Output profile being used, reducing image quality to keep file size below amazon email threshold') self.log.warn('Kindle Output profile being used, reducing image quality to keep file size below amazon email threshold')
def get_browser(self, *args, **kwargs): def get_browser(self, *args, **kwargs):
kwargs['user_agent'] = 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)' # Needed to bypass cloudflare
kwargs['user_agent'] = 'common_words/based'
br = BasicNewsRecipe.get_browser(self, *args, **kwargs) br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
br.addheaders += [('Accept-Language', 'en-GB,en-US;q=0.9,en;q=0.8')]
return br return br
def preprocess_raw_html(self, raw, url): def preprocess_raw_html(self, raw, url):

View File

@ -192,8 +192,10 @@ class Economist(BasicNewsRecipe):
def get_browser(self, *args, **kwargs): def get_browser(self, *args, **kwargs):
kwargs['user_agent'] = 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)' # Needed to bypass cloudflare
kwargs['user_agent'] = 'common_words/based'
br = BasicNewsRecipe.get_browser(self, *args, **kwargs) br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
br.addheaders += [('Accept-Language', 'en-GB,en-US;q=0.9,en;q=0.8')]
return br return br
def preprocess_raw_html(self, raw, url): def preprocess_raw_html(self, raw, url):