From 5ca71f1fe88c9dd85cf5d8a3a450a8e0df4d8c2a Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Sun, 15 Oct 2023 15:30:32 +0530 Subject: [PATCH] Update financial_times.recipe --- recipes/financial_times.recipe | 33 +++++++-------------------------- 1 file changed, 7 insertions(+), 26 deletions(-) diff --git a/recipes/financial_times.recipe b/recipes/financial_times.recipe index 45f2f1701c..61f05d7284 100644 --- a/recipes/financial_times.recipe +++ b/recipes/financial_times.recipe @@ -56,34 +56,15 @@ class ft(BasicNewsRecipe): # br.submit() # return br - def get_browser(self, *args, **kw): - br = super().get_browser(*args, **kw) - br.set_current_header('Referer', 'https://www.google.com/') + def get_browser(self, *args, **kwargs): + kwargs['user_agent'] = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' + br = BasicNewsRecipe.get_browser(self, *args, **kwargs) + br.addheaders += [ + ('Referer', 'https://www.google.com/'), + ('X-Forwarded-For', '66.249.66.1') + ] return br - # the print_version loads all articles but sometimes it might fail due to too many requests - # def print_version(self, url): - # return 'https://webcache.googleusercontent.com/search?q=cache:' + quote(url, safe='') - - def get_cover_url(self): - from datetime import date - cover = 'http://img.kiosko.net/' + str( - date.today().year - ) + '/' + date.today().strftime('%m') + '/' + date.today( - ).strftime('%d') + '/uk/ft_uk.750.jpg' - br = BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False) - try: - br.open(cover) - except: - index = 'https://en.kiosko.net/uk/np/ft_uk.html' - soup = self.index_to_soup(index) - for image in soup.findAll('img', src=True): - if image['src'].endswith('750.jpg'): - return image['src'] - self.log("\nCover unavailable") - cover = None - return cover - feeds = [ ('World', 'https://www.ft.com/world?format=rss'), ('US', 'https://www.ft.com/us?format=rss'),