From 79b6ae352e3fffcc86821303c7b697b555a6b3a1 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Mon, 30 Oct 2023 22:08:04 +0530 Subject: [PATCH] Update financial_times.recipe --- recipes/financial_times.recipe | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/recipes/financial_times.recipe b/recipes/financial_times.recipe index ad9a8e639c..28534afcfb 100644 --- a/recipes/financial_times.recipe +++ b/recipes/financial_times.recipe @@ -1,5 +1,6 @@ import json import re +from calibre import browser from urllib.parse import quote from html5_parser import parse @@ -20,6 +21,7 @@ class ft(BasicNewsRecipe): resolve_internal_links = True remove_attributes = ['style', 'width', 'height'] masthead_url = 'https://im.ft-static.com/m/img/masthead_main.jpg' + simultaneous_downloads = 1 extra_css = ''' .article-info__time-byline {font-size:small; font-weight:bold; } @@ -38,6 +40,7 @@ class ft(BasicNewsRecipe): remove_tags = [ dict(name='aside', attrs={'class':'n-content-recommended--single-story'}), + dict(attrs={'data-layout-name':'card'}), classes('in-article-advert') ] @@ -57,13 +60,20 @@ class ft(BasicNewsRecipe): # return br def get_browser(self, *args, **kwargs): - kwargs['user_agent'] = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' - br = BasicNewsRecipe.get_browser(self, *args, **kwargs) + return self + + def clone_browser(self, *args, **kwargs): + return self.get_browser() + + def open_novisit(self, *args, **kwargs): + br = browser(user_agent='Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)') br.addheaders += [ ('Referer', 'https://www.google.com/'), ('X-Forwarded-For', '66.249.66.1') ] - return br + return br.open_novisit(*args, **kwargs) + + open = open_novisit feeds = [ ('World', 'https://www.ft.com/world?format=rss'),