From 57a645e5037bb3c2926685f9570c145c09025cf0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 18 Jun 2025 19:21:11 +0530 Subject: [PATCH] Change user agent for NYT recipes --- recipes/nyt_magazine.recipe | 8 +------- recipes/nyt_tmag.recipe | 8 +------- recipes/nytfeeds.recipe | 8 +------- recipes/nytimes.recipe | 6 +----- recipes/nytimes_sub.recipe | 6 +----- recipes/nytimes_tech.recipe | 8 +------- recipes/nytimesbook.recipe | 6 +----- 7 files changed, 7 insertions(+), 43 deletions(-) diff --git a/recipes/nyt_magazine.recipe b/recipes/nyt_magazine.recipe index e548976fcc..19761640c5 100644 --- a/recipes/nyt_magazine.recipe +++ b/recipes/nyt_magazine.recipe @@ -107,14 +107,8 @@ class NytMag(BasicNewsRecipe): return self.nyt_parser.extract_html(self.index_to_soup(raw_html), url) def get_browser(self, *args, **kwargs): - kwargs['user_agent'] = ( - 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' - ) + kwargs['user_agent'] = 'User-Agent: Mozilla/5.0 (compatible; archive.org_bot; Wayback Machine Live Record; +http://archive.org/details/archive.org_bot)' br = BasicNewsRecipe.get_browser(self, *args, **kwargs) - br.addheaders += [ - ('Referer', 'https://www.google.com/'), - ('X-Forwarded-For', '66.249.66.1'), - ] return br def preprocess_html(self, soup): diff --git a/recipes/nyt_tmag.recipe b/recipes/nyt_tmag.recipe index 718a37adcb..c5a4655243 100644 --- a/recipes/nyt_tmag.recipe +++ b/recipes/nyt_tmag.recipe @@ -107,14 +107,8 @@ class NytMag(BasicNewsRecipe): return self.nyt_parser.extract_html(self.index_to_soup(raw_html), url) def get_browser(self, *args, **kwargs): - kwargs['user_agent'] = ( - 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' - ) + kwargs['user_agent'] = 'User-Agent: Mozilla/5.0 (compatible; archive.org_bot; Wayback Machine Live Record; +http://archive.org/details/archive.org_bot)' br = BasicNewsRecipe.get_browser(self, *args, **kwargs) - br.addheaders += [ - ('Referer', 'https://www.google.com/'), - ('X-Forwarded-For', '66.249.66.1'), - ] return br def preprocess_html(self, soup): diff --git a/recipes/nytfeeds.recipe b/recipes/nytfeeds.recipe index 765442ca2f..20163bc27e 100644 --- a/recipes/nytfeeds.recipe +++ b/recipes/nytfeeds.recipe @@ -146,14 +146,8 @@ class NytFeeds(BasicNewsRecipe): return self.nyt_parser.extract_html(self.index_to_soup(raw_html), url) def get_browser(self, *args, **kwargs): - kwargs['user_agent'] = ( - 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' - ) + kwargs['user_agent'] = 'User-Agent: Mozilla/5.0 (compatible; archive.org_bot; Wayback Machine Live Record; +http://archive.org/details/archive.org_bot)' br = BasicNewsRecipe.get_browser(self, *args, **kwargs) - br.addheaders += [ - ('Referer', 'https://www.google.com/'), - ('X-Forwarded-For', '66.249.66.1'), - ] return br def preprocess_html(self, soup): diff --git a/recipes/nytimes.recipe b/recipes/nytimes.recipe index bb87ca3fa1..45d579a1d8 100644 --- a/recipes/nytimes.recipe +++ b/recipes/nytimes.recipe @@ -285,12 +285,8 @@ class NewYorkTimes(BasicNewsRecipe): return self.parse_todays_page() def get_browser(self, *args, **kwargs): - kwargs['user_agent'] = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' + kwargs['user_agent'] = 'User-Agent: Mozilla/5.0 (compatible; archive.org_bot; Wayback Machine Live Record; +http://archive.org/details/archive.org_bot)' br = BasicNewsRecipe.get_browser(self, *args, **kwargs) - br.addheaders += [ - ('Referer', 'https://www.google.com/'), - ('X-Forwarded-For', '66.249.66.1') - ] return br def preprocess_html(self, soup): diff --git a/recipes/nytimes_sub.recipe b/recipes/nytimes_sub.recipe index c4b1adad3c..fbeebed4ee 100644 --- a/recipes/nytimes_sub.recipe +++ b/recipes/nytimes_sub.recipe @@ -285,12 +285,8 @@ class NewYorkTimes(BasicNewsRecipe): return self.parse_todays_page() def get_browser(self, *args, **kwargs): - kwargs['user_agent'] = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' + kwargs['user_agent'] = 'User-Agent: Mozilla/5.0 (compatible; archive.org_bot; Wayback Machine Live Record; +http://archive.org/details/archive.org_bot)' br = BasicNewsRecipe.get_browser(self, *args, **kwargs) - br.addheaders += [ - ('Referer', 'https://www.google.com/'), - ('X-Forwarded-For', '66.249.66.1') - ] return br def preprocess_html(self, soup): diff --git a/recipes/nytimes_tech.recipe b/recipes/nytimes_tech.recipe index 5d76fcd862..57b23f1723 100644 --- a/recipes/nytimes_tech.recipe +++ b/recipes/nytimes_tech.recipe @@ -101,14 +101,8 @@ class NytTech(BasicNewsRecipe): return self.nyt_parser.extract_html(self.index_to_soup(raw_html), url) def get_browser(self, *args, **kwargs): - kwargs['user_agent'] = ( - 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' - ) + kwargs['user_agent'] = 'User-Agent: Mozilla/5.0 (compatible; archive.org_bot; Wayback Machine Live Record; +http://archive.org/details/archive.org_bot)' br = BasicNewsRecipe.get_browser(self, *args, **kwargs) - br.addheaders += [ - ('Referer', 'https://www.google.com/'), - ('X-Forwarded-For', '66.249.66.1'), - ] return br def preprocess_html(self, soup): diff --git a/recipes/nytimesbook.recipe b/recipes/nytimesbook.recipe index f35ddcda2a..17dd224dc5 100644 --- a/recipes/nytimesbook.recipe +++ b/recipes/nytimesbook.recipe @@ -141,12 +141,8 @@ class NewYorkTimesBookReview(BasicNewsRecipe): return parse_toc(data, self.log) def get_browser(self, *args, **kwargs): - kwargs['user_agent'] = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' + kwargs['user_agent'] = 'User-Agent: Mozilla/5.0 (compatible; archive.org_bot; Wayback Machine Live Record; +http://archive.org/details/archive.org_bot)' br = BasicNewsRecipe.get_browser(self, *args, **kwargs) - br.addheaders += [ - ('Referer', 'https://www.google.com/'), - ('X-Forwarded-For', '66.249.66.1') - ] return br def preprocess_html(self, soup):