Change user agent for NYT recipes

This commit is contained in:
Kovid Goyal 2025-06-18 19:21:11 +05:30
parent c3dc09d862
commit 57a645e503
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
7 changed files with 7 additions and 43 deletions

View File

@@ -107,14 +107,8 @@ class NytMag(BasicNewsRecipe):
return self.nyt_parser.extract_html(self.index_to_soup(raw_html), url) return self.nyt_parser.extract_html(self.index_to_soup(raw_html), url)
def get_browser(self, *args, **kwargs): def get_browser(self, *args, **kwargs):
kwargs['user_agent'] = ( kwargs['user_agent'] = 'User-Agent: Mozilla/5.0 (compatible; archive.org_bot; Wayback Machine Live Record; +http://archive.org/details/archive.org_bot)'
'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
)
br = BasicNewsRecipe.get_browser(self, *args, **kwargs) br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
br.addheaders += [
('Referer', 'https://www.google.com/'),
('X-Forwarded-For', '66.249.66.1'),
]
return br return br
def preprocess_html(self, soup): def preprocess_html(self, soup):

View File

@@ -107,14 +107,8 @@ class NytMag(BasicNewsRecipe):
return self.nyt_parser.extract_html(self.index_to_soup(raw_html), url) return self.nyt_parser.extract_html(self.index_to_soup(raw_html), url)
def get_browser(self, *args, **kwargs): def get_browser(self, *args, **kwargs):
kwargs['user_agent'] = ( kwargs['user_agent'] = 'User-Agent: Mozilla/5.0 (compatible; archive.org_bot; Wayback Machine Live Record; +http://archive.org/details/archive.org_bot)'
'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
)
br = BasicNewsRecipe.get_browser(self, *args, **kwargs) br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
br.addheaders += [
('Referer', 'https://www.google.com/'),
('X-Forwarded-For', '66.249.66.1'),
]
return br return br
def preprocess_html(self, soup): def preprocess_html(self, soup):

View File

@@ -146,14 +146,8 @@ class NytFeeds(BasicNewsRecipe):
return self.nyt_parser.extract_html(self.index_to_soup(raw_html), url) return self.nyt_parser.extract_html(self.index_to_soup(raw_html), url)
def get_browser(self, *args, **kwargs): def get_browser(self, *args, **kwargs):
kwargs['user_agent'] = ( kwargs['user_agent'] = 'User-Agent: Mozilla/5.0 (compatible; archive.org_bot; Wayback Machine Live Record; +http://archive.org/details/archive.org_bot)'
'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
)
br = BasicNewsRecipe.get_browser(self, *args, **kwargs) br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
br.addheaders += [
('Referer', 'https://www.google.com/'),
('X-Forwarded-For', '66.249.66.1'),
]
return br return br
def preprocess_html(self, soup): def preprocess_html(self, soup):

View File

@@ -285,12 +285,8 @@ class NewYorkTimes(BasicNewsRecipe):
return self.parse_todays_page() return self.parse_todays_page()
def get_browser(self, *args, **kwargs): def get_browser(self, *args, **kwargs):
kwargs['user_agent'] = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' kwargs['user_agent'] = 'User-Agent: Mozilla/5.0 (compatible; archive.org_bot; Wayback Machine Live Record; +http://archive.org/details/archive.org_bot)'
br = BasicNewsRecipe.get_browser(self, *args, **kwargs) br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
br.addheaders += [
('Referer', 'https://www.google.com/'),
('X-Forwarded-For', '66.249.66.1')
]
return br return br
def preprocess_html(self, soup): def preprocess_html(self, soup):

View File

@@ -285,12 +285,8 @@ class NewYorkTimes(BasicNewsRecipe):
return self.parse_todays_page() return self.parse_todays_page()
def get_browser(self, *args, **kwargs): def get_browser(self, *args, **kwargs):
kwargs['user_agent'] = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' kwargs['user_agent'] = 'User-Agent: Mozilla/5.0 (compatible; archive.org_bot; Wayback Machine Live Record; +http://archive.org/details/archive.org_bot)'
br = BasicNewsRecipe.get_browser(self, *args, **kwargs) br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
br.addheaders += [
('Referer', 'https://www.google.com/'),
('X-Forwarded-For', '66.249.66.1')
]
return br return br
def preprocess_html(self, soup): def preprocess_html(self, soup):

View File

@@ -101,14 +101,8 @@ class NytTech(BasicNewsRecipe):
return self.nyt_parser.extract_html(self.index_to_soup(raw_html), url) return self.nyt_parser.extract_html(self.index_to_soup(raw_html), url)
def get_browser(self, *args, **kwargs): def get_browser(self, *args, **kwargs):
kwargs['user_agent'] = ( kwargs['user_agent'] = 'User-Agent: Mozilla/5.0 (compatible; archive.org_bot; Wayback Machine Live Record; +http://archive.org/details/archive.org_bot)'
'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
)
br = BasicNewsRecipe.get_browser(self, *args, **kwargs) br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
br.addheaders += [
('Referer', 'https://www.google.com/'),
('X-Forwarded-For', '66.249.66.1'),
]
return br return br
def preprocess_html(self, soup): def preprocess_html(self, soup):

View File

@@ -141,12 +141,8 @@ class NewYorkTimesBookReview(BasicNewsRecipe):
return parse_toc(data, self.log) return parse_toc(data, self.log)
def get_browser(self, *args, **kwargs): def get_browser(self, *args, **kwargs):
kwargs['user_agent'] = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' kwargs['user_agent'] = 'User-Agent: Mozilla/5.0 (compatible; archive.org_bot; Wayback Machine Live Record; +http://archive.org/details/archive.org_bot)'
br = BasicNewsRecipe.get_browser(self, *args, **kwargs) br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
br.addheaders += [
('Referer', 'https://www.google.com/'),
('X-Forwarded-For', '66.249.66.1')
]
return br return br
def preprocess_html(self, soup): def preprocess_html(self, soup):