Update WaPo

This commit is contained in:
unkn0w7n 2024-08-05 09:20:18 +05:30
parent b41f5ac9c5
commit f87e766855
2 changed files with 6 additions and 4 deletions

View File

@@ -23,7 +23,7 @@ class TheWashingtonPost(BasicNewsRecipe):
     no_stylesheets = True
     encoding = 'utf8'
     use_embedded_content = False
-    language = 'en'
+    language = 'en_US'
     remove_empty_feeds = True
     ignore_duplicate_articles = {'url'}
     masthead_url = 'https://www.washingtonpost.com/pb/resources/img/twp-masthead-415x57.svg'
@@ -51,7 +51,7 @@ class TheWashingtonPost(BasicNewsRecipe):
     '''

     def get_browser(self):
-        return BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False, user_agent='common_words/based')
+        return BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False, user_agent='Mozilla/5.0 (Windows NT 10.0; rv:128.0) Gecko/20100101 Firefox/128.0')

     # Official feeds: https://www.washingtonpost.com/discussions/2018/10/12/washington-post-rss-feeds/
     feeds = [

View File

@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 '''
 washingtonpost.com
 '''
@@ -22,7 +24,7 @@ class wapoprint(BasicNewsRecipe):
     no_stylesheets = True
     use_embedded_content = False
     encoding = 'utf-8'
-    language = 'en'
+    language = 'en_US'
     remove_attributes = ['style', 'height', 'width']
     publication_type = 'newspaper'
     ignore_duplicate_articles = {'title', 'url'}
@@ -34,7 +36,7 @@ class wapoprint(BasicNewsRecipe):
     '''

     def get_browser(self):
-        return BasicNewsRecipe.get_browser(self, user_agent='common_words/based')
+        return BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False, user_agent='Mozilla/5.0 (Windows NT 10.0; rv:128.0) Gecko/20100101 Firefox/128.0')

     def parse_index(self):
         soup = self.index_to_soup('https://www.washingtonpost.com/todays_paper/updates/')