From f87e766855c39ef2fa25290d94d7a7c60c8e5378 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Mon, 5 Aug 2024 09:20:18 +0530
Subject: [PATCH] Update WaPo

---
 recipes/wash_post.recipe       | 4 ++--
 recipes/wash_post_print.recipe | 6 ++++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/recipes/wash_post.recipe b/recipes/wash_post.recipe
index 73d869a905..58d62e834e 100644
--- a/recipes/wash_post.recipe
+++ b/recipes/wash_post.recipe
@@ -23,7 +23,7 @@ class TheWashingtonPost(BasicNewsRecipe):
     no_stylesheets = True
     encoding = 'utf8'
     use_embedded_content = False
-    language = 'en'
+    language = 'en_US'
     remove_empty_feeds = True
     ignore_duplicate_articles = {'url'}
     masthead_url = 'https://www.washingtonpost.com/pb/resources/img/twp-masthead-415x57.svg'
@@ -51,7 +51,7 @@ class TheWashingtonPost(BasicNewsRecipe):
     '''
 
     def get_browser(self):
-        return BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False, user_agent='common_words/based')
+        return BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False, user_agent='Mozilla/5.0 (Windows NT 10.0; rv:128.0) Gecko/20100101 Firefox/128.0')
 
     # Official feeds: https://www.washingtonpost.com/discussions/2018/10/12/washington-post-rss-feeds/
     feeds = [
diff --git a/recipes/wash_post_print.recipe b/recipes/wash_post_print.recipe
index 1e7d503aac..d1d0620f6b 100644
--- a/recipes/wash_post_print.recipe
+++ b/recipes/wash_post_print.recipe
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 '''
 washingtonpost.com
 '''
@@ -22,7 +24,7 @@ class wapoprint(BasicNewsRecipe):
     no_stylesheets = True
     use_embedded_content = False
     encoding = 'utf-8'
-    language = 'en'
+    language = 'en_US'
     remove_attributes = ['style', 'height', 'width']
     publication_type = 'newspaper'
     ignore_duplicate_articles = {'title', 'url'}
@@ -34,7 +36,7 @@ class wapoprint(BasicNewsRecipe):
     '''
 
     def get_browser(self):
-        return BasicNewsRecipe.get_browser(self, user_agent='common_words/based')
+        return BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False, user_agent='Mozilla/5.0 (Windows NT 10.0; rv:128.0) Gecko/20100101 Firefox/128.0')
 
     def parse_index(self):
         soup = self.index_to_soup('https://www.washingtonpost.com/todays_paper/updates/')
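
For anyone porting the same fix to another recipe, here is a minimal sketch of the get_browser() override that both recipes now share. It assumes the usual BasicNewsRecipe base class from calibre (calibre.web.feeds.news); the class name and title below are hypothetical, while the keyword arguments and user-agent string are the ones introduced by this patch.

    from calibre.web.feeds.news import BasicNewsRecipe


    class ExampleRecipe(BasicNewsRecipe):
        # Hypothetical minimal recipe; a real one also defines feeds or parse_index().
        title = 'Example'
        language = 'en_US'

        def get_browser(self):
            # Skip certificate verification and present a desktop Firefox UA
            # instead of the old 'common_words/based' placeholder string.
            return BasicNewsRecipe.get_browser(
                self,
                verify_ssl_certificates=False,
                user_agent='Mozilla/5.0 (Windows NT 10.0; rv:128.0) Gecko/20100101 Firefox/128.0')

A quick way to exercise the change is calibre's recipe test mode, e.g. ebook-convert wash_post.recipe out.epub --test -vv, which fetches only a couple of articles per feed with the new browser settings.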