From 9fa7b717d93353aee87fca63e7c1211a3c8ed55c Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Thu, 2 Nov 2023 23:10:06 +0530 Subject: [PATCH] WaPo update --- recipes/wash_post.recipe | 3 ++- recipes/wash_post_print.recipe | 9 ++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/recipes/wash_post.recipe b/recipes/wash_post.recipe index 8e9e980633..35b3f7e61f 100644 --- a/recipes/wash_post.recipe +++ b/recipes/wash_post.recipe @@ -23,6 +23,7 @@ class TheWashingtonPost(BasicNewsRecipe): language = 'en' remove_empty_feeds = True ignore_duplicate_articles = {'url'} + masthead_url = 'https://www.washingtonpost.com/pb/resources/img/twp-masthead-415x57.svg' publication_type = 'newspaper' remove_attributes = ['style', 'width', 'height'] @@ -83,5 +84,5 @@ class TheWashingtonPost(BasicNewsRecipe): def preprocess_html(self, soup): for img in soup.findAll('img', attrs={'src':True}): - img['src'] = 'https://www.washingtonpost.com/wp-apps/imrs.php?src=' + img['src'] + '&w=540' + img['src'] = 'https://www.washingtonpost.com/wp-apps/imrs.php?src=' + img['src'] + '&w=916' return soup diff --git a/recipes/wash_post_print.recipe b/recipes/wash_post_print.recipe index 625bcb68f5..9911f04f78 100644 --- a/recipes/wash_post_print.recipe +++ b/recipes/wash_post_print.recipe @@ -24,6 +24,7 @@ class wapoprint(BasicNewsRecipe): remove_attributes = ['style', 'height', 'width'] publication_type = 'newspaper' ignore_duplicate_articles = {'title', 'url'} + masthead_url = 'https://www.washingtonpost.com/pb/resources/img/twp-masthead-415x57.svg' extra_css = ''' .img { text-align:center; font-size:small; } .auth { font-weight:bold; font-size:small; } @@ -34,6 +35,8 @@ class wapoprint(BasicNewsRecipe): soup = self.index_to_soup('https://www.washingtonpost.com/todays_paper/updates/') if img := soup.find('img', attrs={'src':lambda x: x and x.endswith('_FrontPage.png')}): self.cover_url = img['src'] + if h2 := soup.find('h2', attrs={'class':lambda x: x and 'font--subhead' in x.split()}): + self.title = 'WaPo Print | ' + self.tag_to_string(h2) feeds = [] @@ -84,5 +87,9 @@ class wapoprint(BasicNewsRecipe): def preprocess_html(self, soup): for img in soup.findAll('img', attrs={'src':True}): - img['src'] = 'https://www.washingtonpost.com/wp-apps/imrs.php?src=' + img['src'] + '&w=540' + img['src'] = 'https://www.washingtonpost.com/wp-apps/imrs.php?src=' + img['src'] + '&w=916' return soup + + def populate_article_metadata(self, article, soup, first): + article.summary = self.tag_to_string(soup.find('h3')) + article.text_summary = self.tag_to_string(soup.find('h3'))