diff --git a/recipes/wash_post.recipe b/recipes/wash_post.recipe
index 8e9e980633..35b3f7e61f 100644
--- a/recipes/wash_post.recipe
+++ b/recipes/wash_post.recipe
@@ -23,6 +23,7 @@ class TheWashingtonPost(BasicNewsRecipe):
     language = 'en'
     remove_empty_feeds = True
     ignore_duplicate_articles = {'url'}
+    masthead_url = 'https://www.washingtonpost.com/pb/resources/img/twp-masthead-415x57.svg'
     publication_type = 'newspaper'
     remove_attributes = ['style', 'width', 'height']
 
@@ -83,5 +84,5 @@ class TheWashingtonPost(BasicNewsRecipe):
 
     def preprocess_html(self, soup):
         for img in soup.findAll('img', attrs={'src':True}):
-            img['src'] = 'https://www.washingtonpost.com/wp-apps/imrs.php?src=' + img['src'] + '&w=540'
+            img['src'] = 'https://www.washingtonpost.com/wp-apps/imrs.php?src=' + img['src'] + '&w=916'
         return soup
diff --git a/recipes/wash_post_print.recipe b/recipes/wash_post_print.recipe
index 625bcb68f5..9911f04f78 100644
--- a/recipes/wash_post_print.recipe
+++ b/recipes/wash_post_print.recipe
@@ -24,6 +24,7 @@ class wapoprint(BasicNewsRecipe):
     remove_attributes = ['style', 'height', 'width']
     publication_type = 'newspaper'
     ignore_duplicate_articles = {'title', 'url'}
+    masthead_url = 'https://www.washingtonpost.com/pb/resources/img/twp-masthead-415x57.svg'
     extra_css = '''
         .img { text-align:center; font-size:small; }
         .auth { font-weight:bold; font-size:small; }
@@ -34,6 +35,8 @@ class wapoprint(BasicNewsRecipe):
         soup = self.index_to_soup('https://www.washingtonpost.com/todays_paper/updates/')
         if img := soup.find('img', attrs={'src':lambda x: x and x.endswith('_FrontPage.png')}):
             self.cover_url = img['src']
+        if h2 := soup.find('h2', attrs={'class':lambda x: x and 'font--subhead' in x.split()}):
+            self.title = 'WaPo Print | ' + self.tag_to_string(h2)
 
         feeds = []
 
@@ -84,5 +87,9 @@ class wapoprint(BasicNewsRecipe):
 
     def preprocess_html(self, soup):
         for img in soup.findAll('img', attrs={'src':True}):
-            img['src'] = 'https://www.washingtonpost.com/wp-apps/imrs.php?src=' + img['src'] + '&w=540'
+            img['src'] = 'https://www.washingtonpost.com/wp-apps/imrs.php?src=' + img['src'] + '&w=916'
         return soup
+
+    def populate_article_metadata(self, article, soup, first):
+        article.summary = self.tag_to_string(soup.find('h3'))
+        article.text_summary = self.tag_to_string(soup.find('h3'))