From 0e443392d72b37acb0bfe97720b33c06c71b36ab Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 16 Oct 2020 22:25:21 +0530 Subject: [PATCH] Update The Atlantic --- recipes/atlantic.recipe | 14 ++++++++++---- recipes/atlantic_com.recipe | 14 ++++++++++---- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/recipes/atlantic.recipe b/recipes/atlantic.recipe index 58f5923ee3..1c17ee0d0d 100644 --- a/recipes/atlantic.recipe +++ b/recipes/atlantic.recipe @@ -33,16 +33,20 @@ class TheAtlantic(BasicNewsRecipe): encoding = 'utf-8' keep_only_tags = [ - dict(id='rubric'), - dict(itemprop=['headline', 'image']), + dict(itemprop=['headline']), classes( - 'article-header c-article-meta lead-img article-cover-extra article-body article-magazine article-cover-content' + 'c-article-header__hed c-rubric article-header c-article-meta c-lead-media' + ' lead-img article-cover-extra article-body article-magazine article-cover-content' ), dict(itemprop='articleBody'), + # these are for photos articles + dict(id='article-header'), + classes('photos'), ] remove_tags = [ classes( - 'c-ad social-kit-top letter-writer-info callout secondary-byline embed-wrapper offset-wrapper boxtop-most-popular' + 'c-ad c-share-social social-kit-top letter-writer-info callout secondary-byline embed-wrapper' + ' offset-wrapper boxtop-most-popular social-icons hints read-more c-article-writer__social' ), { 'name': ['meta', 'link', 'noscript', 'aside', 'h3'] @@ -84,6 +88,8 @@ class TheAtlantic(BasicNewsRecipe): def preprocess_html(self, soup): for img in soup.findAll('img', attrs={'data-srcset': True}): img['src'] = img['data-srcset'].split()[0] + for img in soup.findAll('img', attrs={'data-src': True}): + img['src'] = img['data-src'] return soup def print_version(self, url): diff --git a/recipes/atlantic_com.recipe b/recipes/atlantic_com.recipe index a7acd20ddc..c224e13c49 100644 --- a/recipes/atlantic_com.recipe +++ b/recipes/atlantic_com.recipe @@ -33,16 +33,20 @@ class TheAtlantic(BasicNewsRecipe): encoding = 'utf-8' keep_only_tags = [ - dict(id='rubric'), - dict(itemprop=['headline', 'image']), + dict(itemprop=['headline']), classes( - 'c-article-header__hed article-header c-article-meta lead-img article-cover-extra article-body article-magazine article-cover-content' + 'c-article-header__hed c-rubric article-header c-article-meta c-lead-media' + ' lead-img article-cover-extra article-body article-magazine article-cover-content' ), dict(itemprop='articleBody'), + # these are for photos articles + dict(id='article-header'), + classes('photos'), ] remove_tags = [ classes( - 'c-ad social-kit-top letter-writer-info callout secondary-byline embed-wrapper offset-wrapper boxtop-most-popular' + 'c-ad c-share-social social-kit-top letter-writer-info callout secondary-byline embed-wrapper' + ' offset-wrapper boxtop-most-popular social-icons hints read-more c-article-writer__social' ), { 'name': ['meta', 'link', 'noscript', 'aside', 'h3'] @@ -84,6 +88,8 @@ class TheAtlantic(BasicNewsRecipe): def preprocess_html(self, soup): for img in soup.findAll('img', attrs={'data-srcset': True}): img['src'] = img['data-srcset'].split()[0] + for img in soup.findAll('img', attrs={'data-src': True}): + img['src'] = img['data-src'] return soup def print_version(self, url):