Update The Atlantic

This commit is contained in:
Kovid Goyal 2020-10-16 22:25:21 +05:30
parent 64e2b05b5c
commit 0e443392d7
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 20 additions and 8 deletions

View File

@ -33,16 +33,20 @@ class TheAtlantic(BasicNewsRecipe):
encoding = 'utf-8'
keep_only_tags = [
dict(id='rubric'),
dict(itemprop=['headline', 'image']),
dict(itemprop=['headline']),
classes(
'article-header c-article-meta lead-img article-cover-extra article-body article-magazine article-cover-content'
'c-article-header__hed c-rubric article-header c-article-meta c-lead-media'
' lead-img article-cover-extra article-body article-magazine article-cover-content'
),
dict(itemprop='articleBody'),
# these are for photos articles
dict(id='article-header'),
classes('photos'),
]
remove_tags = [
classes(
'c-ad social-kit-top letter-writer-info callout secondary-byline embed-wrapper offset-wrapper boxtop-most-popular'
'c-ad c-share-social social-kit-top letter-writer-info callout secondary-byline embed-wrapper'
' offset-wrapper boxtop-most-popular social-icons hints read-more c-article-writer__social'
),
{
'name': ['meta', 'link', 'noscript', 'aside', 'h3']
@ -84,6 +88,8 @@ class TheAtlantic(BasicNewsRecipe):
def preprocess_html(self, soup):
    """Copy image URLs from ``data-srcset``/``data-src`` into ``src``.

    Every ``<img>`` carrying a ``data-srcset`` attribute gets its ``src``
    set to the first whitespace-separated token of that attribute; then
    every ``<img>`` carrying a ``data-src`` attribute gets its ``src`` set
    to that value (so ``data-src`` wins when both are present).

    Attributes that are present but empty are skipped, leaving any
    existing ``src`` untouched instead of raising ``IndexError`` or
    blanking the image URL.

    :param soup: parsed document exposing ``findAll`` (presumably a
        BeautifulSoup tree -- confirm against the recipe framework).
    :return: the same ``soup`` object, modified in place.
    """
    for img in soup.findAll('img', attrs={'data-srcset': True}):
        # Only the first token is used; anything after it (descriptors,
        # further candidates) is ignored.
        candidates = img['data-srcset'].split()
        if candidates:
            img['src'] = candidates[0]
    for img in soup.findAll('img', attrs={'data-src': True}):
        # Skip empty values so a usable src is not replaced with ''.
        if img['data-src']:
            img['src'] = img['data-src']
    return soup
def print_version(self, url):

View File

@ -33,16 +33,20 @@ class TheAtlantic(BasicNewsRecipe):
encoding = 'utf-8'
keep_only_tags = [
dict(id='rubric'),
dict(itemprop=['headline', 'image']),
dict(itemprop=['headline']),
classes(
'c-article-header__hed article-header c-article-meta lead-img article-cover-extra article-body article-magazine article-cover-content'
'c-article-header__hed c-rubric article-header c-article-meta c-lead-media'
' lead-img article-cover-extra article-body article-magazine article-cover-content'
),
dict(itemprop='articleBody'),
# these are for photos articles
dict(id='article-header'),
classes('photos'),
]
remove_tags = [
classes(
'c-ad social-kit-top letter-writer-info callout secondary-byline embed-wrapper offset-wrapper boxtop-most-popular'
'c-ad c-share-social social-kit-top letter-writer-info callout secondary-byline embed-wrapper'
' offset-wrapper boxtop-most-popular social-icons hints read-more c-article-writer__social'
),
{
'name': ['meta', 'link', 'noscript', 'aside', 'h3']
@ -84,6 +88,8 @@ class TheAtlantic(BasicNewsRecipe):
def preprocess_html(self, soup):
    """Copy image URLs from ``data-srcset``/``data-src`` into ``src``.

    Every ``<img>`` carrying a ``data-srcset`` attribute gets its ``src``
    set to the first whitespace-separated token of that attribute; then
    every ``<img>`` carrying a ``data-src`` attribute gets its ``src`` set
    to that value (so ``data-src`` wins when both are present).

    Attributes that are present but empty are skipped, leaving any
    existing ``src`` untouched instead of raising ``IndexError`` or
    blanking the image URL.

    :param soup: parsed document exposing ``findAll`` (presumably a
        BeautifulSoup tree -- confirm against the recipe framework).
    :return: the same ``soup`` object, modified in place.
    """
    for img in soup.findAll('img', attrs={'data-srcset': True}):
        # Only the first token is used; anything after it (descriptors,
        # further candidates) is ignored.
        candidates = img['data-srcset'].split()
        if candidates:
            img['src'] = candidates[0]
    for img in soup.findAll('img', attrs={'data-src': True}):
        # Skip empty values so a usable src is not replaced with ''.
        if img['data-src']:
            img['src'] = img['data-src']
    return soup
def print_version(self, url):