mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Update The Atlantic
This commit is contained in:
parent
64e2b05b5c
commit
0e443392d7
@ -33,16 +33,20 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(id='rubric'),
|
dict(itemprop=['headline']),
|
||||||
dict(itemprop=['headline', 'image']),
|
|
||||||
classes(
|
classes(
|
||||||
'article-header c-article-meta lead-img article-cover-extra article-body article-magazine article-cover-content'
|
'c-article-header__hed c-rubric article-header c-article-meta c-lead-media'
|
||||||
|
' lead-img article-cover-extra article-body article-magazine article-cover-content'
|
||||||
),
|
),
|
||||||
dict(itemprop='articleBody'),
|
dict(itemprop='articleBody'),
|
||||||
|
# these are for photos articles
|
||||||
|
dict(id='article-header'),
|
||||||
|
classes('photos'),
|
||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
classes(
|
classes(
|
||||||
'c-ad social-kit-top letter-writer-info callout secondary-byline embed-wrapper offset-wrapper boxtop-most-popular'
|
'c-ad c-share-social social-kit-top letter-writer-info callout secondary-byline embed-wrapper'
|
||||||
|
' offset-wrapper boxtop-most-popular social-icons hints read-more c-article-writer__social'
|
||||||
),
|
),
|
||||||
{
|
{
|
||||||
'name': ['meta', 'link', 'noscript', 'aside', 'h3']
|
'name': ['meta', 'link', 'noscript', 'aside', 'h3']
|
||||||
@ -84,6 +88,8 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for img in soup.findAll('img', attrs={'data-srcset': True}):
|
for img in soup.findAll('img', attrs={'data-srcset': True}):
|
||||||
img['src'] = img['data-srcset'].split()[0]
|
img['src'] = img['data-srcset'].split()[0]
|
||||||
|
for img in soup.findAll('img', attrs={'data-src': True}):
|
||||||
|
img['src'] = img['data-src']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
|
@ -33,16 +33,20 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(id='rubric'),
|
dict(itemprop=['headline']),
|
||||||
dict(itemprop=['headline', 'image']),
|
|
||||||
classes(
|
classes(
|
||||||
'c-article-header__hed article-header c-article-meta lead-img article-cover-extra article-body article-magazine article-cover-content'
|
'c-article-header__hed c-rubric article-header c-article-meta c-lead-media'
|
||||||
|
' lead-img article-cover-extra article-body article-magazine article-cover-content'
|
||||||
),
|
),
|
||||||
dict(itemprop='articleBody'),
|
dict(itemprop='articleBody'),
|
||||||
|
# these are for photos articles
|
||||||
|
dict(id='article-header'),
|
||||||
|
classes('photos'),
|
||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
classes(
|
classes(
|
||||||
'c-ad social-kit-top letter-writer-info callout secondary-byline embed-wrapper offset-wrapper boxtop-most-popular'
|
'c-ad c-share-social social-kit-top letter-writer-info callout secondary-byline embed-wrapper'
|
||||||
|
' offset-wrapper boxtop-most-popular social-icons hints read-more c-article-writer__social'
|
||||||
),
|
),
|
||||||
{
|
{
|
||||||
'name': ['meta', 'link', 'noscript', 'aside', 'h3']
|
'name': ['meta', 'link', 'noscript', 'aside', 'h3']
|
||||||
@ -84,6 +88,8 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for img in soup.findAll('img', attrs={'data-srcset': True}):
|
for img in soup.findAll('img', attrs={'data-srcset': True}):
|
||||||
img['src'] = img['data-srcset'].split()[0]
|
img['src'] = img['data-srcset'].split()[0]
|
||||||
|
for img in soup.findAll('img', attrs={'data-src': True}):
|
||||||
|
img['src'] = img['data-src']
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def print_version(self, url):
|
def print_version(self, url):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user