mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
Update The Atlantic
This commit is contained in:
parent
d4c2b82582
commit
c9193e3e53
@ -9,6 +9,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
|
|
||||||
web_version = False
|
web_version = False
|
||||||
|
test_article = None
|
||||||
|
# test_article = 'https://www.theatlantic.com/health/archive/2020/12/covid-19-second-surge/617415/?utm_source=feed'
|
||||||
|
|
||||||
|
|
||||||
def classes(classes):
|
def classes(classes):
|
||||||
@ -18,6 +20,19 @@ def classes(classes):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def prefix_classes(classes):
    """Build a tag-matching spec that selects elements by class-name prefix.

    ``classes`` is a whitespace-separated string of prefixes. The result is a
    dict of the form ``{'attrs': {'class': test}}``, where ``test`` takes the
    value of a ``class`` attribute and returns True when at least one of its
    whitespace-separated class names starts with one of the prefixes, and
    False otherwise (including when the attribute is missing or empty).
    """
    # str.startswith accepts a tuple, so every prefix is checked in one call.
    prefixes = tuple(classes.split())

    def test(x):
        # A missing or empty class attribute never matches.
        if not x:
            return False
        return any(cls.startswith(prefixes) for cls in x.split())

    return dict(attrs={'class': test})
|
||||||
|
|
||||||
|
|
||||||
class TheAtlantic(BasicNewsRecipe):
|
class TheAtlantic(BasicNewsRecipe):
|
||||||
|
|
||||||
if web_version:
|
if web_version:
|
||||||
@ -38,6 +53,9 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
'c-article-header__hed c-rubric article-header c-article-meta c-lead-media'
|
'c-article-header__hed c-rubric article-header c-article-meta c-lead-media'
|
||||||
' lead-img article-cover-extra article-body article-magazine article-cover-content'
|
' lead-img article-cover-extra article-body article-magazine article-cover-content'
|
||||||
),
|
),
|
||||||
|
prefix_classes(
|
||||||
|
'ArticleHeader_root__ ArticleLayoutSection_main__'
|
||||||
|
),
|
||||||
dict(itemprop='articleBody'),
|
dict(itemprop='articleBody'),
|
||||||
# these are for photos articles
|
# these are for photos articles
|
||||||
dict(id='article-header'),
|
dict(id='article-header'),
|
||||||
@ -45,9 +63,10 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
classes(
|
classes(
|
||||||
'c-ad c-share-social social-kit-top letter-writer-info callout secondary-byline embed-wrapper'
|
'c-ad c-share-social c-recirculation-link social-kit-top letter-writer-info callout secondary-byline embed-wrapper'
|
||||||
' offset-wrapper boxtop-most-popular social-icons hints read-more c-article-writer__social'
|
' offset-wrapper boxtop-most-popular social-icons hints read-more c-article-writer__social'
|
||||||
),
|
),
|
||||||
|
prefix_classes('ArticleRecirc_inline__'),
|
||||||
{
|
{
|
||||||
'name': ['meta', 'link', 'noscript', 'aside', 'h3']
|
'name': ['meta', 'link', 'noscript', 'aside', 'h3']
|
||||||
},
|
},
|
||||||
@ -103,7 +122,7 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
ans = None
|
ans = None
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
if web_version:
|
if web_version and not test_article:
|
||||||
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
|
||||||
@ -129,6 +148,8 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
else:
|
else:
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
|
if test_article:
|
||||||
|
return [('Articles', [{'title': 'Test article', 'url': test_article}])]
|
||||||
soup = self.index_to_soup(self.INDEX)
|
soup = self.index_to_soup(self.INDEX)
|
||||||
figure = soup.find('figure', id='cover-image')
|
figure = soup.find('figure', id='cover-image')
|
||||||
if figure is not None:
|
if figure is not None:
|
||||||
|
@ -9,6 +9,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
|
|
||||||
|
|
||||||
web_version = True
|
web_version = True
|
||||||
|
test_article = None
|
||||||
|
# test_article = 'https://www.theatlantic.com/health/archive/2020/12/covid-19-second-surge/617415/?utm_source=feed'
|
||||||
|
|
||||||
|
|
||||||
def classes(classes):
|
def classes(classes):
|
||||||
@ -18,6 +20,19 @@ def classes(classes):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def prefix_classes(classes):
    """Build a tag-matching spec that selects elements by class-name prefix.

    ``classes`` is a whitespace-separated string of prefixes. The result is a
    dict of the form ``{'attrs': {'class': test}}``, where ``test`` takes the
    value of a ``class`` attribute and returns True when at least one of its
    whitespace-separated class names starts with one of the prefixes, and
    False otherwise (including when the attribute is missing or empty).
    """
    # str.startswith accepts a tuple, so every prefix is checked in one call.
    prefixes = tuple(classes.split())

    def test(x):
        # A missing or empty class attribute never matches.
        if not x:
            return False
        return any(cls.startswith(prefixes) for cls in x.split())

    return dict(attrs={'class': test})
|
||||||
|
|
||||||
|
|
||||||
class TheAtlantic(BasicNewsRecipe):
|
class TheAtlantic(BasicNewsRecipe):
|
||||||
|
|
||||||
if web_version:
|
if web_version:
|
||||||
@ -38,6 +53,9 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
'c-article-header__hed c-rubric article-header c-article-meta c-lead-media'
|
'c-article-header__hed c-rubric article-header c-article-meta c-lead-media'
|
||||||
' lead-img article-cover-extra article-body article-magazine article-cover-content'
|
' lead-img article-cover-extra article-body article-magazine article-cover-content'
|
||||||
),
|
),
|
||||||
|
prefix_classes(
|
||||||
|
'ArticleHeader_root__ ArticleLayoutSection_main__'
|
||||||
|
),
|
||||||
dict(itemprop='articleBody'),
|
dict(itemprop='articleBody'),
|
||||||
# these are for photos articles
|
# these are for photos articles
|
||||||
dict(id='article-header'),
|
dict(id='article-header'),
|
||||||
@ -45,9 +63,10 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
classes(
|
classes(
|
||||||
'c-ad c-share-social social-kit-top letter-writer-info callout secondary-byline embed-wrapper'
|
'c-ad c-share-social c-recirculation-link social-kit-top letter-writer-info callout secondary-byline embed-wrapper'
|
||||||
' offset-wrapper boxtop-most-popular social-icons hints read-more c-article-writer__social'
|
' offset-wrapper boxtop-most-popular social-icons hints read-more c-article-writer__social'
|
||||||
),
|
),
|
||||||
|
prefix_classes('ArticleRecirc_inline__'),
|
||||||
{
|
{
|
||||||
'name': ['meta', 'link', 'noscript', 'aside', 'h3']
|
'name': ['meta', 'link', 'noscript', 'aside', 'h3']
|
||||||
},
|
},
|
||||||
@ -103,7 +122,7 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
ans = None
|
ans = None
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
if web_version:
|
if web_version and not test_article:
|
||||||
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
|
||||||
@ -129,6 +148,8 @@ class TheAtlantic(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
else:
|
else:
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
|
if test_article:
|
||||||
|
return [('Articles', [{'title': 'Test article', 'url': test_article}])]
|
||||||
soup = self.index_to_soup(self.INDEX)
|
soup = self.index_to_soup(self.INDEX)
|
||||||
figure = soup.find('figure', id='cover-image')
|
figure = soup.find('figure', id='cover-image')
|
||||||
if figure is not None:
|
if figure is not None:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user