Update The Athletic

This commit is contained in:
Kovid Goyal 2022-10-13 07:57:46 +05:30
parent 0561f520a8
commit 6b74cde930
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -1,4 +1,4 @@
from calibre.web.feeds.news import BasicNewsRecipe, classes from calibre.web.feeds.news import BasicNewsRecipe
class Athletic(BasicNewsRecipe): class Athletic(BasicNewsRecipe):
@@ -17,14 +17,14 @@ class Athletic(BasicNewsRecipe):
remove_empty_feeds = True remove_empty_feeds = True
extra_css = ''' extra_css = '''
#articleByLineString{font-size:small;} #articleByLineString{font-size:small;}
.credits-text{font-size:small; text-align:center;} .inline-credits{font-size:small; text-align:center;}
.sc-66df40a5-3{font-size:small;}
''' '''
keep_only_tags = [dict(name='div', attrs={'id': 'body-container'})] keep_only_tags = [
dict(name='amp-img', attrs={'class': 'i-amphtml-layout-fill'}),
remove_tags_after = [ dict(name='div', attrs={'class': ['the-lead-article', 'article-container']})
dict(name='div', attrs={'id': 'newsLede'}), ]
classes('article-content-container') remove_tags = [
dict(name='i-amphtml-sizer')
] ]
feeds = [ feeds = [
@@ -42,20 +42,11 @@ class Athletic(BasicNewsRecipe):
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):
for block in soup.findAll( for img in soup.findAll('amp-img'):
'img', attrs={'style': lambda x: x and x.startswith('display:block')} if not img.find('img'):
): img.name = 'img'
block.extract()
for space in soup.findAll(
**classes('MuiGrid-justify-content-xs-space-between')
):
space.extract()
for img in soup.findAll('img', attrs={'src': True}):
if img['src'].startswith('/_next/image/'):
img['src'] = 'https://theathletic.com' + img['src'].split('&')[
0] + '&w=828&q=75'
return soup return soup
def print_version(self, url): def print_version(self, url):
reset = url.split('?')[0] reset = url.split('?')[0] + '?amp=1'
return reset return reset