Update VICE News

This commit is contained in:
Kovid Goyal 2020-09-05 20:14:00 +05:30
parent 138745a85a
commit f777b3c5c5
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -13,6 +13,12 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
def classes(classes):
    """Return a tag filter that matches any of the given class names.

    ``classes`` is a string of class names separated by single spaces.
    The result is a dict of the form ``{'attrs': {'class': predicate}}``,
    which this file unpacks into ``soup.findAll(**...)`` and also lists
    directly in ``keep_only_tags`` / ``remove_tags``.

    The predicate takes a tag's ``class`` attribute value. It returns the
    value unchanged when it is falsy (``None`` or ``''``); otherwise it
    returns the frozenset of wanted names found in the whitespace-split
    value, which is empty (falsy) when nothing matches.
    """
    wanted = frozenset(classes.split(' '))

    def has_wanted_class(value):
        # A missing or empty class attribute is handed back as-is so the
        # caller sees the same falsy object it passed in.
        if not value:
            return value
        return wanted.intersection(value.split())

    return {'attrs': {'class': has_wanted_class}}
class VICENews(BasicNewsRecipe): class VICENews(BasicNewsRecipe):
__author__ = 'Adrian Tennessee (adrian.tennessee at domainthatnobodytakes.com)' __author__ = 'Adrian Tennessee (adrian.tennessee at domainthatnobodytakes.com)'
__license__ = 'GPLv3' __license__ = 'GPLv3'
@ -36,32 +42,18 @@ class VICENews(BasicNewsRecipe):
extra_css = '.article-title { font-size:125%; font-weight:bold }' extra_css = '.article-title { font-size:125%; font-weight:bold }'
keep_only_tags = [ keep_only_tags = [
# this is the image. comment it out if you don't want it classes('article__header__title contributors article__header__datebar__date--original short-form__body__article-body')
# desktop and lede version provide higher-res images
{'class': 'lede-image mobile'},
# publication information
{'class': 'article-header-inner'},
{'class': 'author'},
{'class': 'meta-time'},
# actual article
{'class': 'left-column'}
] ]
remove_tags = [ remove_tags = [
# remove topic section classes('lazy-vice-ad abc__article_embed article__tagged user-newsletter-signup article__embed-component'),
dict(name='p', attrs={'class': ['article-topic']}),
# remove side header
dict(name='header', attrs={
'class': ['site-header article-site-header']}),
# remove topic tags at end of article
dict(name='div', attrs={'class': ['article-topics-inner']}),
# remove social media buttons
dict(name='div', attrs={'class': ['socials']}),
# remove "Recommended"-header for recommended articles
dict(name='header', attrs={'class': ['widget-header']}),
# remove recommended article links
dict(name='div', attrs={
'class': ['widget-list-item article-feature-item']})
] ]
def preprocess_html(self, soup):
    """Point responsive images at a concrete URL and drop their <source> tags.

    For every element carrying the ``responsive-image__img`` class, each
    preceding ``source`` sibling is visited: the part of its ``srcset``
    attribute before the first ``?`` is written to the image's ``src``,
    and the ``source`` element is removed from the tree. When several
    ``source`` siblings exist, the last one visited determines ``src``.

    Returns the same ``soup`` object, modified in place.
    """
    image_filter = classes('responsive-image__img')
    for image in soup.findAll(**image_filter):
        for candidate in image.findPreviousSiblings('source'):
            # Keep only the text ahead of the query string.
            bare_url, _, _ = candidate['srcset'].partition('?')
            image['src'] = bare_url
            candidate.extract()
    return soup
feeds = [(u'VICE News', u'https://news.vice.com/rss')] feeds = [(u'VICE News', u'https://news.vice.com/rss')]