Update VICE News

2025-07-09 03:04:10 -04:00 · 2020-09-05 20:14:00 +05:30 · 2020-09-05 20:14:00 +05:30 · f777b3c5c5
commit f777b3c5c5
parent 138745a85a
1 changed files with 15 additions and 23 deletions
--- a/recipes/vice.recipe
+++ b/recipes/vice.recipe
@ -13,6 +13,12 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 def classes(classes):
    """Build find/filter keyword arguments matching any of the given CSS classes.

    ``classes`` is a string of class names separated by single spaces.  The
    result is a dict of the form ``{'attrs': {'class': predicate}}``.  The
    predicate returns its argument unchanged when that argument is falsy;
    otherwise it returns the frozenset of whitespace-separated tokens of the
    argument that are also among the wanted names (empty, hence falsy, when
    none match).
    """
    wanted = frozenset(classes.split(' '))

    def has_wanted_class(value):
        # Falsy values (None, '') are handed back as-is, like `x and ...`.
        if not value:
            return value
        return frozenset(value.split()) & wanted

    return {'attrs': {'class': has_wanted_class}}
 class VICENews(BasicNewsRecipe):
    __author__ = 'Adrian Tennessee (adrian.tennessee at domainthatnobodytakes.com)'
    __license__ = 'GPLv3'
@ -36,32 +42,18 @@ class VICENews(BasicNewsRecipe):
    extra_css = '.article-title { font-size:125%; font-weight:bold }'
    keep_only_tags = [
-        # this is the image. comment it out if you don't want it
+        classes('article__header__title contributors article__header__datebar__date--original short-form__body__article-body')
        # desktop and lede version provide higher-res images
        {'class': 'lede-image mobile'},
        # publication information
        {'class': 'article-header-inner'},
        {'class': 'author'},
        {'class': 'meta-time'},
        # actual article
        {'class': 'left-column'}
    ]
    remove_tags = [
-        # remove topic section
+        classes('lazy-vice-ad abc__article_embed article__tagged user-newsletter-signup article__embed-component'),
        dict(name='p', attrs={'class': ['article-topic']}),
        # remove side header
        dict(name='header', attrs={
            'class': ['site-header article-site-header']}),
        # remove topic tags at end of article
        dict(name='div', attrs={'class': ['article-topics-inner']}),
        # remove social media buttons
        dict(name='div', attrs={'class': ['socials']}),
        # remove "Recommended"-header for recommended articles
        dict(name='header', attrs={'class': ['widget-header']}),
        # remove recommended article links
        dict(name='div', attrs={
            'class': ['widget-list-item article-feature-item']})
    ]
    def preprocess_html(self, soup):
        """Give each responsive image a plain ``src`` taken from its <source> siblings.

        For every tag matched by ``classes('responsive-image__img')``, each
        preceding ``source`` sibling is visited: its ``srcset`` value, cut at
        the first ``?``, is written to the image's ``src`` attribute, and the
        ``source`` tag is then removed from the tree.  When several siblings
        carry a ``srcset``, the one visited last determines the final ``src``.

        Returns the same ``soup`` object, modified in place.
        """
        for img in soup.findAll(**classes('responsive-image__img')):
            for source in img.findPreviousSiblings('source'):
                # Indexing source['srcset'] raised KeyError for a <source>
                # without that attribute; read it defensively and leave the
                # existing src alone when srcset is missing or empty.
                srcset = source.get('srcset')
                if srcset:
                    img['src'] = srcset.split('?')[0]
                source.extract()
        return soup
    feeds = [(u'VICE News', u'https://news.vice.com/rss')]