# calibre/recipes/vice.recipe
# 2020-09-05 20:14:00 +05:30
#
# 60 lines
# 1.9 KiB
# Plaintext

##
# Title: Vice News recipe for calibre
# Author: Adrian Tennessee
# Contact: adrian.tennessee at domainthatnobodytakes.com
##
# License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
# Copyright: Copyright 2014 Adrian Tennessee
##
# Written: 2014-09-13
# Last Edited: 2014-09-13
##
from calibre.web.feeds.news import BasicNewsRecipe
def classes(classes):
    """Build tag-matching keyword arguments for a set of CSS class names.

    ``classes`` is a whitespace-separated string of class names. The return
    value is a dict of the form ``{'attrs': {'class': matcher}}``, where
    ``matcher`` is a callable taking a tag's ``class`` attribute value.

    ``matcher(x)`` returns ``x`` itself when ``x`` is falsy (``None`` or an
    empty string), otherwise the frozenset of names that ``x`` and the
    requested names have in common -- truthy exactly when at least one
    requested class is present.
    """
    # split() with no argument collapses runs of whitespace, so accidental
    # double spaces in the spec never produce an empty-string "class".
    q = frozenset(classes.split())
    return dict(attrs={
        'class': lambda x: x and frozenset(x.split()).intersection(q)})
class VICENews(BasicNewsRecipe):
    """calibre recipe that fetches articles from the VICE News RSS feed."""

    __author__ = 'Adrian Tennessee (adrian.tennessee at domainthatnobodytakes.com)'
    __license__ = 'GPLv3'
    __copyright__ = '2014, Adrian Tennessee <adrian.tennessee at domainthatnobodytakes.com)'

    # Publication metadata.
    title = u'VICE News'
    language = 'en'
    description = u'VICE News web site ebook'
    publisher = 'VICE Media'
    category = 'news, world'
    cover_url = 'https://upload.wikimedia.org/wikipedia/commons/d/dc/Vice_News_logo.jpg'

    # Download / clean-up settings (see calibre's BasicNewsRecipe API for the
    # exact meaning of each attribute).
    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'

    # article-title modifies h1-tag of article title
    extra_css = '.article-title { font-size:125%; font-weight:bold }'

    # Elements whose class matches one of these names are the parts of the
    # page that are kept.
    keep_only_tags = [
        classes('article__header__title contributors article__header__datebar__date--original short-form__body__article-body')
    ]

    # Elements whose class matches one of these names are stripped out.
    remove_tags = [
        classes('lazy-vice-ad abc__article_embed article__tagged user-newsletter-signup article__embed-component'),
    ]

    def preprocess_html(self, soup):
        """Give responsive images a plain ``src`` taken from their ``<source>`` siblings.

        For every element with class ``responsive-image__img``, each
        preceding ``<source>`` sibling is visited: its ``srcset`` value, cut
        at the first ``?``, is copied into the image's ``src`` and the
        ``<source>`` element is then removed from the tree. When several
        siblings carry a ``srcset``, the last one visited wins.

        Returns the same ``soup`` object, modified in place.
        """
        for img in soup.findAll(**classes('responsive-image__img')):
            for source in img.findPreviousSiblings('source'):
                # A <source> without srcset used to raise KeyError and abort
                # the whole article; skip the assignment instead.
                srcset = source.get('srcset')
                if srcset:
                    img['src'] = srcset.split('?')[0]
                source.extract()
        return soup

    feeds = [(u'VICE News', u'https://news.vice.com/rss')]