From 22d5e11cac2b7d029fb1cd83e9d328f5a8b7b3d0 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 2 Dec 2018 12:58:14 +0530
Subject: [PATCH] Sports Illustrated by Kovid Goyal

---
 recipes/sports_illustrated.recipe | 71 +++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 recipes/sports_illustrated.recipe

diff --git a/recipes/sports_illustrated.recipe b/recipes/sports_illustrated.recipe
new file mode 100644
index 0000000000..859f0842ca
--- /dev/null
+++ b/recipes/sports_illustrated.recipe
@@ -0,0 +1,71 @@
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+# License: GPLv3 Copyright: 2018, Kovid Goyal
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+def classes(classes):
+    """Build a tag matcher that selects elements by CSS class.
+
+    ``classes`` is a space-separated string of class names. The result is a
+    dict with an ``attrs`` entry whose ``class`` predicate is truthy when an
+    element's class attribute shares at least one name with ``classes``.
+    """
+    q = frozenset(classes.split(' '))
+    return dict(attrs={
+        'class': lambda x: x and frozenset(x.split()).intersection(q)})
+
+
+# News recipe for Sports Illustrated, built from the si.com RSS feeds below.
+class SI(BasicNewsRecipe):
+    title = 'Sports Illustrated'
+    __author__ = 'Kovid Goyal'
+    language = 'en'
+    encoding = 'utf-8'
+    ignore_duplicate_articles = {'title', 'url'}
+    no_stylesheets = True
+    compress_news_images = True
+    compress_news_images_auto_size = 5
+    remove_attributes = ['style']
+
+    # Keep only elements with the headline or article-content class.
+    keep_only_tags = [
+        classes('headline article-content'),
+    ]
+    # Drop elements with the media-video or OUTBRAIN class and all <meta> tags.
+    remove_tags = [
+        classes('media-video OUTBRAIN'),
+        dict(name='meta'),
+    ]
+
+    def preprocess_html(self, soup, *a):
+        """Turn every element carrying ``data-src`` into a plain ``<img>``.
+
+        The element is renamed to ``img``, its children are dropped and its
+        ``src`` is set from ``data-src``. ``soup`` is modified in place and
+        returned; extra positional arguments are accepted and ignored.
+        """
+        for tag in soup.findAll(attrs={'data-src': True}):
+            tag.name = 'img'
+            del tag.contents[:]
+            tag['src'] = tag['data-src']
+        return soup
+
+    feeds = [
+        ('Top stories', 'https://www.si.com/rss/si_topstories.rss'),
+        ('NFL', 'https://www.si.com/rss/si_nfl.rss'),
+        ('College Football', 'https://www.si.com/rss/si_ncaaf.rss'),
+        ('MLB', 'https://www.si.com/rss/si_mlb.rss'),
+        ('NBA', 'https://www.si.com/rss/si_nba.rss'),
+        ('College basketball', 'https://www.si.com/rss/si_ncaab.rss'),
+        ('NHL', 'https://www.si.com/rss/si_hockey.rss'),
+        ('Soccer', 'https://www.si.com/rss/si_soccer.rss'),
+        ('Tennis', 'https://www.si.com/rss/si_tennis.rss'),
+        ('Fantasy', 'https://www.si.com/rss/si_fantasy.rss'),
+        ('MMA', 'https://www.si.com/rss/si_mma.rss'),
+        ('Swim Daily', 'https://www.si.com/rss/si_swim_daily.rss'),
+        ('Writers', 'https://www.si.com/rss/si_writers.rss'),
+    ]