#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2021, Kovid Goyal
#
# Provenance: this is recipes/the_week.recipe as introduced by the patch
# "The Week by Kovid Goyal" (commit 493338f4a0a33839dcd54de44b46e335af66adbf,
# dated Thu, 9 Sep 2021 22:53:35 +0530).


from calibre.web.feeds.news import BasicNewsRecipe


def fix_title(title):
    """Turn a hyphen-separated title into sentence case.

    Hyphens are replaced by spaces, then ``str.capitalize`` upper-cases the
    first character and lower-cases every other one, so ``'covid-UPDATE'``
    becomes ``'Covid update'``.

    :param title: The title text; must be a string.
    :return: The cleaned-up title.
    """
    return title.replace('-', ' ').capitalize()


class TheWeek(BasicNewsRecipe):
    """News recipe that builds an e-book of The Week from theweek.in RSS feeds."""

    title = u'The Week'
    language = 'en_IN'
    __author__ = 'Kovid Goyal'
    encoding = 'utf-8'
    oldest_article = 15  # days
    max_articles_per_feed = 25
    no_stylesheets = True
    # Article bodies are taken from the feed entries themselves.
    use_embedded_content = True
    # Several feeds are listed below; drop articles whose URL repeats.
    ignore_duplicate_articles = {'url'}

    feeds = [
        ('Cover Story', 'https://www.theweek.in/theweek/cover.rss'),
        ('Sports', 'https://www.theweek.in/theweek/sports.rss'),
        ('Current', 'https://www.theweek.in/theweek/current.rss'),
        ('Statescan', 'https://www.theweek.in/theweek/statescan.rss'),
        ('Leisure', 'https://www.theweek.in/theweek/leisure.rss'),
        ('Business', 'https://www.theweek.in/theweek/business.rss'),
        ('Specials', 'https://www.theweek.in/theweek/specials.rss'),
        ('More', 'https://www.theweek.in/theweek/more.rss'),
        ('Society', 'https://www.theweek.in/leisure/society.rss'),
    ]

    def get_cover_url(self):
        """Return the URL of the magazine cover image, or None if none is found.

        The magazine landing page is fetched and searched for the first
        ``<img>`` whose ``data-src-web`` attribute contains
        ``/cover-magazine``. That attribute holds a site-relative path, which
        is prefixed with the image host to form the returned URL.
        """
        soup = self.index_to_soup('https://www.theweek.in/theweek.html')

        def is_cover(value):
            # ``value`` is None for <img> tags that lack the attribute.
            return bool(value) and '/cover-magazine' in value

        img = soup.find('img', attrs={'data-src-web': is_cover})
        if img is None:
            return None
        src = img['data-src-web']
        # Cut the path at the last '.image.' marker when there is one
        # (presumably a rendition/size suffix -- verify against the site).
        # str.rfind reports a miss as -1 and never raises, so no try/except.
        idx = src.rfind('.image.')
        if idx > -1:
            src = src[:idx]
        return 'https://img.theweek.in' + src

    def preprocess_html(self, soup):
        """Tidy the article markup before conversion.

        The first ``<a>`` is renamed to ``<span>`` and the text of the first
        ``<h2>`` is passed through ``fix_title``. Either step is skipped when
        its element is missing, so unexpected markup leaves the article
        unchanged instead of aborting it.

        :param soup: The parsed article HTML.
        :return: The same soup object, modified in place.
        """
        link = soup.find('a')
        if link is not None:
            link.name = 'span'
        heading = soup.find('h2')
        # ``.string`` is None when the heading has more than one child (for
        # example nested markup); leave such headings untouched.
        if heading is not None and heading.string:
            heading.string = fix_title(heading.string)
        return soup

    def populate_article_metadata(self, article, soup, first):
        """Apply the ``fix_title`` clean-up to the article's stored title."""
        article.title = fix_title(article.title)