diff --git a/recipes/frontline.recipe b/recipes/frontline.recipe new file mode 100644 index 0000000000..c93a4c533a --- /dev/null +++ b/recipes/frontline.recipe @@ -0,0 +1,67 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from calibre.web.feeds.news import BasicNewsRecipe, classes + + +class Frontline(BasicNewsRecipe): + title = u'Frontline' + __author__ = 'unkn0wn' + description = 'Frontline, the fortnightly English magazine from the stable of The Hindu, has been a distinguished presence in the media world since 1984.' + language = 'en_IN' + no_stylesheets = True + remove_javascript = True + use_embedded_content = False + encoding = 'utf-8' + oldest_article = 14 + max_articles_per_feed = 50 + ignore_duplicate_articles = {'url'} + # masthead_url = 'https://fl.thgim.com/static/theme/default/base/img/fllogo.png' + remove_attributes = ['height', 'width'] + + def get_cover_url(self): + soup = self.index_to_soup( + 'https://frontline.thehindu.com/current-issue/') + tag = soup.find(attrs={'class': 'imgWrapper'}) + if tag: + self.cover_url = tag.find('img')['data-proxy-image'].replace( + "FREE_100", "FREE_810") + return super().get_cover_url() + + # https://fl.thgim.com/incoming/b5zy2g/article38454943.ece/alternates/FREE_100/coverpng + + keep_only_tags = [ + classes( + 'overline mainart-title marginBottom10px articleBottomLine swiper-slide slide-caption artlead-text body-main' + ) + ] + remove_tags = [classes('dispatche-middle bigtitle')] + + remove_tags_after = [ + classes('body-main'), + ] + + feeds = [ + ('Cover Story', + 'https://frontline.thehindu.com/cover-story/feeder/default.rss'), + ('The Nation', + 'https://frontline.thehindu.com/the-nation/feeder/default.rss'), + ('World Affairs', + 'https://frontline.thehindu.com/world-affairs/feeder/default.rss'), + ('Politics', + 'https://frontline.thehindu.com/politics/feeder/default.rss'), + ('Arts & Culture', + 'https://frontline.thehindu.com/arts-and-culture/feeder/default.rss'), + ('Social Issues', + 'https://frontline.thehindu.com/social-issues/feeder/default.rss'), + ('Books', 'https://frontline.thehindu.com/books/feeder/default.rss'), + ('Columns', + 'https://frontline.thehindu.com/columns/feeder/default.rss'), + ('Others', 'https://frontline.thehindu.com/other/feeder/default.rss'), + ] + + def preprocess_html(self, soup): + for img in soup.findAll('img', attrs={'data-proxy-image': True}): + # replace FREE_810 with FREE_615 FREE_1200.. FREE_300. + img['src'] = img['data-proxy-image'].replace( + "FREE_100", "FREE_810") + return soup