From 17479d1d17caf7b431b19fc5d53a33e6fe9eff0a Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 14 Nov 2021 18:26:40 +0530
Subject: [PATCH] The Epoch Times by Kovid Goyal

---
 recipes/epoch_times.recipe | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 recipes/epoch_times.recipe

diff --git a/recipes/epoch_times.recipe b/recipes/epoch_times.recipe
new file mode 100644
index 0000000000..ea1e32ecf3
--- /dev/null
+++ b/recipes/epoch_times.recipe
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+from calibre.web.feeds.news import BasicNewsRecipe, classes
+
+
+class EpochTimes(BasicNewsRecipe):
+    '''Recipe that downloads The Epoch Times from the site's RSS feeds.'''
+
+    title = 'The Epoch Times'
+    __author__ = 'Kovid Goyal'
+    description = 'US general news'
+    # BasicNewsRecipe reads the news language from `language`; an attribute
+    # named `lang` is not part of its API and would be silently ignored.
+    language = 'en_US'
+    encoding = 'utf-8'
+    oldest_article = 2
+    max_articles_per_feed = 15
+
+    # Only elements matching these classes are kept from each article page.
+    keep_only_tags = [
+        classes('post_title featured_image pricat_name author date post_content'),
+    ]
+    # Elements matching these classes are stripped from the article.
+    remove_tags = [
+        classes('author_wrapper'),
+    ]
+
+    # (section title, RSS feed URL) pairs.
+    feeds = [
+        ('US', 'https://www.theepochtimes.com/c-us/feed/'),
+        ('World', 'https://www.theepochtimes.com/c-world/feed/'),
+        ('General', 'https://www.theepochtimes.com/feed/'),
+        ('Opinion', 'https://www.theepochtimes.com/c-opinion/feed/'),
+        ('Business and Economy', 'https://www.theepochtimes.com/c-business/feed/'),
+        ('Science', 'https://www.theepochtimes.com/c-science/feed/'),
+        ('Tech', 'https://www.theepochtimes.com/c-tech/feed/'),
+        ('Health', 'https://www.theepochtimes.com/c-wellness/feed/'),
+        ('Entertainment', 'https://www.theepochtimes.com/c-entertainment/feed/'),
+    ]
+
+    def preprocess_html(self, soup):
+        '''Fix up image sources and put the title before the lead image.
+
+        Every ``img`` that has a ``data-src`` attribute gets that value copied
+        into ``src``. If both a ``post_title`` and a ``featured_image`` element
+        exist, the title is moved to sit immediately before the image.
+        Returns the same soup object, modified in place.
+        '''
+        for img in soup.findAll('img', attrs={'data-src': True}):
+            img['src'] = img['data-src']
+        title = soup.find(attrs={'class': 'post_title'})
+        fi = soup.find(attrs={'class': 'featured_image'})
+        if title is not None and fi is not None:
+            title.extract()
+            fi.insert_before(title)
+        return soup