From 613bebe9ee6d616e1a0d5fdde3ee3a2b221f31da Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 24 May 2022 20:21:10 +0530 Subject: [PATCH] Update Outlook Magazine --- recipes/outlook_india.recipe | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/recipes/outlook_india.recipe b/recipes/outlook_india.recipe index b2f94f4a4b..ef218a0be6 100644 --- a/recipes/outlook_india.recipe +++ b/recipes/outlook_india.recipe @@ -1,17 +1,30 @@ -import json, re +import json +import re + from calibre.web.feeds.news import BasicNewsRecipe, classes class outlook(BasicNewsRecipe): title = 'Outlook Magazine' __author__ = 'unkn0wn' - description = '' + description = ( + 'Outlook covers the latest India news, analysis, business news and long-form stories on culture,' + ' money market and personal finance. Read India\'s best online magazine.' + ) language = 'en_IN' use_embedded_content = False no_stylesheets = True remove_javascript = True remove_attributes = ['height', 'width', 'style'] ignore_duplicate_articles = {'url'} + resolve_internal_links = True + + keep_only_tags = [classes('__story_detail')] + remove_tags = [ + classes( + 'social_sharing_article left_trending left-sticky __tag_links next_prev_stories downarrow uparrow more_from_author_links next prev' + ) + ] def parse_index(self): soup = self.index_to_soup('https://www.outlookindia.com/') @@ -31,13 +44,15 @@ class outlook(BasicNewsRecipe): desc = '' p = h3.find_next_sibling('p') if p: - desc = self.tag_to_string(desc) - self.log('\t\tFound article:', title) - self.log('\t\t\t', url) + desc = self.tag_to_string(p) + self.log('\t', title) + self.log('\t', desc) + self.log('\t\t', url) ans.append({'title': title, 'url': url, 'description': desc}) return [('Articles', ans)] def preprocess_raw_html(self, raw, *a): + return raw m = re.search('.*?script.*?>', raw, flags=re.DOTALL) raw = raw[m.end():].lstrip() data = json.JSONDecoder().raw_decode(raw)[0]