From 613bebe9ee6d616e1a0d5fdde3ee3a2b221f31da Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 24 May 2022 20:21:10 +0530
Subject: [PATCH] Update Outlook Magazine

---
 recipes/outlook_india.recipe | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/recipes/outlook_india.recipe b/recipes/outlook_india.recipe
index b2f94f4a4b..ef218a0be6 100644
--- a/recipes/outlook_india.recipe
+++ b/recipes/outlook_india.recipe
@@ -1,17 +1,30 @@
-import json, re
+import json
+import re
+
 from calibre.web.feeds.news import BasicNewsRecipe, classes
 
 
 class outlook(BasicNewsRecipe):
     title = 'Outlook Magazine'
     __author__ = 'unkn0wn'
-    description = ''
+    description = (
+        'Outlook covers the latest India news, analysis, business news and long-form stories on culture,'
+        ' money market and personal finance. Read India\'s best online magazine.'
+    )
     language = 'en_IN'
     use_embedded_content = False
     no_stylesheets = True
     remove_javascript = True
     remove_attributes = ['height', 'width', 'style']
     ignore_duplicate_articles = {'url'}
+    resolve_internal_links = True
+
+    keep_only_tags = [classes('__story_detail')]
+    remove_tags = [  # names must be separated by single spaces: classes() splits on ' '
+        classes(
+            'social_sharing_article left_trending left-sticky __tag_links next_prev_stories downarrow uparrow more_from_author_links next prev'
+        )
+    ]
 
     def parse_index(self):
         soup = self.index_to_soup('https://www.outlookindia.com/')
@@ -31,13 +44,15 @@ class outlook(BasicNewsRecipe):
             desc = ''
             p = h3.find_next_sibling('p')
             if p:
-                desc = self.tag_to_string(desc)
-            self.log('\t\tFound article:', title)
-            self.log('\t\t\t', url)
+                desc = self.tag_to_string(p)
+            self.log('\t', title)
+            self.log('\t', desc)
+            self.log('\t\t', url)
             ans.append({'title': title, 'url': url, 'description': desc})
         return [('Articles', ans)]
 
     def preprocess_raw_html(self, raw, *a):
+        return raw
         m = re.search('<!-- NewsArticle Schema -->.*?script.*?>', raw, flags=re.DOTALL)
         raw = raw[m.end():].lstrip()
         data = json.JSONDecoder().raw_decode(raw)[0]