From 8f1573b628bc337884de02c84df2305f5ed7308c Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Wed, 8 Nov 2023 10:37:49 +0530
Subject: [PATCH] TOI Print update; ET print edition absolute epaper links; WSJ masthead_url

---
 ...heeconomictimes_india_print_edition.recipe |  3 +-
 recipes/toiprint.recipe                       | 45 ++++++++++---------
 recipes/wsj.recipe                            |  1 +
 recipes/wsj_free.recipe                       |  1 +
 4 files changed, 27 insertions(+), 23 deletions(-)
diff --git a/recipes/theeconomictimes_india_print_edition.recipe b/recipes/theeconomictimes_india_print_edition.recipe
index c610386a3e..293eb63e0d 100644
--- a/recipes/theeconomictimes_india_print_edition.recipe
+++ b/recipes/theeconomictimes_india_print_edition.recipe
@@ -79,11 +79,10 @@ class TheEconomicTimes(BasicNewsRecipe):
             for h3 in section.findAll(("h1", "h3", "h4", "h5")):
                 span = h3.find(
                     'span',
-                    href=lambda x: x and x.startswith('/epaper/'),
+                    href=lambda x: x and x.startswith('https://economictimes.indiatimes.com/epaper/'),
                     attrs={'class': 'banner'}
                 )
                 url = span['href']
-                url = 'https://economictimes.indiatimes.com' + url
                 title = self.tag_to_string(span)
                 div = h3.find_next_sibling('div', attrs={'class': 'dsc'})
                 if div is not None:
diff --git a/recipes/toiprint.recipe b/recipes/toiprint.recipe
index b2a961cf82..9f3127b14e 100644
--- a/recipes/toiprint.recipe
+++ b/recipes/toiprint.recipe
@@ -1,6 +1,7 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 import json
 from datetime import date
+from collections import defaultdict
 
 
 # default edition is Delhi i.e., 'cap'
@@ -54,33 +55,32 @@ class toiprint(BasicNewsRecipe):
         url = index + '/DayIndex/' + date_ + '_' + le + '.json'
         raw = self.index_to_soup(url, raw=True)
         data = json.loads(raw)
-        if 'DigitalIndex' not in data:
+        if 'DayIndex' not in data:
             raise ValueError(
                     'The Times of India Newspaper is not published today.'
                 )
-        data = data['DigitalIndex']
-        feeds = []
+        data = data['DayIndex']
+        feeds_dict = defaultdict(list)
         for link in data:
             sec_name = link['PageTitle']
+            if sec_name == 'Advertisement':
+                continue
             self.log(sec_name)
             articles = []
-            if 'Views' in link:
-                for sec in link['Views']:
-                    if 'Articles' in sec:
-                        for art in sec['Articles']:
-                            if 'ArticleName' not in art:
-                                continue
-                            url = art['ArticleName']
-                            title = art.get('ArticleTitle', 'unknown').replace('<br>', '')
-                            if art.get('ColumnTitle', '') == '':
-                                desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ArticleBody', '')
-                            else:
-                                desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ColumnTitle', '')
-                            self.log('\t', title, '\n\t', desc, '\n\t\t', url)
-                            articles.append({'title': title, 'description':desc, 'url': url})
-            if articles:
-                feeds.append((sec_name, articles))
-        return feeds
+            if 'Articles' in link:
+                for art in link['Articles']:
+                    section = sec_name
+                    if 'ArticleName' not in art:
+                        continue
+                    url = art['ArticleName']
+                    title = art.get('ArticleTitle', 'unknown').replace('<br>', '')
+                    if art.get('ColumnTitle', '') == '':
+                        desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ArticleBody', '')
+                    else:
+                        desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ColumnTitle', '')
+                    self.log('\t', title, '\n\t', desc.replace('\n', ''))
+                    feeds_dict[section].append({"title": title, "url": url, "description": desc})
+        return [(section, articles) for section, articles in feeds_dict.items()]
 
     def preprocess_raw_html(self, raw, *a):
         data = json.loads(raw)
@@ -107,8 +107,11 @@ class toiprint(BasicNewsRecipe):
             elif 'ZoneText' in x:
                 body += '<p><i>' + x['ZoneText'] + '</i></p>'
         return '<html><body><div>' \
-                    + body.replace('<br>', '<p>').replace('<br/>', '<p>').replace('&lt;br&gt;', '<p>').replace('\n', '<div>') \
+                    + body.replace('<br>', '<p>').replace('<br/>', '<p>').replace('&lt;br&gt;', '<p>').replace('\n', '<br>') \
                         + '</div></body></html>'
 
     def print_version(self, url):
         return index + '/ArticleZoneJson/' + url.split('_')[-3] + '/' + url + '.json'
+
+    def populate_article_metadata(self, article, soup, first):
+        article.url = '***'
diff --git a/recipes/wsj.recipe b/recipes/wsj.recipe
index ceeb4fd08f..9a8dbbff09 100644
--- a/recipes/wsj.recipe
+++ b/recipes/wsj.recipe
@@ -35,6 +35,7 @@ class WSJ(BasicNewsRecipe):
     __author__ = 'Kovid Goyal'
     description = 'News and current affairs'
     language = 'en'
+    masthead_url = 'https://s.wsj.net/media/wsj_amp_masthead_lg.png'
 
     compress_news_images = True
     compress_news_images_auto_size = 7
diff --git a/recipes/wsj_free.recipe b/recipes/wsj_free.recipe
index 6b5a264c50..8d45770393 100644
--- a/recipes/wsj_free.recipe
+++ b/recipes/wsj_free.recipe
@@ -35,6 +35,7 @@ class WSJ(BasicNewsRecipe):
     __author__ = 'Kovid Goyal'
     description = 'News and current affairs'
     language = 'en'
+    masthead_url = 'https://s.wsj.net/media/wsj_amp_masthead_lg.png'
 
     compress_news_images = True
     compress_news_images_auto_size = 7