From 68851263a440ec16d33b34d70204011f25d8f3cc Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Tue, 29 Oct 2024 12:07:56 +0530
Subject: [PATCH 1/2] Update reuters.recipe: add image width option, drop sports/lifestyle

---
 recipes/reuters.recipe | 84 +++++++++++++++++++++++++++++++-----------
 1 file changed, 63 insertions(+), 21 deletions(-)

diff --git a/recipes/reuters.recipe b/recipes/reuters.recipe
index a9abcc5416..513568792a 100644
--- a/recipes/reuters.recipe
+++ b/recipes/reuters.recipe
@@ -4,7 +4,6 @@ import json
 import time
 from datetime import datetime, timedelta
 
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from calibre.web.feeds.news import BasicNewsRecipe
 
 
@@ -12,6 +11,7 @@ def p_dt(x):
     dt = datetime.fromisoformat(x[:-1]) + timedelta(seconds=time.timezone)
     return dt.strftime('%b %d, %Y, %I:%M %p')
 
+
 class Reuters(BasicNewsRecipe):
     title = 'Reuters'
     __author__ = 'unkn0wn'
@@ -20,28 +20,35 @@ class Reuters(BasicNewsRecipe):
         'reaching billions of people worldwide every day. Reuters provides business, financial, national and international '
         'news to professionals via desktop terminals, the world’s media organizations, industry events and directly to consumers.'
     )
-    masthead_url = 'https://www.reutersagency.com/wp-content/uploads/2024/06/reuters-logo.png'
-    cover_url = 'https://yt3.googleusercontent.com/ytc/AIdro_mk43b9eQwN15ZBDyMPDaElxvw4V-oUS9XDUvVnYB3gA9yA=s1024' 
+    masthead_url = (
+        'https://upload.wikimedia.org/wikipedia/commons/9/9e/Reuters_logo_2024.svg'
+    )
+    cover_url = 'https://yt3.googleusercontent.com/ytc/AIdro_mk43b9eQwN15ZBDyMPDaElxvw4V-oUS9XDUvVnYB3gA9yA=s1024'
     language = 'en'
     encoding = 'utf-8'
-    oldest_article = 1.2 # days
+    oldest_article = 1.2  # days
     no_javascript = True
     no_stylesheets = True
     remove_attributes = ['style', 'height', 'width']
     resolve_internal_links = True
     ignore_duplicate_articles = {'url', 'title'}
 
-    extra_css = '''
+    extra_css = """
         .label, .auth { font-size:small; color:#202020; }
         .figc { font-size:small; }
         img {display:block; margin:0 auto;}
-    '''
+    """
 
     recipe_specific_options = {
         'days': {
             'short': 'Oldest article to download from this news source. In days ',
             'long': 'For example, 0.5, gives you articles from the past 12 hours',
-            'default': str(oldest_article)
+            'default': str(oldest_article),
+        },
+        'res': {
+            'short': 'For hi-res images, select a resolution from the\nfollowing options: 960, 1080, 1200',
+            'long': 'This is useful for non e-ink devices',
+            'default': '480'
         }
     }
 
@@ -54,11 +61,22 @@ class Reuters(BasicNewsRecipe):
     def parse_index(self):
         index = 'https://www.reuters.com'
         today = datetime.now()
-        feed_api = index + '/arc/outboundfeeds/v3/mobile/section/{}/?from=0&size=50&outputType=json'
+        feed_api = (
+            index
+            + '/arc/outboundfeeds/v3/mobile/section/{}/?from=0&size=50&outputType=json'
+        )
         path_api = index + '/arc/outboundfeeds/v3/mobile{}?outputType=json'
         sections = [
-            'world', 'business', 'markets','sustainability', 'legal',
-            'breakingviews', 'technology', 'sports', 'science', 'lifestyle'
+            'world',
+            'business',
+            'markets',
+            'sustainability',
+            'legal',
+            'breakingviews',
+            'technology',
+            # 'sports',
+            'science',
+            # 'lifestyle',
         ]
 
         feeds = []
@@ -69,7 +87,9 @@ class Reuters(BasicNewsRecipe):
 
             articles = []
 
-            data = json.loads(self.index_to_soup(feed_api.format(sec), raw=True))['wireitems']
+            data = json.loads(self.index_to_soup(feed_api.format(sec), raw=True))[
+                'wireitems'
+            ]
 
             for x in data:
                 if x.get('wireitem_type', '') == 'story':
@@ -77,7 +97,9 @@ class Reuters(BasicNewsRecipe):
                         if y.get('type', '') == 'story':
                             title = y['story']['hed']
 
-                            date = datetime.fromisoformat(y['story']['updated_at'][:-1]) + timedelta(seconds=time.timezone)
+                            date = datetime.fromisoformat(
+                                y['story']['updated_at'][:-1]
+                            ) + timedelta(seconds=time.timezone)
                             if (today - date) > timedelta(self.oldest_article):
                                 continue
 
@@ -86,12 +108,18 @@ class Reuters(BasicNewsRecipe):
                             if path.get('type', '') == 'article':
                                 url = path_api.format(path['api_path_native'])
                                 self.log('            ', title, '\n\t', desc)
-                                articles.append({'title': title, 'description':desc, 'url': url})
+                                articles.append(
+                                    {'title': title, 'description': desc, 'url': url}
+                                )
             if articles:
                 feeds.append((section, articles))
         return feeds
 
     def preprocess_raw_html(self, raw, url):
+        res = '&width=480'
+        w = self.recipe_specific_options.get('res')
+        if w and isinstance(w, str):
+            res = '&width=' + w
         js = json.loads(raw)
         data = js['wireitems']
         body = ''
@@ -103,19 +131,30 @@ class Reuters(BasicNewsRecipe):
                         break
                 for y in x['templates']:
                     if 'title' in y['cid']:
-                        body += '<h1 title="{}">'.format(js['share_url']) + y['content'] + '</h1>'
+                        body += (
+                            '<h1 title="{}">'.format(js['share_url'])
+                            + y['content']
+                            + '</h1>'
+                        )
                         break
                 for y in x['templates']:
                     if 'author' in y['cid']:
                         body += '<p>'
                         auths = [x for x in y.get('authors_names', [])]
                         if auths:
-                            body += '<div class="auth">' + 'By ' + ', '.join(auths) + '</div>'
+                            body += (
+                                '<div class="auth">' + 'By ' + ', '.join(auths) + '</div>'
+                            )
                             break
                 for y in x['templates']:
                     if 'datetime' in y['cid']:
-                        body += '<div class="auth">' + str(y['read_minutes']) \
-                                + ' minute read | ' + p_dt(y['display_time']) + '</div>'
+                        body += (
+                            '<div class="auth">'
+                            + str(y['read_minutes'])
+                            + ' minute read | '
+                            + p_dt(y['display_time'])
+                            + '</div>'
+                        )
                         body += '</p>'
                         break
                 for y in x['templates']:
@@ -126,7 +165,8 @@ class Reuters(BasicNewsRecipe):
                     if 'image' in y['cid']:
                         if 'renditions' in y['image']:
                             body += '<img src="{}"><div class="figc">{}</div>'.format(
-                                y['image']['url'].split('&')[0] + '&width=480', y['image']['caption']
+                                y['image']['url'].split('&')[0] + res,
+                                y['image']['caption'],
                             )
                         else:
                             body += '<img src="{}"><div class="figc">{}</div>'.format(
@@ -136,7 +176,8 @@ class Reuters(BasicNewsRecipe):
                         for imgs in y['images']:
                             if 'renditions' in imgs:
                                 body += '<img src="{}"><div class="figc">{}</div>'.format(
-                                    imgs['url'].split('&')[0] + '&width=480', imgs['caption']
+                                    imgs['url'].split('&')[0] + res,
+                                    imgs['caption'],
                                 )
                             else:
                                 body += '<img src="{}"><div class="figc">{}</div>'.format(
@@ -144,9 +185,10 @@ class Reuters(BasicNewsRecipe):
                                 )
                     if 'video' in y['cid']:
                         body += '<img src="{}"><div class="figc">{}</div>'.format(
-                            y['video']['thumbnail']['url'], y['video']['thumbnail']['caption']
+                            y['video']['thumbnail']['url'],
+                            y['video']['thumbnail']['caption'],
                         )
-        return BeautifulSoup('<html><body><div>' + body + '</div></body></html>').prettify()
+        return '<html><body><div>' + body + '</div></body></html>'
 
     def populate_article_metadata(self, article, soup, first):
         article.url = soup.find('h1')['title']

From a7925b7d2e6ca7d1aa19bdb6a4319ea226b59aef Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Tue, 29 Oct 2024 12:10:43 +0530
Subject: [PATCH 2/2] Update indian_express.recipe: match new CSS classes, skip /section/ links

---
 recipes/indian_express.recipe | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/recipes/indian_express.recipe b/recipes/indian_express.recipe
index ee9b6f8b66..44d91edc24 100644
--- a/recipes/indian_express.recipe
+++ b/recipes/indian_express.recipe
@@ -99,16 +99,22 @@ class IndianExpress(BasicNewsRecipe):
 
     def articles_from_soup(self, soup):
         ans = []
-        div = soup.find('div', attrs={'class':['nation', 'o-opin']})
-        for art in div.findAll(attrs={'class':['articles', 'o-opin-article']}):
+        div = soup.find('div', attrs={'class': ['nation', 'o-opin', 'myie-nation']})
+        for art in div.findAll(
+            attrs={'class': ['articles', 'o-opin-article', 'myie-articles']}
+        ):
             for a in art.findAll('a', href=True):
-                if not a.find('img') and not ('/profile/' in a['href'] or '/agency/' in a['href']):
+                if not a.find('img') and not any(
+                    x in a['href'] for x in ['/profile/', '/agency/', '/section/']
+                ):
                     url = a['href']
                     title = self.tag_to_string(a)
                     desc = ''
-                    if p:= art.find('p'):
+                    if p := art.find('p'):
                         desc = self.tag_to_string(p)
-                    if da := art.find('div', attrs={'class':['date', 'o-opin-date']}):
+                    if da := art.find(
+                        'div', attrs={'class': ['date', 'o-opin-date', 'my-time']}
+                    ):
                         date = parse_date(self.tag_to_string(da)).replace(tzinfo=None)
                         today = datetime.now()
                         if (today - date) > timedelta(self.oldest_article):