Update The Week

This commit is contained in:
Kovid Goyal 2022-04-04 15:14:51 +05:30
parent 7944a8a022
commit 7f79f21f1d
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 32 additions and 14 deletions

View File

@@ -63,6 +63,11 @@ class FE_India(BasicNewsRecipe):
         ('Money','https://www.financialexpress.com/money/feed'),
     ]
 
+    def get_cover_url(self):
+        soup = self.index_to_soup('https://www.magzter.com/IN/The-Indian-Express-Ltd./Financial-Express-Mumbai/Business/')
+        for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')):
+            return citem['content']
+
     def preprocess_html(self, soup, *a):
         for img in soup.findAll(attrs={'data-src': True}):
             img['src'] = img['data-src']

View File

@@ -49,6 +49,11 @@ class HindustanTimes(BasicNewsRecipe):
         # ('Budget',''https://www.hindustantimes.com/feeds/rss/budget/rssfeed.xml')
     ]
 
+    def get_cover_url(self):
+        soup = self.index_to_soup('https://www.magzter.com/IN/HT-Digital-Streams-Ltd./Hindustan-Times-Delhi/Newspaper/')
+        for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')):
+            return citem['content']
+
     def preprocess_html(self, soup):
         for img in soup.findAll('img', attrs={'data-src': True}):
             img['src'] = img['data-src']

View File

@@ -35,6 +35,13 @@ class IndiaToday(BasicNewsRecipe):
         ('Sports','https://www.indiatoday.in/rss/1206518'),
     ]
 
+    extra_css = '[itemprop^="description"] {font-size: small; font-style: italic;}'
+
+    def get_cover_url(self):
+        soup = self.index_to_soup('https://www.magzter.com/IN/India-Today-Group/India-Today/News/')
+        for citem in soup.findAll('meta', content=lambda s: s and s.endswith('view/3.jpg')):
+            return citem['content']
+
     def preprocess_raw_html(self, raw_html, url):
         from calibre.ebooks.BeautifulSoup import BeautifulSoup
         soup = BeautifulSoup(raw_html)

View File

@@ -2,7 +2,6 @@
 # vim:fileencoding=utf-8
 # License: GPLv3 Copyright: 2021, Kovid Goyal <kovid at kovidgoyal.net>
 from calibre.web.feeds.news import BasicNewsRecipe
@@ -20,6 +19,7 @@ class TheWeek(BasicNewsRecipe):
     no_stylesheets = True
     use_embedded_content = True
     ignore_duplicate_articles = {'url'}
+    remove_attributes = ['style', 'align', 'border', 'hspace']
 
     feeds = [
         ('Cover Story', 'https://www.theweek.in/theweek/cover.rss'),
@@ -34,23 +34,24 @@ class TheWeek(BasicNewsRecipe):
     ]
 
     def get_cover_url(self):
-        soup = self.index_to_soup('https://www.theweek.in/theweek.html')
-        for img in soup.findAll('img', attrs={'data-src-web': lambda x: x and '/cover-magazine' in x}):
-            src = img['data-src-web']
-            try:
-                idx = src.rfind('.image.')
-            except Exception:
-                pass
-            else:
-                if idx > -1:
-                    src = src[:idx]
-            return 'https://img.theweek.in' + src
+        soup = self.index_to_soup(
+            'https://www.magzter.com/IN/Malayala_Manorama/THE_WEEK/Business/'
+        )
+        for citem in soup.findAll(
+            'meta', content=lambda s: s and s.endswith('view/3.jpg')
+        ):
+            return citem['content']
 
     def preprocess_html(self, soup):
         a = soup.find('a')
-        a.name = 'div'
+        if a:
+            a.name = 'div'
         h2 = soup.find('h2')
-        h2.string = fix_title(h2.string)
+        if h2:
+            h2.string = fix_title(h2.string)
+        for p in soup.findAll('p'):
+            if p.string == '\xa0':
+                p.decompose()
         return soup
 
     def populate_article_metadata(self, article, soup, first):