minor recipe updates

2025-08-30 23:00:21 -04:00 · 2022-11-15 10:40:56 +05:30 · 2022-11-15 10:40:56 +05:30 · 708155f2d6
commit 708155f2d6
parent 423fbbed4a
2 changed files with 33 additions and 36 deletions
--- a/recipes/hindu.recipe
+++ b/recipes/hindu.recipe
@@ -11,7 +11,7 @@ def absurl(url):
    return url
-local_edition = None
+local_edition = 'th_hyderabad'
 # Chennai is default edition, for other editions use 'th_hyderabad', 'th_bangalore', 'th_delhi', 'th_kolkata' etc
@@ -23,7 +23,8 @@ class TheHindu(BasicNewsRecipe):
    masthead_url = 'https://www.thehindu.com/theme/images/th-online/thehindu-logo.svg'
    remove_attributes = ['style', 'height', 'width']
    extra_css = '.caption{font-size:small; text-align:center;}'\
-        '.author{font-size:small;}'
+        '.author{font-size:small;}'\
        '.subhead{font-weight:bold;}'
    ignore_duplicate_articles = {'url'}
@@ -36,27 +37,17 @@ class TheHindu(BasicNewsRecipe):
    ]
    def preprocess_html(self, soup):
        for cap in soup.findAll('p', attrs={'class':'caption'}):
            cap.name = 'span'
        for img in soup.findAll('img', attrs={'data-original':True}):
            img['src'] = img['data-original']
        return soup
-    def get_cover_url(self):
+    def populate_article_metadata(self, article, soup, first):
-        cover = 'https://img.kiosko.net/' + str(
+        if first and hasattr(self, 'add_toc_thumbnail'):
-            date.today().year
+            image = soup.find('img')
-        ) + '/' + date.today().strftime('%m') + '/' + date.today(
+            if image is not None:
-        ).strftime('%d') + '/in/hindu.750.jpg'
+                self.add_toc_thumbnail(article, image['src'])
        br = BasicNewsRecipe.get_browser(self)
        try:
            br.open(cover)
        except:
            index = 'https://en.kiosko.net/in/np/hindu.html'
            soup = self.index_to_soup(index)
            for image in soup.findAll('img', src=True):
                if image['src'].endswith('750.jpg'):
                    return image['src']
            self.log("\nCover unavailable")
            cover = None
        return cover
    def parse_index(self):
        if local_edition:
@@ -69,6 +60,9 @@ class TheHindu(BasicNewsRecipe):
        raw = self.index_to_soup(url, raw=True)
        soup = self.index_to_soup(raw)
        ans = self.hindu_parse_index(soup)
        cover = soup.find(attrs={'class':'hindu-ad'})
        if cover:
            self.cover_url = cover.img['src']
        if not ans:
            raise ValueError(
                    'The Hindu Newspaper is not published Today.'
--- a/recipes/mit_technology_review.recipe
+++ b/recipes/mit_technology_review.recipe
@@ -79,7 +79,7 @@ class MitTechnologyReview(BasicNewsRecipe):
        feeds = OrderedDict()
        classNamePrefixes = [
-            "teaserItem__title", "teaserItem--aside__title"
+            "magazineHero__letter--", "teaserItem__title", "teaserItem--aside__title"
        ]
        for div in soup.findAll(
            attrs={
@@ -92,7 +92,8 @@ class MitTechnologyReview(BasicNewsRecipe):
            a = div.find('a', href=True)
            title = self.tag_to_string(a).strip()
            href = absurl(a['href'])
-
+            desc = ''
            section_title = 'Letter from the editor'
            d = div.findParent(
                attrs={
                    'class':
@@ -100,24 +101,26 @@ class MitTechnologyReview(BasicNewsRecipe):
                    startswith(('teaserItem__wrapper', 'teaserItem--aside__wrapper'))
                }
            )
-            desc = self.tag_to_string(
+            if d:
-                d.find(
+
                excerpt = d.find(
                        attrs={
                            'class':
                            lambda x: x and x.startswith(
                                ('teaserItem__excerpt', 'teaserItem--aside__excerpt')
                            )
                        }
                    )
                if excerpt:
                    desc = self.tag_to_string(excerpt).strip()
                sec = d.find(
                    attrs={
-                        'class':
+                        'class': lambda x: x and x.startswith('teaserItem__eyebrowText')
                        lambda x: x and x.startswith(
                            ('teaserItem__excerpt', 'teaserItem--aside__excerpt')
                        )
                    }
                )
-            ).strip()
+                if sec:
-
+                    section_title = self.tag_to_string(sec).replace('Categorized in ',
            sec = d.find(
                attrs={
                    'class': lambda x: x and x.startswith('teaserItem__eyebrowText')
                }
            )
            section_title = self.tag_to_string(sec).replace('Categorized in ',
                                                            '').strip()
            if not href or not title: