Kovid Goyal 2022-11-27 13:59:54 +05:30
parent d9fa980ca4
commit a91a614142
2 changed files with 7 additions and 12 deletions


@@ -11,7 +11,7 @@ def absurl(url):
     return url
-local_edition = 'th_hyderabad'
+local_edition = None
 # Chennai is default edition, for other editions use 'th_hyderabad', 'th_bangalore', 'th_delhi', 'th_kolkata' etc
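For context, the local_edition switch set here is consumed further down in parse_index (see the third hunk), which fetches a dated, edition-specific front page when it is non-None. A minimal runnable sketch of that pattern follows; the exact URL path and the Chennai fallback are illustrative assumptions, not taken from the recipe:

from datetime import date

local_edition = None  # e.g. 'th_bangalore', 'th_delhi', 'th_kolkata'

def todays_paper_url():
    # Assumed URL layout, for illustration only; the real recipe builds its own URL.
    if local_edition:
        dt = date.today().strftime('%Y-%m-%d')
        return 'https://www.thehindu.com/todays-paper/' + dt + '/' + local_edition + '/'
    # Default: the plain front page, which serves the Chennai edition (assumption).
    return 'https://www.thehindu.com/todays-paper/'

print(todays_paper_url())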
@@ -23,8 +23,9 @@ class TheHindu(BasicNewsRecipe):
     masthead_url = 'https://www.thehindu.com/theme/images/th-online/thehindu-logo.svg'
     remove_attributes = ['style', 'height', 'width']
     extra_css = '.caption{font-size:small; text-align:center;}'\
-                '.author{font-size:small;}'\
-                '.subhead{font-weight:bold;}'
+                '.author{font-size:small; font-weight:bold;}'\
+                '.subhead, .subhead_lead {font-weight:bold;}'\
+                'img {display:block; margin:0 auto;}'
     ignore_duplicate_articles = {'url'}
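The backslash-continued string concatenation above amounts to a single CSS block; an equivalent triple-quoted layout (just an alternative way to write the same rules, not what the commit uses) would be:

extra_css = '''
    .caption {font-size:small; text-align:center;}
    .author {font-size:small; font-weight:bold;}
    .subhead, .subhead_lead {font-weight:bold;}
    img {display:block; margin:0 auto;}
'''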
@@ -38,17 +39,11 @@ class TheHindu(BasicNewsRecipe):
     def preprocess_html(self, soup):
         for cap in soup.findAll('p', attrs={'class':'caption'}):
-            cap.name = 'span'
+            cap.name = 'figcaption'
         for img in soup.findAll('img', attrs={'data-original':True}):
             img['src'] = img['data-original']
         return soup
-    def populate_article_metadata(self, article, soup, first):
-        if first and hasattr(self, 'add_toc_thumbnail'):
-            image = soup.find('img')
-            if image is not None:
-                self.add_toc_thumbnail(article, image['src'])
     def parse_index(self):
         if local_edition:
             yr = str(date.today().year)
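To see what the reworked preprocess_html from this hunk does in isolation, here is a self-contained sketch using BeautifulSoup directly (the sample HTML is invented; inside calibre the method receives an already-parsed soup): caption paragraphs are renamed to figcaption and lazy-loaded images get their real data-original URL copied into src.

from bs4 import BeautifulSoup

html = '''
<div>
  <img src="placeholder.gif" data-original="https://example.com/real.jpg"/>
  <p class="caption">A caption under the image</p>
</div>
'''

soup = BeautifulSoup(html, 'html.parser')
# Same two transformations as the recipe's preprocess_html:
for cap in soup.findAll('p', attrs={'class': 'caption'}):
    cap.name = 'figcaption'            # rename the tag so it renders as a figure caption
for img in soup.findAll('img', attrs={'data-original': True}):
    img['src'] = img['data-original']  # swap the lazy-load placeholder for the real image URL
print(soup)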
@@ -85,7 +80,7 @@ class TheHindu(BasicNewsRecipe):
                 section = sec.replace('TH_', '')
                 title = item['articleheadline']
                 url = absurl(item['href'])
-                desc = 'from page no.' + item['pageno'] + ' | ' + item['teaser_text'] or ''
+                desc = 'Page no.' + item['pageno'] + ' | ' + item['teaser_text'] or ''
                 self.log('\t', title, '\n\t\t', url)
                 feeds_dict[section].append({"title": title, "url": url, "description": desc})
         return [(section, articles) for section, articles in feeds_dict.items()]
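A side note on the desc line (the wording change above does not alter this): + binds tighter than or, so the trailing or '' applies to the already-concatenated string and never guards an empty or missing teaser. A small sketch of the difference, with made-up data; the guarded variant is hypothetical and not part of the commit:

item = {'pageno': '3', 'teaser_text': ''}

# As written: the whole concatenation is evaluated first, so `or ''` only
# matters if the result is empty, which it never is ('Page no.' is always there).
desc = 'Page no.' + item['pageno'] + ' | ' + item['teaser_text'] or ''

# If the intent were to tolerate a missing teaser, the guard would have to
# wrap only that field:
desc_guarded = 'Page no.' + item['pageno'] + ' | ' + (item.get('teaser_text') or '')

print(repr(desc))
print(repr(desc_guarded))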


@@ -66,7 +66,7 @@ class IndianExpressPrint(BasicNewsRecipe):
     def parse_index(self):
         soup = self.index_to_soup('https://indianexpress.com/todays-paper/')
         feeds_dict = defaultdict(list)
-        for div in soup.findAll('div', attrs={'class':['lead-story', 'section']}):
+        for div in soup.findAll('div', attrs={'class':['lead-story', 'section', 'today-paper']}):
             for a in div.findAll('a', attrs={'href':lambda x: x and x.startswith('https://indianexpress.com/article/')}):
                 if not a.find('img'):
                     url = a['href']
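A self-contained sketch of the selection logic in this hunk (the markup is invented for illustration): article links are collected from lead-story, section, and now also today-paper containers, and anchors that only wrap an image are skipped.

from bs4 import BeautifulSoup

html = '''
<div class="today-paper">
  <a href="https://indianexpress.com/article/example-story/">Example story</a>
  <a href="https://indianexpress.com/article/example-story/"><img src="thumb.jpg"/></a>
  <a href="https://indianexpress.com/section/cities/">Not an article link</a>
</div>
'''

soup = BeautifulSoup(html, 'html.parser')
for div in soup.findAll('div', attrs={'class': ['lead-story', 'section', 'today-paper']}):
    for a in div.findAll('a', attrs={'href': lambda x: x and x.startswith('https://indianexpress.com/article/')}):
        if not a.find('img'):  # anchors that only contain an image are skipped
            print(a['href'], '->', a.get_text(strip=True))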