From b0d4c388d64f0c989b573b628d02fbe7a7f6ec86 Mon Sep 17 00:00:00 2001 From: Shiva Prasad Date: Tue, 15 Jun 2021 17:24:59 +0530 Subject: [PATCH] Recipe: make Hindu better resemble print edition * Remove redundant date and timestamps cluttering every article * Place intro immediately beneath the heading, as in print edition |- duplicate intro is now removed using CSS * Visual styling --- recipes/hindu.recipe | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/recipes/hindu.recipe b/recipes/hindu.recipe index 1b0f7b6a5c..3116aa5ba5 100644 --- a/recipes/hindu.recipe +++ b/recipes/hindu.recipe @@ -23,12 +23,14 @@ class TheHindu(BasicNewsRecipe): no_stylesheets = True remove_attributes = ['style'] extra_css = '.lead-img-cont { text-align: center; } ' \ - '.lead-img-caption { font-size: small; font-style: italic; }' + '.lead-img-caption { font-size: small; font-style: italic; } ' \ + '.mobile-author-cont { font-size: small; text-transform: uppercase; } ' \ + '.intro ~ .intro, .update-time, .ksl-time-stamp * { display: none; } ' ignore_duplicate_articles = {'title', 'url'} keep_only_tags = [ dict(name='h1', attrs={'class': ['title', 'special-article-heading']}), - classes('author-nm lead-img-cont mobile-author-cont photo-collage intro'), + classes('lead-img-cont mobile-author-cont photo-collage intro'), dict(id=lambda x: x and x.startswith('content-body-')), ] @@ -50,9 +52,15 @@ class TheHindu(BasicNewsRecipe): source.extract() except Exception: pass - # Remove duplicate intro - for h in soup.findAll('h2', attrs={'class': 'intro'})[1:]: - h.extract() + # Place intro beneath the title, skip duplicates + try: + soup.h1.insert_after(soup.find('h2', attrs={'class': 'intro'})) + except Exception: + pass + # Remove ',' from location tag + ts = soup.find('span', attrs={'class': 'ksl-time-stamp'}) + if ts and ts.string: + ts.string = ts.string.split(',')[0] return soup def populate_article_metadata(self, article, soup, first):