From 5df3b224a64de715ed997ee865970a95efc45bff Mon Sep 17 00:00:00 2001
From: Shiva Prasad
Date: Sun, 13 Jun 2021 20:56:18 +0530
Subject: [PATCH] Recipe: add cover_url to The Hindu

Gets today's front page image displayed in Hindu ePaper website.
Also add some styles to the lead image and caption.
---
Reviewer notes (ignored when the patch is applied):

* epaper_url is a new class attribute holding the ePaper site root; both
  new methods read it.
* extra_css centres elements with class lead-img-cont and renders
  lead-img-caption in small italic text.
* get_browser() takes the browser returned by
  BasicNewsRecipe.get_browser(self), appends a Referer header whose value
  is epaper_url to br.addheaders, and returns that browser. The inline
  comment says the header is needed for fetching the cover.
  NOTE(review): the header is appended to the recipe's browser itself, so
  it is presumably sent with article requests as well as the cover
  request - confirm that this is acceptable.
* get_cover_url() calls index_to_soup() with raw=True on
  epaper_url + '/Login/DefaultImage', replaces every pair of consecutive
  backslashes (br'\\' is a raw bytes literal, i.e. two backslash bytes)
  with '/', decodes the result as UTF-8 and drops the first and last
  characters.
  NOTE(review): this presumably expects the endpoint to answer with a
  double-quoted string whose path separators are escaped backslashes;
  verify against a live response.
* NOTE(review): this copy of the patch was re-flowed from a version whose
  line breaks and indentation had been lost. The position of the blank
  context lines in the first hunk and the indentation widths were
  inferred; check them against recipes/hindu.recipe before applying.

 recipes/hindu.recipe | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/recipes/hindu.recipe b/recipes/hindu.recipe
index 1fab6775b0..1b0f7b6a5c 100644
--- a/recipes/hindu.recipe
+++ b/recipes/hindu.recipe
@@ -15,12 +15,15 @@ def classes(classes):
 class TheHindu(BasicNewsRecipe):
     title = u'The Hindu'
     language = 'en_IN'
+    epaper_url = 'https://epaper.thehindu.com'
 
     oldest_article = 1
     __author__ = 'Kovid Goyal'
     max_articles_per_feed = 100
     no_stylesheets = True
     remove_attributes = ['style']
+    extra_css = '.lead-img-cont { text-align: center; } ' \
+        '.lead-img-caption { font-size: small; font-style: italic; }'
 
     ignore_duplicate_articles = {'title', 'url'}
     keep_only_tags = [
@@ -29,6 +32,15 @@ class TheHindu(BasicNewsRecipe):
         dict(id=lambda x: x and x.startswith('content-body-')),
     ]
 
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser(self)
+        br.addheaders += [('Referer', self.epaper_url)]  # needed for fetching cover
+        return br
+
+    def get_cover_url(self):
+        url = self.index_to_soup(self.epaper_url + '/Login/DefaultImage', raw=True)
+        return url.replace(br'\\', b'/').decode('utf-8')[1:-1]
+
     def preprocess_html(self, soup):
         img = soup.find('img', attrs={'class': 'lead-img'})
         try: