diff --git a/recipes/hindu.recipe b/recipes/hindu.recipe
index b29b8f17ce..2ba4c55a4e 100644
--- a/recipes/hindu.recipe
+++ b/recipes/hindu.recipe
@@ -3,7 +3,7 @@
 __license__ = 'GPL 3'
 __copyright__ = '2009, Kovid Goyal '
 from calibre.web.feeds.news import BasicNewsRecipe
-import string
+import string, re
 
 
 def classes(classes):
@@ -30,10 +30,26 @@ class TheHindu(BasicNewsRecipe):
     ]
 
     def preprocess_html(self, soup):
-        for img in soup.findAll('img', attrs={'data-src-template': True}):
-            img['src'] = img['data-src-template'].replace('BINARY/thumbnail', 'alternates/FREE_660')
+        img = soup.find('img', attrs={'class': 'lead-img'})
+        try:
+            src = img.parent.find('source').get('srcset')
+            img['src'] = re.sub(r'(ALTERNATES)/.+?/', r'\1/FREE_660/', src)
+        except (TypeError, AttributeError):
+            pass
+        # Remove duplicate intro
+        for h in soup.findAll('h2', attrs={'class': 'intro'})[1:]:
+            h.extract()
         return soup
 
+    def populate_article_metadata(self, article, soup, first):
+        try:
+            desc = soup.find('meta', attrs={'name': 'description'}).get('content')
+            if not desc.startswith('Todays paper'):
+                desc += '...' if len(desc) >= 199 else ''  # indicate truncation
+                article.text_summary = article.summary = desc
+        except AttributeError:
+            return
+
     def articles_from_soup(self, soup):
         ans = []
         div = soup.find('section', attrs={'id': 'section_'})