This commit is contained in:
Kovid Goyal 2021-06-11 08:05:54 +05:30
commit e8cc06935d
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -3,7 +3,7 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import string import string, re
def classes(classes): def classes(classes):
@ -30,10 +30,26 @@ class TheHindu(BasicNewsRecipe):
] ]
def preprocess_html(self, soup):
    """Fix up the lead image and drop repeated intro headings.

    The first ``<img class="lead-img">`` gets its ``src`` rewritten from the
    ``srcset`` of a ``<source>`` element found under the image's parent,
    with the rendition path segment after ``ALTERNATES/`` replaced by
    ``FREE_660``. Every ``<h2 class="intro">`` except the first is removed.

    :param soup: parsed article document
    :return: the same ``soup`` object, modified in place
    """
    img = soup.find('img', attrs={'class': 'lead-img'})
    # Each lookup is optional: an article may have no lead image, the image
    # may have no parent, or there may be no <source>/srcset to read from.
    parent = img.parent if img is not None else None
    source = parent.find('source') if parent is not None else None
    srcset = source.get('srcset') if source is not None else None
    if srcset is not None:
        img['src'] = re.sub(r'(ALTERNATES)/.+?/', r'\1/FREE_660/', srcset)
    # Remove duplicate intro
    for heading in soup.findAll('h2', attrs={'class': 'intro'})[1:]:
        heading.extract()
    return soup
def populate_article_metadata(self, article, soup, first):
    """Set the article summary from the page's meta description.

    Reads the ``content`` of ``<meta name="description">``. If the tag or
    its content is missing, or the text starts with ``'Todays paper'``, the
    article is left untouched. Otherwise the text, with ``'...'`` appended
    when it is 199 characters or longer, is stored in both
    ``article.text_summary`` and ``article.summary``.

    :param article: object receiving ``text_summary`` and ``summary``
    :param soup: parsed article document
    :param first: unused by this implementation
    """
    meta = soup.find('meta', attrs={'name': 'description'})
    if meta is None:
        return
    desc = meta.get('content')
    if desc is None or desc.startswith('Todays paper'):
        return
    if len(desc) >= 199:
        desc += '...'  # indicate truncation
    article.text_summary = article.summary = desc
def articles_from_soup(self, soup): def articles_from_soup(self, soup):
ans = [] ans = []
div = soup.find('section', attrs={'id': 'section_'}) div = soup.find('section', attrs={'id': 'section_'})