From ff828ae47dd8981d578cf3784b4b332b7dff263f Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 20 Sep 2017 11:55:31 +0530
Subject: [PATCH] Update The Hindu

---
 recipes/hindu.recipe | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/recipes/hindu.recipe b/recipes/hindu.recipe
index b35438a05e..61fa5781eb 100644
--- a/recipes/hindu.recipe
+++ b/recipes/hindu.recipe
@@ -24,8 +24,8 @@ class TheHindu(BasicNewsRecipe):
     ignore_duplicate_articles = {'title', 'url'}
 
     keep_only_tags = [
-        dict(name='h1', attrs={'class': 'title'}),
-        classes('lead-img-cont mobile-author-cont'),
+        dict(name='h1', attrs={'class': ['title', 'special-article-heading']}),
+        classes('author-nm lead-img-cont mobile-author-cont photo-collage intro'),
         dict(id=lambda x: x and x.startswith('content-body-')),
     ]
 
@@ -56,6 +56,10 @@ class TheHindu(BasicNewsRecipe):
         return ans
 
     def parse_index(self):
+        # return [('xxx', [
+        # {'title':'xxx', 'url':'http://www.thehindu.com/opinion/op-ed/rohingya-bangladeshs-burden-to-bear/article19694058.ece'},
+        # {'title':'yyy', 'url':'http://www.thehindu.com/sci-tech/energy-and-environment/on-river-washed-antique-plains/article19699327.ece'}
+        # ])]
         soup = self.index_to_soup('http://www.thehindu.com/todays-paper/')
         nav_div = soup.find(id='subnav-tpbar-latest')
         section_list = []