Update The Hindu

This commit is contained in:
Kovid Goyal 2017-09-20 11:55:31 +05:30
parent 81f6841cdb
commit ff828ae47d
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -24,8 +24,8 @@ class TheHindu(BasicNewsRecipe):
ignore_duplicate_articles = {'title', 'url'} ignore_duplicate_articles = {'title', 'url'}
keep_only_tags = [ keep_only_tags = [
dict(name='h1', attrs={'class': 'title'}), dict(name='h1', attrs={'class': ['title', 'special-article-heading']}),
classes('lead-img-cont mobile-author-cont'), classes('author-nm lead-img-cont mobile-author-cont photo-collage intro'),
dict(id=lambda x: x and x.startswith('content-body-')), dict(id=lambda x: x and x.startswith('content-body-')),
] ]
@@ -56,6 +56,10 @@ class TheHindu(BasicNewsRecipe):
return ans return ans
def parse_index(self): def parse_index(self):
# return [('xxx', [
# {'title':'xxx', 'url':'http://www.thehindu.com/opinion/op-ed/rohingya-bangladeshs-burden-to-bear/article19694058.ece'},
# {'title':'yyy', 'url':'http://www.thehindu.com/sci-tech/energy-and-environment/on-river-washed-antique-plains/article19699327.ece'}
# ])]
soup = self.index_to_soup('http://www.thehindu.com/todays-paper/') soup = self.index_to_soup('http://www.thehindu.com/todays-paper/')
nav_div = soup.find(id='subnav-tpbar-latest') nav_div = soup.find(id='subnav-tpbar-latest')
section_list = [] section_list = []