Add sport section to new guardian recipe

This commit is contained in:
Kovid Goyal 2015-11-26 19:18:21 +05:30
parent 7cdf343936
commit 82f5e48d7b

View File

@ -31,10 +31,7 @@ class Guardian(BasicNewsRecipe):
remove_empty_feeds = True remove_empty_feeds = True
no_stylesheets = True no_stylesheets = True
remove_attributes = ['style'] remove_attributes = ['style']
ignore_duplicate_articles = {'title', 'url'}
# List of section titles to ignore
# For example: ['Sport']
ignore_sections = []
timefmt = ' [%a, %d %b %Y]' timefmt = ' [%a, %d %b %Y]'
@ -58,11 +55,11 @@ class Guardian(BasicNewsRecipe):
img['srcset'] = '' img['srcset'] = ''
return soup return soup
def parse_index(self): def parse_section(self, url, title_prefix=''):
feeds = [] feeds = []
soup = self.index_to_soup(self.base_url) soup = self.index_to_soup(url)
for section in soup.findAll('section'): for section in soup.findAll('section'):
title = self.tag_to_string(section.find(attrs={'class':'fc-container__header__title'})).strip().capitalize() title = title_prefix + self.tag_to_string(section.find(attrs={'class':'fc-container__header__title'})).strip().capitalize()
self.log('\nFound section:', title) self.log('\nFound section:', title)
feeds.append((title, [])) feeds.append((title, []))
for li in section.findAll('li'): for li in section.findAll('li'):
@ -73,3 +70,8 @@ class Guardian(BasicNewsRecipe):
feeds[-1][1].append({'title':title, 'url':url}) feeds[-1][1].append({'title':title, 'url':url})
break break
return feeds return feeds
def parse_index(self):
    """Build the feed list from the front page plus the sport section.

    Calls ``parse_section`` on ``self.base_url`` first, then on the
    ``/uk/sport`` page with ``'Sport - '`` as the title prefix, and
    returns the combined list with the sport entries last.
    """
    sport_url = 'http://www.theguardian.com/uk/sport'
    all_feeds = self.parse_section(self.base_url)
    # Grow the first result in place so the sport sections follow it.
    all_feeds.extend(self.parse_section(sport_url, 'Sport - '))
    return all_feeds