From f12f69ef5edc3c9395abc1862f62f2405245fe2d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 21 Sep 2010 21:18:32 -0600 Subject: [PATCH] superesportes by Luciano Furtado. Fixes #405 (New news feed) --- resources/recipes/superesportes.recipe | 79 ++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 resources/recipes/superesportes.recipe diff --git a/resources/recipes/superesportes.recipe b/resources/recipes/superesportes.recipe new file mode 100644 index 0000000000..49289f188d --- /dev/null +++ b/resources/recipes/superesportes.recipe @@ -0,0 +1,79 @@ +__license__ = 'GPL v3' +__copyright__ = '2010, Luciano Furtado ' +''' +www.superesportes.com.br +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class SuperEsportesRecipe(BasicNewsRecipe): + + title = u'www.superesportes.com.br' + description = u'Superesportes - Notícias do esporte no Brasil e no mundo' + __author__ = 'Luciano Furtado' + language = 'pt' + category = 'esportes, Brasil' + no_stylesheets = True + oldest_article = 7 + + use_embedded_content=0 + max_articles_per_feed = 10 + cover_url = 'http://imgs.mg.superesportes.com.br/superesportes_logo.png' + + extra_css = 'div.info_noticias h1 { font-size: 100% }' + + + + remove_tags = [ + dict(name='div',attrs={'class':'topo'}), + dict(name='div',attrs={'class':'rodape'}), + dict(name='div',attrs={'class':'navegacao'}), + dict(name='div',attrs={'class':'lateral2'}), + dict(name='div',attrs={'class':'leia_mais'}), + dict(name='div',attrs={'id':'comentar'}), + dict(name='div',attrs={'id':'vrumelc_noticia'}), + dict(name='div',attrs={'class':'compartilhe'}), + dict(name='div',attrs={'class':'linha_noticias'}), + dict(name='div',attrs={'class':'botoes_noticias'}), + dict(name='div',attrs={'class':'barra_time bg_time'}), + ] + + + + def parse_index(self): + feeds = [] + sections = [ + (u'Atletico', 'http://www.df.superesportes.com.br/futebol/atletico-mg/capa_atletico_mg/index.shtml'), + (u'Botafogo', 'http://www.df.superesportes.com.br/futebol/botafogo/capa_botafogo/index.shtml'), + (u'Corinthinas', 'http://www.df.superesportes.com.br/futebol/corinthians/capa_corinthians/index.shtml'), + (u'Cruzeiro', 'http://www.df.superesportes.com.br/futebol/cruzeiro/capa_cruzeiro/index.shtml'), + (u'Flamengo', 'http://www.df.superesportes.com.br/futebol/flamengo/capa_flamengo/index.shtml'), + (u'Fluminense', 'http://www.df.superesportes.com.br/futebol/fluminense/capa_fluminense/index.shtml'), + (u'Palmeiras', 'http://www.df.superesportes.com.br/futebol/palmeiras/capa_palmeiras/index.shtml'), + (u'Santos', 'http://www.df.superesportes.com.br/futebol/santos/capa_santos/index.shtml'), + (u'São Paulo', 'http://www.df.superesportes.com.br/futebol/sao-paulo/capa_sao_paulo/index.shtml'), + (u'Vasco', 'http://www.df.superesportes.com.br/futebol/vasco/capa_vasco/index.shtml'), + ] + + + for section, url in sections: + current_articles = [] + + soup = self.index_to_soup(url) + latestNews = soup.find(name='ul',attrs={'class': 'lista_ultimas_noticias'}) + + for li_tag in latestNews.findAll(name='li'): + a_tag = li_tag.find('a', href= True) + if a_tag is None: + continue + title = self.tag_to_string(a_tag) + url = a_tag.get('href', False) + self.log("\n\nFound title: " + title + "\nUrl: " + url + "\nSection: " + section) + current_articles.append({'title': title, 'url': url, 'description': title, 'date':''}) + + if current_articles: + feeds.append((section, current_articles)) + + + return feeds +