calibre/recipes/el_pais_babelia.recipe
Kovid Goyal 567040ee1e Perform PEP8 compliance checks on the entire codebase
Some bits of PEP 8 are turned off via setup.cfg
2016-07-29 21:25:17 +05:30

49 lines
2.1 KiB
Plaintext

from calibre.web.feeds.news import BasicNewsRecipe
class ElPaisBabelia(BasicNewsRecipe):
    """Recipe that builds its feeds by scraping the Babelia index page.

    There is no feed list here: ``parse_index`` reads ``INDEX`` and turns
    every ``div.contenedor_nuevo`` on that page into one section.
    """

    title = 'El Pais Babelia'
    __author__ = 'oneillpt'
    description = 'El Pais Babelia'
    # Page scraped by parse_index() to discover sections and articles.
    INDEX = 'http://www.elpais.com/suple/babelia/'
    language = 'es'

    # Clean-up rules handed to BasicNewsRecipe: the article body is the
    # 'estructura_2col' div; the divs listed in remove_tags are stripped.
    remove_tags_before = dict(name='div', attrs={'class': 'estructura_2col'})
    keep_tags = [dict(name='div', attrs={'class': 'estructura_2col'})]
    remove_tags = [
        dict(name='div', attrs={'class': 'votos estirar'}),
        dict(name='div', attrs={'id': 'utilidades'}),
        dict(name='div', attrs={'class': 'info_relacionada'}),
        dict(name='div', attrs={'class': 'mod_apoyo'}),
        dict(name='div', attrs={'class': 'contorno_f'}),
        dict(name='div', attrs={'class': 'pestanias'}),
        dict(name='div', attrs={'class': 'otros_webs'}),
        dict(name='div', attrs={'id': 'pie'}),
    ]
    remove_javascript = True

    def parse_index(self):
        """Scrape ``INDEX`` and return the sections found on it.

        Returns:
            A list of ``(section_title, articles)`` tuples, where
            ``articles`` is a list of ``{'title': ..., 'url': ...}`` dicts.
            Sections that yield no articles are omitted.
        """
        # NOTE(review): the nesting below was reconstructed from a copy of
        # this file whose indentation had been lost; it keeps only
        # site-relative links that carry a class attribute. Confirm against
        # the upstream recipe.
        feeds = []
        soup = self.index_to_soup(self.INDEX)
        for section in soup.findAll('div', attrs={'class': 'contenedor_nuevo'}):
            section_title = self.tag_to_string(section.find('h1'))
            articles = []
            for post in section.findAll('a', href=True):
                url = post['href']
                if not url.startswith('/'):
                    # Only site-relative links are treated as articles.
                    continue
                # NOTE(review): host differs from INDEX (elpais.es vs
                # elpais.com); kept as-is, verify which one is intended.
                url = 'http://www.elpais.es' + url
                title = self.tag_to_string(post)

                # Ask the tag for its own class attribute instead of
                # searching its serialized markup for 'class=': the text
                # search also matched child tags and hrefs, after which
                # post['class'] raised KeyError.
                klass = post.get('class')
                if klass is None or klass == "":
                    continue

                self.log()
                self.log('--> post: ', post)
                self.log('--> url: ', url)
                self.log('--> title: ', title)
                self.log('--> class: ', klass)
                articles.append({'title': title, 'url': url})
            if articles:
                feeds.append((section_title, articles))
        return feeds