from calibre.web.feeds.news import BasicNewsRecipe


class DiarioSport(BasicNewsRecipe):
    """Calibre news recipe for the Spanish sports daily Diario Sport.

    Articles come from the single FeedBurner feed listed in ``feeds``.
    Only the ``noticiasMedio`` container of each page is kept, and the
    elements listed in ``remove_tags`` are stripped from it.
    """

    title = u'Diario Sport'
    __author__ = 'Jefferson Frantz'
    description = 'Todas las noticias del Barça y del mundo del deporte en general'
    language = 'es'

    # Feed limits; see the calibre BasicNewsRecipe documentation for the
    # exact meaning and units of these settings.
    oldest_article = 2
    max_articles_per_feed = 75
    timefmt = ' [%d %b, %Y]'

    no_stylesheets = True

    feeds = [(u'Sport', u'http://feeds.feedburner.com/sport/ultimahora')]

    # Split across two literals for line length only; the concatenated value
    # is identical to the original single-line stylesheet string.
    extra_css = (
        ' h2{font-family: serif; font-size: small; font-weight: bold; '
        'color: #000000; text-align: justify} '
    )

    keep_only_tags = [dict(name='div', attrs={'id': ['noticiasMedio']})]

    remove_tags = [
        dict(name=['object', 'link', 'script', 'ul']),
        dict(
            name='div',
            attrs={
                'id': [
                    'scrAdSense',
                    'herramientas2',
                    'participacion',
                    'participacion2',
                    'bloque1resultados',
                    'bloque2resultados',
                    'cont_vinyetesAnt',
                    'tinta',
                    'noticiasSuperior',
                    'cintillopublicidad2',
                ]
            },
        ),
        dict(name='p', attrs={'class': ['masinformacion', 'hora']}),
        # NOTE(review): the class value contains literal single quotes
        # ("'link'"), which looks unintended -- confirm against the site
        # markup before changing it, since that alters which tags are removed.
        dict(name='a', attrs={'class': ["'link'"]}),
        dict(
            name='div',
            attrs={
                'class': [
                    'addthis_toolbox addthis_default_style',
                    'firma',
                    'pretitularnoticia',
                ]
            },
        ),
        dict(name='form', attrs={'id': ['formularioDeBusquedaAvanzada']}),
    ]

    def preprocess_html(self, soup):
        """Drop the inline ``style`` attribute from every tag that has one.

        :param soup: parsed article document.
        :return: the same ``soup`` object, modified in place.
        """
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    def postprocess_html(self, soup, first_fetch):
        """Remove the first ``mascaravideo.png`` image found in the document.

        :param soup: parsed article document.
        :param first_fetch: unused by this recipe.
        :return: the same ``soup`` object, modified in place.
        """
        # Presumably a video-player overlay image; only the first match is
        # removed, as in the original recipe.
        img = soup.find('img', src='/img/videos/mascaravideo.png')
        if img is not None:
            img.extract()
        return soup