__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic '
'''
latercera.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


class LaTercera(BasicNewsRecipe):
    """Recipe definition for the La Tercera news site (latercera.com).

    Everything except ``preprocess_html`` is declarative configuration
    consumed by the ``BasicNewsRecipe`` base class; the precise meaning of
    each attribute is defined there, not in this file.
    """

    news = True

    # Publication metadata.
    title = 'La Tercera'
    __author__ = 'Darko Miletic and Alex Mitrani'
    description = 'El sitio de noticias online de Chile'
    publisher = 'La Tercera'
    category = 'news, politics, Chile'
    language = 'es_CL'

    # Download / parsing settings.
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    remove_empty_feeds = True

    # Mirrors the metadata above into the conversion settings.
    conversion_options = dict(
        comment=description,
        tags=category,
        publisher=publisher,
        language=language,
        linearize_tables=True,
    )

    # Tag matchers (tag name + CSS class list) for the article title,
    # summary, author line and body.
    keep_only_tags = [
        {'name': 'h1', 'attrs': {'class': ['titularArticulo']}},
        {'name': 'h4', 'attrs': {'class': ['bajadaArt']}},
        {'name': 'h5', 'attrs': {'class': ['autorArt']}},
        {'name': 'div', 'attrs': {'class': ['articleContent']}},
    ]

    # Tag matcher for the share box and keywords containers.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['boxCompartir', 'keywords']}},
    ]

    # Tag matcher for the keywords container.
    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': ['keywords']}},
    ]

    # (section title, RSS URL) pairs.
    feeds = [
        (u'La Tercera',
         u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&ul=1'),
        (u'Politica',
         u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=674'),
        (u'Nacional',
         u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=680'),
        (u'Mundo',
         u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=678'),
        (u'Negocios',
         u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=655'),
        (u'Santiago',
         u'http://www.latercera.com/feed/manager?type=rss&sc=TEFURVJDRVJB&citId=9&categoryId=1731'),
        (u'Tendencias',
         u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=659'),
        (u'Educacion',
         u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=657'),
        (u'Cultura',
         u'http://www.latercera.com/feed/manager?type=rss&sc=TEFURVJDRVJB&citId=9&categoryId=1453'),
        (u'Entretención',
         u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=661'),
        (u'Deportes',
         u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=656'),
    ]

    def preprocess_html(self, soup):
        """Remove the ``style`` attribute from every tag that has one.

        The parsed document is modified in place and the same object is
        returned.
        """
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        return soup