__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic '
'''
latercera.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


class LaTercera(BasicNewsRecipe):
    """Recipe settings for the Chilean news site La Tercera (latercera.com).

    The class only declares configuration attributes consumed by
    ``BasicNewsRecipe`` and overrides ``preprocess_html`` to drop inline
    ``style`` attributes from the downloaded markup.
    """

    # NOTE(review): nothing in this file reads ``news``; confirm whether the
    # base class or any tooling relies on it.
    news = True

    # Descriptive metadata; several of these values are reused in
    # ``conversion_options`` below.
    title = 'La Tercera'
    __author__ = 'Darko Miletic and Alex Mitrani'
    description = 'El sitio de noticias online de Chile'
    publisher = 'La Tercera'
    category = 'news, politics, Chile'
    language = 'es_CL'

    # Download limits and parsing switches handled by the base class.
    # NOTE(review): ``oldest_article`` is presumably expressed in days - see
    # the BasicNewsRecipe documentation to confirm.
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'cp1252'
    use_embedded_content = False
    remove_empty_feeds = True

    # Options handed to the conversion step, built from the metadata above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
        'linearize_tables': True,
    }

    # Tag specifications (tag name + class list) for the headline, the
    # summary line, the byline and the article body.
    keep_only_tags = [
        {'name': 'h1', 'attrs': {'class': ['titularArticulo']}},
        {'name': 'h4', 'attrs': {'class': ['bajadaArt']}},
        {'name': 'h5', 'attrs': {'class': ['autorArt']}},
        {'name': 'div', 'attrs': {'class': ['articleContent']}},
    ]

    # Sharing box and keyword list are stripped from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['boxCompartir', 'keywords']}},
    ]

    # The keywords block is also used as the "remove after" marker.
    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': ['keywords']}},
    ]

    # (section title, RSS URL) pairs, one per section of the site.
    feeds = [
        (u'La Tercera', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&ul=1'),
        (u'Politica', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=674'),
        (u'Nacional', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=680'),
        (u'Mundo', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=678'),
        (u'Negocios', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=655'),
        (u'Santiago', u'http://www.latercera.com/feed/manager?type=rss&sc=TEFURVJDRVJB&citId=9&categoryId=1731'),
        (u'Tendencias', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=659'),
        (u'Educacion', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=657'),
        (u'Cultura', u'http://www.latercera.com/feed/manager?type=rss&sc=TEFURVJDRVJB&citId=9&categoryId=1453'),
        (u'Entretención', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=661'),
        (u'Deportes', u'http://www.latercera.com/app/rss?sc=TEFURVJDRVJB&category=656'),
    ]

    def preprocess_html(self, soup):
        """Remove the inline ``style`` attribute from every tag that has one.

        ``soup`` is modified in place and the same object is returned.
        """
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        return soup