__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic '
'''
emol.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


class ElMercurio(BasicNewsRecipe):
    """Recipe that builds a periodical from the emol.com RSS feeds.

    All configuration is declarative (class attributes read by
    ``BasicNewsRecipe``); the only custom hook is ``preprocess_html``,
    which strips inline ``style`` attributes from the downloaded pages.
    """

    # Publication metadata; also reused in ``conversion_options`` below.
    title = 'El Mercurio online'
    __author__ = 'Darko Miletic'
    description = 'El sitio de noticias online de Chile'
    publisher = 'El Mercurio'
    category = 'news, politics, Chile'
    language = 'es_CL'
    masthead_url = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif'

    # Download and clean-up settings.
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'
    remove_javascript = True

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Keep only these three containers -- presumably headline/summary,
    # credits and article body, judging by their ids (verify against the
    # live page markup).
    keep_only_tags = [
        dict(
            name='div',
            attrs={
                'id': [
                    'cont_iz_titulobajada',
                    'cont_iz_creditos_1_a',
                    'cont_iz_cuerpo',
                ]
            },
        )
    ]
    remove_tags = [dict(name='div', attrs={'id': 'cont_iz_cuerpo_relacionados'})]
    remove_attributes = ['height', 'width']

    # (section title, feed URL) pairs, in the order they are listed.
    feeds = [
        (u'Noticias de ultima hora', u'http://rss.emol.com/rss.asp?canal=0'),
        (u'Nacional', u'http://rss.emol.com/rss.asp?canal=1'),
        (u'Mundo', u'http://rss.emol.com/rss.asp?canal=2'),
        (u'Deportes', u'http://rss.emol.com/rss.asp?canal=4'),
        (u'Magazine', u'http://rss.emol.com/rss.asp?canal=6'),
        (u'Tecnologia', u'http://rss.emol.com/rss.asp?canal=5'),
    ]

    def preprocess_html(self, soup):
        """Remove the ``style`` attribute from every tag that carries one.

        ``soup`` is the parsed page; it is modified in place and the same
        object is returned.
        """
        for item in soup.findAll(style=True):
            del item['style']
        return soup