#!/usr/bin/env python
# -*- coding: utf-8 mode: python -*-

__license__ = 'GPL v3'
__copyright__ = '2009-2015, Darko Miletic '
__docformat__ = 'restructuredtext es'

'''
www.emol.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


class ElMercurio(BasicNewsRecipe):
    """Recipe for emol.com that builds its article index by scraping pages.

    The entries in ``feeds`` are HTML listing pages, so ``parse_index`` is
    overridden to read each page and collect the articles listed on it.
    """

    title = 'Emol.com - El sitio de noticias online de Chile'
    __author__ = 'Darko Miletic'
    description = 'El sitio de noticias online de Chile'
    publisher = 'El Mercurio S.A.P.'
    category = 'news, politics, Chile'
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf8'
    masthead_url = 'http://static.emol.cl/emol50/img/logo_emol.gif'
    remove_javascript = True
    use_embedded_content = False
    language = 'es_CL'

    # Metadata passed to the conversion step, reusing the attributes above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language
    }

    # Parts of an article page to keep: headline/byline containers and body.
    keep_only_tags = [
        dict(name='div', attrs={'class': [
            'cont_iz_titulobajada',
            'info-notaemol-por',
            'info-notaemol-porfecha'
        ]}),
        dict(name='div', attrs={'id': 'texto_noticia'})
    ]

    # Dropped from the kept content (id suggests a related-articles box).
    remove_tags = [
        dict(name='div', attrs={'id': 'cont_iz_cuerpo_relacionados'})
    ]

    # (section title, listing page URL) pairs consumed by parse_index().
    # NOTE(review): 'Espectaculos' points at the "cultura" listing and
    # 'Tecnologia' at the "economia" listing -- confirm this is intended.
    feeds = [
        (u'Nacional', u'http://www.emol.com/noticias/nacional/todas.aspx'),
        (u'Mundo', u'http://www.emol.com/noticias/internacional/todas.aspx'),
        (u'Deportes', u'http://www.emol.com/noticias/deportes/todas.aspx'),
        (u'Espectaculos', u'http://www.emol.com/noticias/cultura/todas.aspx'),
        (u'Tecnologia', u'http://www.emol.com/noticias/economia/todas.aspx')
    ]

    def parse_index(self):
        """Scrape every listing page in ``feeds`` and return the index.

        Returns a list of ``(feed title, articles)`` tuples, one per feed.
        Each article is a dict with the keys ``title``, ``date`` (always
        empty), ``url`` and ``description``. A feed whose page lacks the
        listing container is still returned, with an empty article list.

        Listing entries without a link, or whose link has no ``href``, are
        skipped so that a single malformed entry cannot abort the download.
        """
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(
                0,
                _('Fetching feed') + ' %s...' % (
                    feedtitle if feedtitle else feedurl))

            articles = []
            soup = self.index_to_soup(feedurl)
            listing = soup.find(
                'div', attrs={'id': 'caja_listado_noticia_todas'})
            if listing is not None:
                for item in listing.findAll('div', attrs={'class': 'listado'}):
                    link = item.find('a')
                    href = link.get('href') if link is not None else None
                    if not href:
                        # Nothing to download for this entry; ignore it
                        # instead of raising on the missing link.
                        continue
                    articles.append({
                        'title': self.tag_to_string(link),
                        'date': '',
                        'url': href,
                        'description': self.tag_to_string(item.find('span'))
                    })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds