# -*- coding: utf-8 -*-
__license__ = 'GPL v3'
__copyright__ = '2011, Piet van Oostrum '
'''
www.opinion.com.bo
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe


class Opinion_Bol(BasicNewsRecipe):
    """Recipe for the Bolivian newspaper Opinión (www.opinion.com.bo).

    Articles are collected from the section RSS feeds listed in ``feeds``
    and restricted to those whose URL contains today's date path.
    """

    # --- Publication metadata -------------------------------------------
    title = u'Opinión - Bolivia'
    __author__ = 'Piet van Oostrum'
    description = u'Opinión diario de circulación nacional, Cochabamba, Bolivia'
    publisher = 'Coboce Ltda - Editora Opinión'
    category = 'news, politics, Bolivia'
    version = 1
    language = 'es_BO'
    publication_type = 'newspaper'

    # --- Download / conversion settings ---------------------------------
    oldest_article = 1
    max_articles_per_feed = 20
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    delay = 1
    remove_empty_feeds = True

    # The cover path embeds the date; it is formatted once, when the class
    # body is executed.
    cover_url = strftime(
        'http://www.opinion.com.bo/opinion/articulos/%Y/%m%d/fotos/portada_650.jpg')
    masthead_url = 'http://opinion.com.bo/opinion/articulos/imagenes/logo_opinion.gif'

    extra_css = (
        "body{font-family: Helvetica,Arial,sans-serif} "
        ".seccion_encabezado_nota_inte{font-size: 1.1em; font-weight: bold;} "
        ".autor_nota_inte{color: #999999; font-size: 0.8em; "
        "margin-bottom: 0.5em; text-align: right;} "
        ".pie{font-size: 0.8em;}"
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- Page clean-up rules --------------------------------------------
    # NOTE(review): 'intererior' looks like a typo but presumably mirrors
    # the class name used by the site's own markup -- confirm before changing.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'columna_izq_nota_intererior'}),
    ]
    remove_tags = [
        dict(name=['meta', 'link', 'form', 'iframe', 'embed', 'object', 'style']),
        dict(name='div', attrs={'class': 'ocultar'}),
    ]
    remove_attributes = ['width', 'height']

    # --- Section feeds: (title, RSS URL) --------------------------------
    feeds = [
        (u'El País', u'http://www.opinion.com.bo/opinion/rss/el_pais_rss.xml'),
        (u'Cochabamba', u'http://www.opinion.com.bo/opinion/rss/cochabamba_rss.xml'),
        (u'Economía', u'http://www.opinion.com.bo/opinion/rss/economia_rss.xml'),
        (u'Cultura', u'http://www.opinion.com.bo/opinion/rss/cultura_rss.xml'),
        (u'Mundo', u'http://www.opinion.com.bo/opinion/rss/mundo_rss.xml'),
        (u'Ciencia y Tecnología',
         u'http://www.opinion.com.bo/opinion/rss/ciencia_tecnologia_rss.xml'),
        (u'Policial', u'http://www.opinion.com.bo/opinion/rss/policial_rss.xml'),
        (u'Editorial', u'http://www.opinion.com.bo/opinion/rss/editorial_rss.xml'),
        (u'Subeditorial',
         u'http://www.opinion.com.bo/opinion/rss/subeditorial_rss.xml'),
        (u'Opinión', u'http://www.opinion.com.bo/opinion/rss/opinion_rss.xml'),
        (u'Deportes', u'http://www.opinion.com.bo/opinion/rss/deportes_rss.xml'),
        (u' Vida de hoy', u'http://www.opinion.com.bo/opinion/rss/vidadehoy_rss.xml'),
    ]

    def preprocess_html(self, soup):
        """Remove the inline ``style`` attribute from every tag that has one.

        :param soup: parsed article document.
        :return: the same ``soup`` object, modified in place.
        """
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    # Keep only today's articles: article URLs are expected to contain a
    # '/YYYY/MMDD/' path segment. The segment is computed once, when the
    # class body is executed.
    # TODO: maybe should take the timezone into account.
    today = strftime('/%Y/%m%d/')

    def get_article_url(self, article):
        """Return the article's link if it belongs to today's edition.

        :param article: feed entry exposing an optional ``link`` attribute.
        :return: the link when it contains ``self.today``; ``None`` when the
            entry has no link or the link is for another date (presumably
            calibre skips entries for which no URL is returned -- confirm).
        """
        # A feed entry without a link must be skipped, not crash the feed.
        link = getattr(article, 'link', None)
        if link and self.today in link:
            return link
        return None