# -*- coding: utf-8 -*-

__license__ = 'GPL v3'
__copyright__ = '2011, Piet van Oostrum '
'''
www.opinion.com.bo
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe


class Opinion_Bol(BasicNewsRecipe):
    """Calibre recipe for the Bolivian daily "Opinión" (www.opinion.com.bo).

    Articles come from the site's per-section RSS feeds. Only entries whose
    URL contains the date path segment stored in ``today`` are kept (see
    ``get_article_url``).
    """

    # --- Publication metadata -------------------------------------------
    title = u'Opinión - Bolivia'
    __author__ = 'Piet van Oostrum'
    description = u'Opinión diario de circulación nacional, Cochabamba, Bolivia'
    publisher = 'Coboce Ltda - Editora Opinión'
    category = 'news, politics, Bolivia'
    version = 1
    language = 'es_BO'
    publication_type = 'newspaper'

    # --- Download behaviour ---------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 20
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    delay = 1
    remove_empty_feeds = True

    # --- Artwork ----------------------------------------------------------
    # The cover URL embeds a /YYYY/MMDD/ path, filled in by strftime when
    # the class body is executed.
    cover_url = strftime(
        'http://www.opinion.com.bo/opinion/articulos/%Y/%m%d/fotos/portada_650.jpg'
    )
    masthead_url = 'http://opinion.com.bo/opinion/articulos/imagenes/logo_opinion.gif'

    # --- Styling ----------------------------------------------------------
    # Adjacent literals are concatenated into a single one-line stylesheet.
    extra_css = (
        "body{font-family: Helvetica,Arial,sans-serif} "
        ".seccion_encabezado_nota_inte{font-size: 1.1em; font-weight: bold;} "
        ".autor_nota_inte{color: #999999; font-size: 0.8em; "
        "margin-bottom: 0.5em; text-align: right;} "
        ".pie{font-size: 0.8em;}"
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- Content extraction -----------------------------------------------
    # NOTE(review): 'intererior' looks misspelled but is reproduced exactly;
    # presumably it mirrors the class name used by the site -- confirm.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'columna_izq_nota_intererior'}),
    ]
    remove_tags = [
        dict(name=['meta', 'link', 'form', 'iframe', 'embed', 'object', 'style']),
        dict(name='div', attrs={'class': 'ocultar'}),
    ]
    remove_attributes = ['width', 'height']

    # --- Feeds: (section title, RSS URL) -----------------------------------
    feeds = [
        (u'El País', u'http://www.opinion.com.bo/opinion/rss/el_pais_rss.xml'),
        (u'Cochabamba', u'http://www.opinion.com.bo/opinion/rss/cochabamba_rss.xml'),
        (u'Economía', u'http://www.opinion.com.bo/opinion/rss/economia_rss.xml'),
        (u'Cultura', u'http://www.opinion.com.bo/opinion/rss/cultura_rss.xml'),
        (u'Mundo', u'http://www.opinion.com.bo/opinion/rss/mundo_rss.xml'),
        (u'Ciencia y Tecnología', u'http://www.opinion.com.bo/opinion/rss/ciencia_tecnologia_rss.xml'),
        (u'Policial', u'http://www.opinion.com.bo/opinion/rss/policial_rss.xml'),
        (u'Editorial', u'http://www.opinion.com.bo/opinion/rss/editorial_rss.xml'),
        (u'Subeditorial', u'http://www.opinion.com.bo/opinion/rss/subeditorial_rss.xml'),
        (u'Opinión', u'http://www.opinion.com.bo/opinion/rss/opinion_rss.xml'),
        (u'Deportes', u'http://www.opinion.com.bo/opinion/rss/deportes_rss.xml'),
        # NOTE(review): the leading space in this title is kept as found.
        (u' Vida de hoy', u'http://www.opinion.com.bo/opinion/rss/vidadehoy_rss.xml'),
    ]

    def preprocess_html(self, soup):
        """Remove every inline ``style`` attribute from the parsed page.

        :param soup: parsed article document; must provide ``findAll``.
        :return: the same ``soup`` object, modified in place.
        """
        for item in soup.findAll(style=True):
            del item['style']
        return soup

    # Keep only today's articles: article URLs are expected to contain a
    # /YYYY/MMDD/ path segment. The value is computed once, when the class
    # body is executed.
    # NOTE(review): maybe should take timezone into account.
    today = strftime('/%Y/%m%d/')

    def get_article_url(self, article):
        """Return the article link only if it contains today's date segment.

        :param article: feed entry exposing a ``link`` attribute.
        :return: the link when it contains ``self.today``; otherwise ``None``.
            Presumably a ``None`` result makes calibre skip the entry --
            confirm against the calibre feed parser.
        """
        # Entries without a link (or with an empty one) are treated as
        # "not today's" instead of raising AttributeError/TypeError.
        link = getattr(article, 'link', None)
        if link and self.today in link:
            return link
        return None