diff --git a/recipes/icons/lacapital.png b/recipes/icons/lacapital.png new file mode 100644 index 0000000000..1b245e6874 Binary files /dev/null and b/recipes/icons/lacapital.png differ diff --git a/recipes/lacapital.recipe b/recipes/lacapital.recipe new file mode 100644 index 0000000000..0c9553903c --- /dev/null +++ b/recipes/lacapital.recipe @@ -0,0 +1,76 @@ +__license__ = 'GPL v3' +__copyright__ = '2013, Darko Miletic ' +''' +www.lacapital.com.ar +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class LaCapital(BasicNewsRecipe): + title = 'La Capital de Rosario' + __author__ = 'Darko Miletic' + description = 'Noticias, actualidad y toda la informacion de Rosario y la region' + publisher = 'Diario La Capital S. A.' + category = 'news, politics, Rosario, Santa Fe, Argentina' + oldest_article = 2 + max_articles_per_feed = 200 + no_stylesheets = True + encoding = 'utf8' + use_embedded_content = False + language = 'es_AR' + remove_empty_feeds = True + publication_type = 'newspaper' + masthead_url = 'http://www.lacapital.com.ar/system/modules/com.tfsla.diario.core/resources/images/logoLaCapital_noCom.png' + extra_css = """ + body{font-family: Georgia,"Times New Roman",Times,serif } + img{margin-bottom: 0.4em; display:block} + """ + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + keep_only_tags=[dict(attrs={'class':'leer'})] + remove_tags_after=dict(attrs={'class':'notaA'}) + remove_tags = [ + dict(name=['meta','link','iframe','object']) + ,dict(name='div', attrs={'class':['herramientas','almargen','relacionadas']}) + ] + + + feeds = [ + (u'Portada' , u'http://www.lacapital.com.ar/rss/home.xml' ) + ,(u'La Ciudad' , u'http://www.lacapital.com.ar/rss/laciudad.xml' ) + ,(u'Politica' , u'http://www.lacapital.com.ar/rss/politica.xml' ) + ,(u'Economia' , u'http://www.lacapital.com.ar/rss/economia.xml' ) + ,(u'La Region' , u'http://www.lacapital.com.ar/rss/laregion.xml' ) + ,(u'Informacion General' , u'http://www.lacapital.com.ar/rss/informaciongral.xml' ) + ,(u'El Mundo' , u'http://www.lacapital.com.ar/rss/elmundo.xml' ) + ,(u'Opinion' , u'http://www.lacapital.com.ar/rss/opinion.xml' ) + ,(u'Cartas de lectores' , u'http://www.lacapital.com.ar/rss/cartasdelectores.xml') + ,(u'Escenario' , u'http://www.lacapital.com.ar/rss/escenario.xml' ) + ,(u'Policiales' , u'http://www.lacapital.com.ar/rss/policiales.xml' ) + ,(u'Ovacion' , u'http://www.lacapital.com.ar/rss/ovacion.xml' ) + ,(u'Turismo' , u'http://www.lacapital.com.ar/rss/turismo.xml' ) + ,(u'Economia' , u'http://www.lacapital.com.ar/rss/economia.xml' ) + ,(u'SeƱales' , u'http://www.lacapital.com.ar/rss/senales.xml' ) + ,(u'Educacion' , u'http://www.lacapital.com.ar/rss/educacion.xml' ) + ,(u'Estilo' , u'http://www.lacapital.com.ar/rss/estilo.xml' ) + ,(u'Salud' , u'http://www.lacapital.com.ar/rss/salud.xml' ) + ,(u'Tecnologia' , u'http://www.lacapital.com.ar/rss/tecnologia.xml' ) + ] + + def get_cover_url(self): + soup = self.index_to_soup('http://www.lacapital.com.ar/impresa/tapa.html') + for image in soup.findAll('img',alt=True): + if image['alt'].startswith('Tapa de papel'): + return image['src'] + return None + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup