From e1d99a09aba143adf4f4475bdf7288ffc82ea069 Mon Sep 17 00:00:00 2001
From: Carlos Alves
Date: Sun, 29 Sep 2013 13:45:11 -0300
Subject: [PATCH] Add some recipes of Uruguay.

Add 10minutos.recipe (News of Salto)
Add diario_el_pueblo.recipe (News of Salto)
Add diario_salto.recipe (News of Salto)
Add unoticias.recipe (News of Montevideo)
---
 recipes/10minutos.recipe        | 50 +++++++++++++++++++++++++++++
 recipes/diario_el_pueblo.recipe | 51 ++++++++++++++++++++++++++++++
 recipes/diario_salto.recipe     | 50 +++++++++++++++++++++++++++++
 recipes/unoticias.recipe        | 56 +++++++++++++++++++++++++++++++++
 4 files changed, 207 insertions(+)
 create mode 100644 recipes/10minutos.recipe
 create mode 100644 recipes/diario_el_pueblo.recipe
 create mode 100644 recipes/diario_salto.recipe
 create mode 100644 recipes/unoticias.recipe

diff --git a/recipes/10minutos.recipe b/recipes/10minutos.recipe
new file mode 100644
index 0000000000..4c2f8a7ec7
--- /dev/null
+++ b/recipes/10minutos.recipe
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__author__ = '2013, Carlos Alves '
+'''
+10minutos.com.uy
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class General(BasicNewsRecipe):  # recipe settings for the 10minutos.com.uy feed
+    title = '10minutos'
+    __author__ = 'Carlos Alves'
+    description = 'Noticias de Salto - Uruguay'
+    tags = 'news, sports'
+    language = 'es_UY'
+    timefmt = '[%a, %d %b, %Y]'
+    use_embedded_content = False
+    recursion = 5  # NOTE(review): calibre documents 'recursions'; confirm this name is honored
+    encoding = 'utf8'
+    remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    keep_only_tags = [dict(name='div', attrs={'class':'post-content'})]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['hr', 'titlebar', 'navigation']}),
+        dict(name='p', attrs={'class':'post-meta'}),
+        dict(name=['object','link'])
+    ]
+
+    extra_css = '''
+        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+        p {font-family:Arial,Helvetica,sans-serif;}
+    '''
+    feeds = [
+        (u'Articulos', u'http://10minutos.com.uy/feed/')
+    ]
+
+    def get_cover_url(self):  # returns a constant image URL
+        return 'http://10minutos.com.uy/a/img/logo.png'
+
+    def preprocess_html(self, soup):  # returns the same soup object after editing it in place
+        for item in soup.findAll(style=True):  # every tag carrying a 'style' attribute
+            del item['style']  # drop the inline styling
+        return soup
diff --git a/recipes/diario_el_pueblo.recipe b/recipes/diario_el_pueblo.recipe
new file mode 100644
index 0000000000..4cfab9eb32
--- /dev/null
+++ b/recipes/diario_el_pueblo.recipe
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__author__ = '2013, Carlos Alves '
+'''
+diarioelpueblo.com.uy
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class General(BasicNewsRecipe):  # recipe settings for the diarioelpueblo.com.uy feed
+    title = 'Diario El Pueblo'
+    __author__ = 'Carlos Alves'
+    description = 'Noticias de Salto - Uruguay'
+    tags = 'news, sports'
+    language = 'es_UY'
+    timefmt = '[%a, %d %b, %Y]'
+    use_embedded_content = False
+    recursion = 5  # NOTE(review): calibre documents 'recursions'; confirm this name is honored
+    encoding = 'utf8'
+    remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    keep_only_tags = [dict(name='div', attrs={'class':'post-alt blog'})]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['hr', 'titlebar', 'volver-arriba-right','navigation']}),
+        dict(name='div', attrs={'id':['comment','suckerfish','crp_related']}),  # one 'id' key with a list: a repeated key in a dict literal keeps only its last value
+        dict(name='h3', attrs={'class':['post_date']}),
+        dict(name=['object','link'])
+    ]
+
+    extra_css = '''
+        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+        p {font-family:Arial,Helvetica,sans-serif;}
+    '''
+    feeds = [
+        (u'Articulos', u'http://www.diarioelpueblo.com.uy/feed')
+    ]
+
+    def get_cover_url(self):  # returns a constant image URL
+        return 'http://www.diarioelpueblo.com.uy/wp-content/uploads/2013/06/Cabezal_Web1.jpg'
+
+    def preprocess_html(self, soup):  # returns the same soup object after editing it in place
+        for item in soup.findAll(style=True):  # every tag carrying a 'style' attribute
+            del item['style']  # drop the inline styling
+        return soup
diff --git a/recipes/diario_salto.recipe b/recipes/diario_salto.recipe
new file mode 100644
index 0000000000..799233db4d
--- /dev/null
+++ b/recipes/diario_salto.recipe
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__author__ = '2013, Carlos Alves '
+'''
+diariosalto.com.uy
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class General(BasicNewsRecipe):  # recipe settings for the diariosalto.com.uy feed
+    title = 'Diario Salto'
+    __author__ = 'Carlos Alves'
+    description = 'Noticias de Salto - Uruguay'
+    tags = 'news, sports'
+    language = 'es_UY'
+    timefmt = '[%a, %d %b, %Y]'
+    use_embedded_content = False
+    recursion = 5  # NOTE(review): calibre documents 'recursions'; confirm this name is honored
+    encoding = 'utf8'
+    remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    keep_only_tags = [dict(name='div', attrs={'class':'post'})]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['hr', 'titlebar', 'navigation']}),
+        dict(name='div', attrs={'id':'comment'}),
+        dict(name=['object','link'])
+    ]
+
+    extra_css = '''
+        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+        p {font-family:Arial,Helvetica,sans-serif;}
+    '''
+    feeds = [
+        (u'Articulos', u'http://www.diariosalto.com.uy/feed/atom')
+    ]
+
+    def get_cover_url(self):  # returns a constant image URL
+        return 'http://diariosalto.com.uy/demo/wp-content/uploads/2011/12/diario-salto_logo-final-b-b.png'
+
+    def preprocess_html(self, soup):  # returns the same soup object after editing it in place
+        for item in soup.findAll(style=True):  # every tag carrying a 'style' attribute
+            del item['style']  # drop the inline styling
+        return soup
diff --git a/recipes/unoticias.recipe b/recipes/unoticias.recipe
new file mode 100644
index 0000000000..f7d1c7693c
--- /dev/null
+++ b/recipes/unoticias.recipe
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__author__ = '2013, Carlos Alves '
+'''
+unoticias.com.uy
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class General(BasicNewsRecipe):  # recipe settings for the unoticias.com.uy RSS feeds
+    title = 'UNoticias'
+    __author__ = 'Carlos Alves'
+    description = 'Noticias Uruguay'
+    tags = 'news, sports, politics'
+    language = 'es_UY'
+    timefmt = '[%a, %d %b, %Y]'
+    use_embedded_content = False
+    recursion = 5  # NOTE(review): calibre documents 'recursions'; confirm this name is honored
+    encoding = 'ISO-8859-1'
+    remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    keep_only_tags = [
+        dict(name='h1', attrs={'class':'nombre'}),
+        dict(name='h2', attrs={'class':'copete t20'}),
+        dict(name='div', attrs={'class':'desc'})
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['br', 'hr', 'titlebar', 'navigation']}),
+        dict(name='div', attrs={'id':'comment'}),
+        dict(name=['object','link'])
+    ]
+
+    extra_css = '''
+        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+        p {font-family:Arial,Helvetica,sans-serif;}
+    '''
+    feeds = [
+        (u'Nacionales', u'http://www.unoticias.com.uy/RSS/nacionales.xml'),
+        (u'Deportes', u'http://www.unoticias.com.uy/RSS/deportes.xml'),
+        (u'Sociedad', u'http://www.unoticias.com.uy/RSS/Sociedad.xml')
+    ]
+
+    def get_cover_url(self):  # returns a constant image URL
+        return 'http://www.unoticias.com.uy/artworks/logos/logo_small.gif'
+
+    def preprocess_html(self, soup):  # returns the same soup object after editing it in place
+        for item in soup.findAll(style=True):  # every tag carrying a 'style' attribute
+            del item['style']  # drop the inline styling
+        return soup