From c13bb3817898a75f00d8b7789c6f88a1d6c01935 Mon Sep 17 00:00:00 2001 From: Carlos Alves Date: Mon, 11 Jan 2016 14:29:59 -0300 Subject: [PATCH] Update recipes of Uruguay. Delete diario_salto.recipe, the site has been down for a while. Delete la_diaria.recipe, it requires a paid subscription. Delete unoticias.recipe, the newspaper closed months ago. Add lr21.recipe, newspaper. Update 10minutos.recipe Update 180.recipe Update diario_el_pueblo.recipe Update el_observador.recipe Update el_pais_uy.recipe Update padreydecano.recipe --- recipes/10minutos.recipe | 15 ++++++- recipes/180.recipe | 12 +++++- recipes/diario_el_pueblo.recipe | 14 ++++++- recipes/diario_salto.recipe | 50 ----------------------- recipes/el_observador.recipe | 9 ++-- recipes/el_pais_uy.recipe | 17 ++++---- recipes/la_diaria.recipe | 48 ---------------------- recipes/{unoticias.recipe => lr21.recipe} | 40 ++++++++++-------- recipes/padreydecano.recipe | 19 +++++++-- 9 files changed, 91 insertions(+), 133 deletions(-) delete mode 100644 recipes/diario_salto.recipe delete mode 100644 recipes/la_diaria.recipe rename recipes/{unoticias.recipe => lr21.recipe} (56%) diff --git a/recipes/10minutos.recipe b/recipes/10minutos.recipe index b8697cc50e..77903c363f 100644 --- a/recipes/10minutos.recipe +++ b/recipes/10minutos.recipe @@ -1,7 +1,17 @@ #!/usr/bin/env python2 +## +## Title: Diario 10minutos.com.uy News and Sports Calibre Recipe +## Contact: Carlos Alves - +## +## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html +## Copyright: Carlos Alves - +## +## Written: September 2013 +## Last Edited: 2016-01-11 +## __license__ = 'GPL v3' -__author__ = '2013, Carlos Alves ' +__author__ = '2016, Carlos Alves ' ''' 10minutos.com.uy ''' @@ -27,6 +37,7 @@ class General(BasicNewsRecipe): remove_tags = [ dict(name='div', attrs={'class':['hr', 'titlebar', 'navigation']}), + dict(name='div', attrs={'class':'sharedaddy sd-sharing-enabled'}), dict(name='p', attrs={'class':'post-meta'}), dict(name=['object','link']) ] @@ -38,7 +49,7 @@ class General(BasicNewsRecipe): p {font-family:Arial,Helvetica,sans-serif;} ''' feeds = [ - (u'Articulos', u'http://10minutos.com.uy/feed/') + (u'Articulos', u'http://10minutos.com.uy/?feed=rss2') ] def get_cover_url(self): diff --git a/recipes/180.recipe b/recipes/180.recipe index 67748d7484..4be825a81e 100644 --- a/recipes/180.recipe +++ b/recipes/180.recipe @@ -1,4 +1,7 @@ #!/usr/bin/env python2 +## +## Last Edited: 2016-01-11 Carlos Alves +## __license__ = 'GPL v3' __author__ = '2010, Gustavo Azambuja ' @@ -22,7 +25,12 @@ class Noticias(BasicNewsRecipe): oldest_article = 2 max_articles_per_feed = 100 - keep_only_tags = [dict(name='div', attrs={'class':'tef-md tef-md-seccion-sociedad'})] + remove_tags_after = dict(name='article') + keep_only_tags = [ + dict(name='h3', attrs={'class':'title'}), + dict(name='div', attrs={'class':'copete'}), + dict(name='article', attrs={'class':'texto'}) + ] remove_tags = [ dict(name=['object','link']) ] @@ -40,7 +48,7 @@ class Noticias(BasicNewsRecipe): ] def get_cover_url(self): - return 'http://www.180.com.uy/tplef/img/logo.gif' + pass def preprocess_html(self, soup): diff --git a/recipes/diario_el_pueblo.recipe b/recipes/diario_el_pueblo.recipe index 1989c78980..770ae0103c 100644 --- a/recipes/diario_el_pueblo.recipe +++ b/recipes/diario_el_pueblo.recipe @@ -1,7 +1,17 @@ #!/usr/bin/env python2 - +## +## Title: Diario El Pueblo News and Sports Calibre Recipe +## Contact: Carlos Alves - +## +## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html +## Copyright: Carlos Alves - +## +## Written: September 2013 +## Last Edited: 2016-01-11 +## __license__ = 'GPL v3' -__author__ = '2013, Carlos Alves ' +__author__ = '2013, Carlos Alves ' + ''' diarioelpueblo.com.uy ''' diff --git a/recipes/diario_salto.recipe b/recipes/diario_salto.recipe deleted file mode 100644 index 384f5a8b01..0000000000 --- a/recipes/diario_salto.recipe +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python2 - -__license__ = 'GPL v3' -__author__ = '2013, Carlos Alves ' -''' -diarisalto.com.uy -''' - -from calibre.web.feeds.news import BasicNewsRecipe - -class General(BasicNewsRecipe): - title = 'Diario Salto' - __author__ = 'Carlos Alves' - description = 'Noticias de Salto - Uruguay' - tags = 'news, sports' - language = 'es_UY' - timefmt = '[%a, %d %b, %Y]' - use_embedded_content = False - recursion = 5 - encoding = 'utf8' - remove_javascript = True - no_stylesheets = True - - oldest_article = 2 - max_articles_per_feed = 100 - keep_only_tags = [dict(name='div', attrs={'class':'post'})] - - remove_tags = [ - dict(name='div', attrs={'class':['hr', 'titlebar', 'navigation']}), - dict(name='div', attrs={'id':'comment'}), - dict(name=['object','link']) - ] - - extra_css = ''' - h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} - h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} - h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} - p {font-family:Arial,Helvetica,sans-serif;} - ''' - feeds = [ - (u'Articulos', u'http://www.diariosalto.com.uy/feed/atom') - ] - - def get_cover_url(self): - return 'http://diariosalto.com.uy/demo/wp-content/uploads/2011/12/diario-salto_logo-final-b-b.png' - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return soup diff --git a/recipes/el_observador.recipe b/recipes/el_observador.recipe index c99e714ac0..a9dd8c1237 100644 --- a/recipes/el_observador.recipe +++ b/recipes/el_observador.recipe @@ -1,6 +1,6 @@ #!/usr/bin/env python2 ## -## Last Edited: 2013-09-29 Carlos Alves +## Last Edited: 2016-01-11 Carlos Alves ## __license__ = 'GPL v3' @@ -29,7 +29,8 @@ class Noticias(BasicNewsRecipe): oldest_article = 2 max_articles_per_feed = 100 keep_only_tags = [ - dict(name='div', attrs={'class':'story collapsed'}) + dict(name='h1', attrs={'class':'detail-title newDetailTextChange'}), + dict(name='div', attrs={'class':'cuerpo air newDetailTextChange'}) ] remove_tags = [ dict(name='div', attrs={'class':['fecha', 'copyright', 'story_right']}), @@ -47,9 +48,11 @@ class Noticias(BasicNewsRecipe): p {font-family:Arial,Helvetica,sans-serif;} ''' feeds = [ - (u'Portada', u'http://elobservador.com.uy/rss/portada/'), + (u'Portada', u'http://www.elobservador.com.uy/rss/home.xml'), ] + def get_cover_url(self): + return 'http://css.elobservador.com.uy/css/181/images/logo.svg' def preprocess_html(self, soup): for item in soup.findAll(style=True): diff --git a/recipes/el_pais_uy.recipe b/recipes/el_pais_uy.recipe index d69efcc563..62b4ec3c08 100644 --- a/recipes/el_pais_uy.recipe +++ b/recipes/el_pais_uy.recipe @@ -1,4 +1,7 @@ #!/usr/bin/env python2 +## +## Last Edited: 2016-01-11 Carlos Alves +## __license__ = 'GPL v3' __author__ = '2010, Gustavo Azambuja ' @@ -18,8 +21,7 @@ class General(BasicNewsRecipe): timefmt = '[%a, %d %b, %Y]' use_embedded_content = False recursion = 2 - encoding = 'iso-8859-1' - masthead_url = 'http://www.elpais.com.uy/Images/09/cabezal/logo_PDEP.png' + encoding = 'utf-8' publication_type = 'newspaper' remove_javascript = True no_stylesheets = True @@ -27,8 +29,10 @@ class General(BasicNewsRecipe): oldest_article = 2 max_articles_per_feed = 200 keep_only_tags = [ - dict(name='h1'), - dict(name='div', attrs={'id':'Contenido'}) + dict(name='div', attrs={'class':'title'}), + dict(name='div', attrs={'class':'pc'}), + dict(name='div', attrs={'class':'image'}), + dict(name='div', attrs={'class':'article-content'}) ] conversion_options = { @@ -58,16 +62,15 @@ class General(BasicNewsRecipe): (u'Nacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=nacional'), (u'Internacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=internacional'), (u'Espectaculos', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=espectaculos'), - (u'Deportes', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=deportes'), (u'Ciudades', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=ciudades'), (u'Economia', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=economia') ] def get_cover_url(self): cover_url = None - index = 'http://www.elpais.com.uy' + index = 'http://www.elpais.com.uy/impresa/' soup = self.index_to_soup(index) - link_item = soup.find('div',attrs={'class':'boxmedio box257'}) + link_item = soup.find('div',attrs={'class':'box-dotted-white'}) print link_item if link_item: cover_url = 'http://www.elpais.com.uy'+link_item.img['src'] diff --git a/recipes/la_diaria.recipe b/recipes/la_diaria.recipe deleted file mode 100644 index a6e573ef90..0000000000 --- a/recipes/la_diaria.recipe +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python2 - -__license__ = 'GPL v3' -__author__ = '2010, Gustavo Azambuja ' -''' -ladiaria.com.uy -''' - -from calibre.web.feeds.news import BasicNewsRecipe - -class General(BasicNewsRecipe): - title = 'La Diaria' - __author__ = 'Gustavo Azambuja' - description = 'Noticias de Uruguay' - language = 'es_UY' - timefmt = '[%a, %d %b, %Y]' - use_embedded_content = False - recursion = 5 - encoding = 'utf8' - remove_javascript = True - no_stylesheets = True - - oldest_article = 2 - max_articles_per_feed = 100 - keep_only_tags = [dict(id=['article'])] - remove_tags = [ - dict(name='div', attrs={'class':['byline', 'hr', 'titlebar', 'volver-arriba-right']}), - dict(name='div', attrs={'id':'discussion'}), - dict(name=['object','link']) - ] - - extra_css = ''' - h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} - h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} - h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} - p {font-family:Arial,Helvetica,sans-serif;} - ''' - feeds = [ - (u'Articulos', u'http://ladiaria.com/feeds/articulos') - ] - - def get_cover_url(self): - return 'http://ladiaria.com/edicion/imagenportada/' - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return soup diff --git a/recipes/unoticias.recipe b/recipes/lr21.recipe similarity index 56% rename from recipes/unoticias.recipe rename to recipes/lr21.recipe index 1d3ac72ac1..5e7cade622 100644 --- a/recipes/unoticias.recipe +++ b/recipes/lr21.recipe @@ -1,37 +1,47 @@ #!/usr/bin/env python2 - +## +## Title: lr21.com.uy News, Sports, and Blog Calibre Recipe +## Contact: Carlos Alves - +## +## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html +## Copyright: Carlos Alves - +## +## Written: January 2016 +## Last Edited: 2016-01-11 +## __license__ = 'GPL v3' -__author__ = '2013, Carlos Alves ' +__author__ = '2016, Carlos Alves ' + ''' -unoticias.com.uy +lr21.com.uy ''' from calibre.web.feeds.news import BasicNewsRecipe class General(BasicNewsRecipe): - title = 'UNoticias' + title = 'LaRed21' __author__ = 'Carlos Alves' - description = 'Noticias Uruguay' - tags = 'news, sports, politics' + description = 'LaRed21 Diario Digital' + tags = 'soccer, futbol, news' language = 'es_UY' timefmt = '[%a, %d %b, %Y]' use_embedded_content = False recursion = 5 - encoding = 'ISO-8859-1' + encoding = None remove_javascript = True no_stylesheets = True - oldest_article = 2 + oldest_article = 10 max_articles_per_feed = 100 keep_only_tags = [ - dict(name='h1', attrs={'class':'nombre'}), - dict(name='h2', attrs={'class':'copete t20'}), - dict(name='div', attrs={'class':'desc'}) + dict(name='h1', attrs={'id':'article-title'}), + dict(name='h2', attrs={'class':'copete'}), + dict(name='div', attrs={'class':'article-content clear bottom-1'}) ] remove_tags = [ dict(name='div', attrs={'class':['br', 'hr', 'titlebar', 'navigation']}), - dict(name='div', attrs={'id':'comment'}), + dict(name='dl', attrs={'class':'gallery-item'}), dict(name=['object','link']) ] @@ -42,13 +52,11 @@ class General(BasicNewsRecipe): p {font-family:Arial,Helvetica,sans-serif;} ''' feeds = [ - (u'Nacionales', u'http://www.unoticias.com.uy/RSS/nacionales.xml'), - (u'Deportes', u'http://www.unoticias.com.uy/RSS/deportes.xml'), - (u'Sociedad', u'http://www.unoticias.com.uy/RSS/Sociedad.xml') + (u'LaRed21 Diario Digital', u'http://www.lr21.com.uy/feed') ] def get_cover_url(self): - return 'http://www.unoticias.com.uy/artworks/logos/logo_small.gif' + pass def preprocess_html(self, soup): for item in soup.findAll(style=True): diff --git a/recipes/padreydecano.recipe b/recipes/padreydecano.recipe index 4b9413ccea..b3485f8864 100644 --- a/recipes/padreydecano.recipe +++ b/recipes/padreydecano.recipe @@ -1,7 +1,17 @@ #!/usr/bin/env python2 - +## +## Title: Padreydecano.com News, Sports, and Blog Calibre Recipe +## Contact: Carlos Alves - +## +## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html +## Copyright: Carlos Alves - +## +## Written: September 2013 +## Last Edited: 2016-01-11 +## __license__ = 'GPL v3' -__author__ = '2013, Carlos Alves ' +__author__ = '2016, Carlos Alves ' + ''' padreydecano.com ''' @@ -21,7 +31,7 @@ class General(BasicNewsRecipe): remove_javascript = True no_stylesheets = True - oldest_article = 2 + oldest_article = 10 max_articles_per_feed = 100 keep_only_tags = [ dict(name='h1', attrs={'class':'entry-title'}), @@ -44,6 +54,9 @@ class General(BasicNewsRecipe): (u'Padre y Decano | Club Atlético Peñarol', u'http://www.padreydecano.com/cms/feed/') ] + def get_cover_url(self): + return 'http://www.padreydecano.com/cms/wp-content/uploads/2011/09/editorial.jpg' + def preprocess_html(self, soup): for item in soup.findAll(style=True): del item['style']