diff --git a/recipes/10minutos.recipe b/recipes/10minutos.recipe index fcb4ba642b..7a2983705b 100644 --- a/recipes/10minutos.recipe +++ b/recipes/10minutos.recipe @@ -1,17 +1,17 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2 ## # Title: Diario 10minutos.com.uy News and Sports Calibre Recipe -# Contact: Carlos Alves - +# Contact: Carlos Alves - ## # License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html -# Copyright: Carlos Alves - +# Copyright: Carlos Alves - ## # Written: September 2013 -# Last Edited: 2016-01-11 +# Last Edited: 2018-02-13 ## __license__ = 'GPL v3' -__author__ = '2016, Carlos Alves ' +__author__ = '2016, Carlos Alves ' ''' 10minutos.com.uy ''' @@ -44,17 +44,20 @@ class General(BasicNewsRecipe): ] extra_css = ''' - h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} - h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} - h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} - p {font-family:Arial,Helvetica,sans-serif;} + h1{font-family: Georgia,"Times New Roman",Times,serif} + h3{font-family: Georgia,"Times New Roman",Times,serif} + h2{font-family: Georgia,"Times New Roman",Times,serif} + p{font-family: Verdana,Arial,Helvetica,sans-serif} + body{font-family: Verdana,Arial,Helvetica,sans-serif} + img{margin-bottom: 0.4em; display:block;} ''' + feeds = [ (u'Articulos', u'http://10minutos.com.uy/?feed=rss2') ] def get_cover_url(self): - return 'http://10minutos.com.uy/a/img/logo.png' + return None def preprocess_html(self, soup): for item in soup.findAll(style=True): diff --git a/recipes/180.recipe b/recipes/180.recipe index a579a165ad..ba0acdde14 100644 --- a/recipes/180.recipe +++ b/recipes/180.recipe @@ -1,6 +1,6 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2 ## -# Last Edited: 2016-01-11 Carlos Alves +# Last Edited: 2018-02-13 Carlos Alves ## __license__ = 'GPL v3' @@ -28,22 +28,26 @@ class Noticias(BasicNewsRecipe): max_articles_per_feed = 100 remove_tags_after = dict(name='article') keep_only_tags = [ - dict(name='h3', attrs={'class': 'title'}), - dict(name='div', attrs={'class': 'copete'}), - dict(name='article', attrs={'class': 'texto'}) + dict(name='div', attrs={'class': 'nota'}), + dict(name='h3',), + dict(name='h4',), + dict(name='article',) ] remove_tags = [ - dict(name=['object', 'link']) + dict(name='div', attrs={'class': 'items'}) ] remove_attributes = ['width', 'height', 'style', 'font', 'color'] extra_css = ''' - h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} - h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} - h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} - p {font-family:Arial,Helvetica,sans-serif;} + h1{font-family: Georgia,"Times New Roman",Times,serif} + h3{font-family: Georgia,"Times New Roman",Times,serif} + h2{font-family: Georgia,"Times New Roman",Times,serif} + p{font-family: Verdana,Arial,Helvetica,sans-serif} + body{font-family: Verdana,Arial,Helvetica,sans-serif} + img{margin-bottom: 0.4em; display:block;} ''' + feeds = [ (u'Titulares', u'http://www.180.com.uy/feed.php') ] diff --git a/recipes/diario_el_pueblo.recipe b/recipes/diario_el_pueblo.recipe index 638960fe8e..de055b55b3 100644 --- a/recipes/diario_el_pueblo.recipe +++ b/recipes/diario_el_pueblo.recipe @@ -1,16 +1,16 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2 ## # Title: Diario El Pueblo News and Sports Calibre Recipe -# Contact: Carlos Alves - +# Contact: Carlos Alves - ## # License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html -# Copyright: Carlos Alves - +# Copyright: Carlos Alves - ## # Written: September 2013 -# Last Edited: 2016-01-11 +# Last Edited: 2018-02-13 ## __license__ = 'GPL v3' -__author__ = '2013, Carlos Alves ' +__author__ = '2013, Carlos Alves ' ''' diarioelpueblo.com.uy @@ -46,17 +46,20 @@ class General(BasicNewsRecipe): ] extra_css = ''' - h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} - h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} - h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} - p {font-family:Arial,Helvetica,sans-serif;} + h1{font-family: Georgia,"Times New Roman",Times,serif} + h3{font-family: Georgia,"Times New Roman",Times,serif} + h2{font-family: Georgia,"Times New Roman",Times,serif} + p{font-family: Verdana,Arial,Helvetica,sans-serif} + body{font-family: Verdana,Arial,Helvetica,sans-serif} + img{margin-bottom: 0.4em; display:block;} ''' + feeds = [ (u'Articulos', u'http://www.diarioelpueblo.com.uy/feed') ] def get_cover_url(self): - return 'http://www.diarioelpueblo.com.uy/wp-content/uploads/2013/06/Cabezal_Web1.jpg' + return None def preprocess_html(self, soup): for item in soup.findAll(style=True): diff --git a/recipes/el_observador.recipe b/recipes/el_observador.recipe index 45cb9e56ab..8b27c48fd6 100644 --- a/recipes/el_observador.recipe +++ b/recipes/el_observador.recipe @@ -1,6 +1,6 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2 ## -# Last Edited: 2016-01-11 Carlos Alves +# Last Edited: 2018-02-13 Carlos Alves ## __license__ = 'GPL v3' @@ -44,17 +44,20 @@ class Noticias(BasicNewsRecipe): remove_attributes = ['width', 'height', 'style', 'font', 'color'] extra_css = ''' - h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} - h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} - h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} - p {font-family:Arial,Helvetica,sans-serif;} + h1{font-family: Georgia,"Times New Roman",Times,serif} + h3{font-family: Georgia,"Times New Roman",Times,serif} + h2{font-family: Georgia,"Times New Roman",Times,serif} + p{font-family: Verdana,Arial,Helvetica,sans-serif} + body{font-family: Verdana,Arial,Helvetica,sans-serif} + img{margin-bottom: 0.4em; display:block;} ''' + feeds = [ (u'Portada', u'http://www.elobservador.com.uy/rss/home.xml'), ] def get_cover_url(self): - return 'http://css.elobservador.com.uy/css/181/images/logo.svg' + return None def preprocess_html(self, soup): for item in soup.findAll(style=True): diff --git a/recipes/el_pais_uy.recipe b/recipes/el_pais_uy.recipe index 4cff7f8db6..84056ad2c0 100644 --- a/recipes/el_pais_uy.recipe +++ b/recipes/el_pais_uy.recipe @@ -1,6 +1,6 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2 ## -# Last Edited: 2016-01-11 Carlos Alves +# Last Edited: 2018-02-13 Carlos Alves ## __license__ = 'GPL v3' @@ -28,12 +28,11 @@ class General(BasicNewsRecipe): no_stylesheets = True oldest_article = 2 - max_articles_per_feed = 200 + max_articles_per_feed = 20 keep_only_tags = [ - dict(name='div', attrs={'class': 'title'}), - dict(name='div', attrs={'class': 'pc'}), - dict(name='div', attrs={'class': 'image'}), - dict(name='div', attrs={'class': 'article-content'}) + dict(name='h1', attrs={'class': 'title'}), + dict(name='div', attrs={'class': 'composite-captioned-image'}), + dict(name='div', attrs={'class': 'content-modules'}) ] conversion_options = { @@ -49,32 +48,27 @@ class General(BasicNewsRecipe): dict(name=['object', 'table'])] extra_css = ''' - h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} - h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} - h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} - p {font-family:Arial,Helvetica,sans-serif;} - body{font-family: Verdana,Arial,Helvetica,sans-serif } + h1{font-family: Georgia,"Times New Roman",Times,serif} + h3{font-family: Georgia,"Times New Roman",Times,serif} + h2{font-family: Georgia,"Times New Roman",Times,serif} + p{font-family: Verdana,Arial,Helvetica,sans-serif} + body{font-family: Verdana,Arial,Helvetica,sans-serif} img{margin-bottom: 0.4em; display:block;} ''' + feeds = [ (u'Ultimo Momento', - u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=umomento'), - (u'Editorial', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=editorial'), - (u'Nacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=nacional'), - (u'Internacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=internacional'), - (u'Espectaculos', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=espectaculos'), - (u'Ciudades', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=ciudades'), - (u'Economia', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=economia') + u'https://www.elpais.com.uy/rss/') ] def get_cover_url(self): cover_url = None - index = 'http://www.elpais.com.uy/impresa/' + index = 'https://www.elpais.com.uy/impresa/' soup = self.index_to_soup(index) - link_item = soup.find('div', attrs={'class': 'box-dotted-white'}) - print link_item + link_item = soup.find('a', attrs={'class': 'page-link link-module'}) + #print link_item if link_item: - cover_url = 'http://www.elpais.com.uy' + link_item.img['src'] + cover_url = 'https://www.elpais.com.uy' + link_item.get('href') return cover_url def preprocess_html(self, soup): diff --git a/recipes/lr21.recipe b/recipes/lr21.recipe deleted file mode 100644 index 5c0835aba9..0000000000 --- a/recipes/lr21.recipe +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python2 -## -# Title: lr21.com.uy News, Sports, and Blog Calibre Recipe -# Contact: Carlos Alves - -## -# License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html -# Copyright: Carlos Alves - -## -# Written: January 2016 -# Last Edited: 2016-01-11 -## -__license__ = 'GPL v3' -__author__ = '2016, Carlos Alves ' - -''' -lr21.com.uy -''' - -from calibre.web.feeds.news import BasicNewsRecipe - - -class General(BasicNewsRecipe): - title = 'LaRed21' - __author__ = 'Carlos Alves' - description = 'LaRed21 Diario Digital' - tags = 'soccer, futbol, news' - language = 'es_UY' - timefmt = '[%a, %d %b, %Y]' - use_embedded_content = False - recursion = 5 - encoding = None - remove_javascript = True - no_stylesheets = True - - oldest_article = 10 - max_articles_per_feed = 100 - keep_only_tags = [ - dict(name='h1', attrs={'id': 'article-title'}), - dict(name='h2', attrs={'class': 'copete'}), - dict(name='div', attrs={'class': 'article-content clear bottom-1'}) - ] - - remove_tags = [ - dict(name='div', attrs={ - 'class': ['br', 'hr', 'titlebar', 'navigation']}), - dict(name='dl', attrs={'class': 'gallery-item'}), - dict(name=['object', 'link']) - ] - - extra_css = ''' - h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} - h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} - h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} - p {font-family:Arial,Helvetica,sans-serif;} - ''' - feeds = [ - (u'LaRed21 Diario Digital', u'http://www.lr21.com.uy/feed') - ] - - def get_cover_url(self): - pass - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return soup diff --git a/recipes/padreydecano.recipe b/recipes/padreydecano.recipe index 419952dda9..8bd45db312 100644 --- a/recipes/padreydecano.recipe +++ b/recipes/padreydecano.recipe @@ -1,16 +1,16 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python2 ## -# Title: Padreydecano.com News, Sports, and Blog Calibre Recipe -# Contact: Carlos Alves - +# Title: Padreydecano.com News, Sports, and Blog +# Contact: Carlos Alves - ## # License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html -# Copyright: Carlos Alves - +# Copyright: Carlos Alves - ## # Written: September 2013 -# Last Edited: 2016-01-11 +# Last Edited: 2018-02-13 ## __license__ = 'GPL v3' -__author__ = '2016, Carlos Alves ' +__author__ = '2016, Carlos Alves ' ''' padreydecano.com @@ -47,18 +47,21 @@ class General(BasicNewsRecipe): ] extra_css = ''' - h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} - h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} - h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} - p {font-family:Arial,Helvetica,sans-serif;} + h1{font-family: Georgia,"Times New Roman",Times,serif} + h3{font-family: Georgia,"Times New Roman",Times,serif} + h2{font-family: Georgia,"Times New Roman",Times,serif} + p{font-family: Verdana,Arial,Helvetica,sans-serif} + body{font-family: Verdana,Arial,Helvetica,sans-serif} + img{margin-bottom: 0.4em; display:block;} ''' + feeds = [ (u'Padre y Decano | Club Atlético Peñarol', u'http://www.padreydecano.com/cms/feed/') ] def get_cover_url(self): - return 'http://www.padreydecano.com/cms/wp-content/uploads/2011/09/editorial.jpg' + return None def preprocess_html(self, soup): for item in soup.findAll(style=True):