diff --git a/recipes/10minutos.recipe b/recipes/10minutos.recipe
new file mode 100644
index 0000000000..4c2f8a7ec7
--- /dev/null
+++ b/recipes/10minutos.recipe
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__author__ = '2013, Carlos Alves '
+'''
+10minutos.com.uy
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class General(BasicNewsRecipe):
+    title = '10minutos'
+    __author__ = 'Carlos Alves'
+    description = 'Noticias de Salto - Uruguay'
+    tags = 'news, sports'
+    language = 'es_UY'
+    timefmt = '[%a, %d %b, %Y]'
+    use_embedded_content = False
+    recursion = 5
+    encoding = 'utf8'
+    remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    keep_only_tags = [dict(name='div', attrs={'class':'post-content'})]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['hr', 'titlebar', 'navigation']}),
+        dict(name='p', attrs={'class':'post-meta'}),
+        dict(name=['object','link'])
+    ]
+
+    extra_css = '''
+        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+        p {font-family:Arial,Helvetica,sans-serif;}
+    '''
+    feeds = [
+        (u'Articulos', u'http://10minutos.com.uy/feed/')
+    ]
+
+    def get_cover_url(self):
+        return 'http://10minutos.com.uy/a/img/logo.png'
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
diff --git a/recipes/diario_el_pueblo.recipe b/recipes/diario_el_pueblo.recipe
new file mode 100644
index 0000000000..4cfab9eb32
--- /dev/null
+++ b/recipes/diario_el_pueblo.recipe
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__author__ = '2013, Carlos Alves '
+'''
+diarioelpueblo.com.uy
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class General(BasicNewsRecipe):
+    title = 'Diario El Pueblo'
+    __author__ = 'Carlos Alves'
+    description = 'Noticias de Salto - Uruguay'
+    tags = 'news, sports'
+    language = 'es_UY'
+    timefmt = '[%a, %d %b, %Y]'
+    use_embedded_content = False
+    recursion = 5
+    encoding = 'utf8'
+    remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    keep_only_tags = [dict(name='div', attrs={'class':'post-alt blog'})]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['hr', 'titlebar', 'volver-arriba-right','navigation']}),
+        # One 'id' key holding a list: repeating the key in a dict literal keeps only the last value.
+        dict(name='div', attrs={'id':['comment', 'suckerfish', 'crp_related']}),
+        dict(name='h3', attrs={'class':['post_date']}),
+        dict(name=['object','link'])
+    ]
+
+    extra_css = '''
+        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+        p {font-family:Arial,Helvetica,sans-serif;}
+    '''
+    feeds = [
+        (u'Articulos', u'http://www.diarioelpueblo.com.uy/feed')
+    ]
+
+    def get_cover_url(self):
+        return 'http://www.diarioelpueblo.com.uy/wp-content/uploads/2013/06/Cabezal_Web1.jpg'
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
diff --git a/recipes/diario_salto.recipe b/recipes/diario_salto.recipe
new file mode 100644
index 0000000000..799233db4d
--- /dev/null
+++ b/recipes/diario_salto.recipe
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__author__ = '2013, Carlos Alves '
+'''
+diariosalto.com.uy
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class General(BasicNewsRecipe):
+    title = 'Diario Salto'
+    __author__ = 'Carlos Alves'
+    description = 'Noticias de Salto - Uruguay'
+    tags = 'news, sports'
+    language = 'es_UY'
+    timefmt = '[%a, %d %b, %Y]'
+    use_embedded_content = False
+    recursion = 5
+    encoding = 'utf8'
+    remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    keep_only_tags = [dict(name='div', attrs={'class':'post'})]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['hr', 'titlebar', 'navigation']}),
+        dict(name='div', attrs={'id':'comment'}),
+        dict(name=['object','link'])
+    ]
+
+    extra_css = '''
+        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+        p {font-family:Arial,Helvetica,sans-serif;}
+    '''
+    feeds = [
+        (u'Articulos', u'http://www.diariosalto.com.uy/feed/atom')
+    ]
+
+    def get_cover_url(self):
+        return 'http://diariosalto.com.uy/demo/wp-content/uploads/2011/12/diario-salto_logo-final-b-b.png'
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
diff --git a/recipes/el_observador.recipe b/recipes/el_observador.recipe
index 994963671e..c82a1b7380 100644
--- a/recipes/el_observador.recipe
+++ b/recipes/el_observador.recipe
@@ -1,18 +1,23 @@
 #!/usr/bin/env python
+##
+## Last Edited: 2013-09-29 Carlos Alves
+##
 
 __license__ = 'GPL v3'
 __author__ = '2010, Yuri Alvarez'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+
 '''
-observa.com.uy
+elobservador.com.uy
 '''
 
 from calibre.web.feeds.news import BasicNewsRecipe
 
-class ObservaDigital(BasicNewsRecipe):
-    title = 'Observa Digital'
-    __author__ = 'yrvn'
-    description = 'Noticias de Uruguay'
+class Noticias(BasicNewsRecipe):
+    title = 'El Observador'
+    __author__ = 'yrvn'
+    description = 'Noticias desde Uruguay'
+    tags = 'news, sports, entertainment'
     language = 'es_UY'
     timefmt = '[%a, %d %b, %Y]'
     use_embedded_content = False
@@ -23,13 +28,18 @@ class ObservaDigital(BasicNewsRecipe):
 
     oldest_article = 2
     max_articles_per_feed = 100
-    keep_only_tags = [dict(id=['contenido'])]
+    keep_only_tags = [
+        dict(name='div', attrs={'class':'story collapsed'})
+    ]
     remove_tags = [
-        dict(name='div', attrs={'id':'contenedorVinculadas'}),
-        dict(name='p', attrs={'id':'nota_firma'}),
+        dict(name='div', attrs={'class':['fecha', 'copyright', 'story_right']}),
+        dict(name='div', attrs={'class':['photo', 'social']}),
+        dict(name='div', attrs={'id':'widget'}),
         dict(name=['object','link'])
     ]
 
+    remove_attributes = ['width','height', 'style', 'font', 'color']
+
     extra_css = '''
         h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
         h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
@@ -37,19 +47,9 @@ class ObservaDigital(BasicNewsRecipe):
         p {font-family:Arial,Helvetica,sans-serif;}
     '''
     feeds = [
-        (u'Actualidad', u'http://www.observa.com.uy/RSS/actualidad.xml'),
-        (u'Deportes', u'http://www.observa.com.uy/RSS/deportes.xml'),
-        (u'Vida', u'http://www.observa.com.uy/RSS/vida.xml'),
-        (u'Ciencia y Tecnologia', u'http://www.observa.com.uy/RSS/ciencia.xml')
+        (u'Portada', u'http://elobservador.com.uy/rss/portada/'),
     ]
 
-    def get_cover_url(self):
-        index = 'http://www.observa.com.uy/'
-        soup = self.index_to_soup(index)
-        for image in soup.findAll('img',alt=True):
-            if image['alt'].startswith('Tapa El Observador'):
-                return image['src'].rstrip('b.jpg') + '.jpg'
-        return None
 
     def preprocess_html(self, soup):
         for item in soup.findAll(style=True):
diff --git a/recipes/padreydecano.recipe b/recipes/padreydecano.recipe
new file mode 100644
index 0000000000..3e1cbf24f2
--- /dev/null
+++ b/recipes/padreydecano.recipe
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__author__ = '2013, Carlos Alves '
+'''
+padreydecano.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class General(BasicNewsRecipe):
+    title = 'Padre y Decano'
+    __author__ = 'Carlos Alves'
+    description = 'El sitio del pueblo'
+    tags = 'soccer, futbol, Peñarol'
+    language = 'es_UY'
+    timefmt = '[%a, %d %b, %Y]'
+    use_embedded_content = False
+    recursion = 5
+    encoding = None
+    remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    keep_only_tags = [
+        dict(name='h1', attrs={'class':'entry-title'}),
+        dict(name='div', attrs={'class':'entry-content clearfix'})
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['br', 'hr', 'titlebar', 'navigation']}),
+        dict(name='dl', attrs={'class':'gallery-item'}),
+        dict(name=['object','link'])
+    ]
+
+    extra_css = '''
+        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+        p {font-family:Arial,Helvetica,sans-serif;}
+    '''
+    feeds = [
+        (u'Padre y Decano | Club Atlético Peñarol', u'http://www.padreydecano.com/cms/feed/')
+    ]
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup
diff --git a/recipes/unoticias.recipe b/recipes/unoticias.recipe
new file mode 100644
index 0000000000..f7d1c7693c
--- /dev/null
+++ b/recipes/unoticias.recipe
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+
+__license__ = 'GPL v3'
+__author__ = '2013, Carlos Alves '
+'''
+unoticias.com.uy
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class General(BasicNewsRecipe):
+    title = 'UNoticias'
+    __author__ = 'Carlos Alves'
+    description = 'Noticias Uruguay'
+    tags = 'news, sports, politics'
+    language = 'es_UY'
+    timefmt = '[%a, %d %b, %Y]'
+    use_embedded_content = False
+    recursion = 5
+    encoding = 'ISO-8859-1'
+    remove_javascript = True
+    no_stylesheets = True
+
+    oldest_article = 2
+    max_articles_per_feed = 100
+    keep_only_tags = [
+        dict(name='h1', attrs={'class':'nombre'}),
+        dict(name='h2', attrs={'class':'copete t20'}),
+        dict(name='div', attrs={'class':'desc'})
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['br', 'hr', 'titlebar', 'navigation']}),
+        dict(name='div', attrs={'id':'comment'}),
+        dict(name=['object','link'])
+    ]
+
+    extra_css = '''
+        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
+        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
+        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
+        p {font-family:Arial,Helvetica,sans-serif;}
+    '''
+    feeds = [
+        (u'Nacionales', u'http://www.unoticias.com.uy/RSS/nacionales.xml'),
+        (u'Deportes', u'http://www.unoticias.com.uy/RSS/deportes.xml'),
+        (u'Sociedad', u'http://www.unoticias.com.uy/RSS/Sociedad.xml')
+    ]
+
+    def get_cover_url(self):
+        return 'http://www.unoticias.com.uy/artworks/logos/logo_small.gif'
+
+    def preprocess_html(self, soup):
+        for item in soup.findAll(style=True):
+            del item['style']
+        return soup