From a342162cffd5b78368d06807a7703e4f99755bbc Mon Sep 17 00:00:00 2001 From: Carlos Alves Date: Sun, 29 Sep 2013 17:34:45 -0300 Subject: [PATCH] Add padreydecano.recipe, update el_observador... MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add padreydecano.recipe (C.A. Peñarol related news). Update el_observador.recipe, which had been broken for a long time. --- recipes/el_observador.recipe | 38 +++++++++++++-------------- recipes/padreydecano.recipe | 50 ++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 19 deletions(-) create mode 100644 recipes/padreydecano.recipe diff --git a/recipes/el_observador.recipe b/recipes/el_observador.recipe index 994963671e..c82a1b7380 100644 --- a/recipes/el_observador.recipe +++ b/recipes/el_observador.recipe @@ -1,18 +1,23 @@ #!/usr/bin/env python +## +## Last Edited: 2013-09-29 Carlos Alves +## __license__ = 'GPL v3' __author__ = '2010, Yuri Alvarez' __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' + ''' -observa.com.uy +elobservador.com.uy ''' from calibre.web.feeds.news import BasicNewsRecipe -class ObservaDigital(BasicNewsRecipe): - title = 'Observa Digital' - __author__ = 'yrvn' - description = 'Noticias de Uruguay' +class Noticias(BasicNewsRecipe): + title = 'El Observador' + __author__ = 'yrvn' + description = 'Noticias desde Uruguay' + tags = 'news, sports, entretainment' language = 'es_UY' timefmt = '[%a, %d %b, %Y]' use_embedded_content = False @@ -23,13 +28,18 @@ class ObservaDigital(BasicNewsRecipe): oldest_article = 2 max_articles_per_feed = 100 - keep_only_tags = [dict(id=['contenido'])] + keep_only_tags = [ + dict(name='div', attrs={'class':'story collapsed'}) + ] remove_tags = [ - dict(name='div', attrs={'id':'contenedorVinculadas'}), - dict(name='p', attrs={'id':'nota_firma'}), + dict(name='div', attrs={'class':['fecha', 'copyright', 'story_right']}), + dict(name='div', attrs={'class':['photo', 'social']}), + dict(name='div', attrs={'id':'widget'}), 
dict(name=['object','link']) ] + remove_attributes = ['width','height', 'style', 'font', 'color'] + extra_css = ''' h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} @@ -37,19 +47,9 @@ class ObservaDigital(BasicNewsRecipe): p {font-family:Arial,Helvetica,sans-serif;} ''' feeds = [ - (u'Actualidad', u'http://www.observa.com.uy/RSS/actualidad.xml'), - (u'Deportes', u'http://www.observa.com.uy/RSS/deportes.xml'), - (u'Vida', u'http://www.observa.com.uy/RSS/vida.xml'), - (u'Ciencia y Tecnologia', u'http://www.observa.com.uy/RSS/ciencia.xml') + (u'Portada', u'http://elobservador.com.uy/rss/portada/'), ] - def get_cover_url(self): - index = 'http://www.observa.com.uy/' - soup = self.index_to_soup(index) - for image in soup.findAll('img',alt=True): - if image['alt'].startswith('Tapa El Observador'): - return image['src'].rstrip('b.jpg') + '.jpg' - return None def preprocess_html(self, soup): for item in soup.findAll(style=True): diff --git a/recipes/padreydecano.recipe b/recipes/padreydecano.recipe new file mode 100644 index 0000000000..3e1cbf24f2 --- /dev/null +++ b/recipes/padreydecano.recipe @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +__license__ = 'GPL v3' +__author__ = '2013, Carlos Alves ' +''' +padreydecano.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class General(BasicNewsRecipe): + title = 'Padre y Decano' + __author__ = 'Carlos Alves' + description = 'El sitio del pueblo' + tags = 'soccer, futbol, Peñarol' + language = 'es_UY' + timefmt = '[%a, %d %b, %Y]' + use_embedded_content = False + recursion = 5 + encoding = None + remove_javascript = True + no_stylesheets = True + + oldest_article = 2 + max_articles_per_feed = 100 + keep_only_tags = [ + dict(name='h1', attrs={'class':'entry-title'}), + dict(name='div', attrs={'class':'entry-content clearfix'}) + ] + + remove_tags = [ + dict(name='div', attrs={'class':['br', 'hr', 
'titlebar', 'navigation']}), + dict(name='dl', attrs={'class':'gallery-item'}), + dict(name=['object','link']) + ] + + extra_css = ''' + h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;} + h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;} + h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;} + p {font-family:Arial,Helvetica,sans-serif;} + ''' + feeds = [ + (u'Padre y Decano | Club Atlético Peñarol', u'http://www.padreydecano.com/cms/feed/') + ] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup