diff --git a/recipes/hola.recipe b/recipes/hola.recipe index 1ea698228d..4e43f1fa76 100644 --- a/recipes/hola.recipe +++ b/recipes/hola.recipe @@ -1,38 +1,73 @@ #!/usr/bin/env python -__license__ = 'GPL v3' -__copyright__ = '2010, Brendan Sleight ' +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +__license__ = 'GPL v3' +__copyright__ = '30 June 2012, desUBIKado' +__author__ = 'desUBIKado' +__description__ = 'Diario de actualidad, moda y belleza' +__version__ = 'v0.01' +__date__ = '30, June 2012' ''' -hola.com +http://www.hola.com/ ''' from calibre.web.feeds.news import BasicNewsRecipe -class Hackaday(BasicNewsRecipe): - title = u'Hola' - __author__ = 'bmsleight' - description = 'diario de actualidad, moda y belleza.' - oldest_article = 10 +class hola_es(BasicNewsRecipe): + __author__ = 'desUBIKado' + description = 'Diario de actualidad, moda y belleza' + title = u'¡Hola!' + publisher = 'Hola S.L.' + category = 'Spanish celebrities, Entertainment News, Royalty, Daily Variety, Hollywood' + language = 'es' + masthead_url = 'http://imagenes.hola.com/comunes/2008/logo-holacom.gif' + timefmt = '[%a, %d %b, %Y]' + oldest_article = 7 + delay = 1 + encoding = 'utf-8' max_articles_per_feed = 100 - no_stylesheets = True - language = 'es' - use_embedded_content = False - - keep_only_tags = [ - dict(name='div', attrs={'id':'cuerpo'}) - ] + remove_empty_feeds = True + remove_javascript = True + no_stylesheets = True feeds = [ - (u'Famosos' , u'http://www.hola.com/famosos/rss.xml' ), - (u'Realeza' , u'http://www.hola.com/realeza/rss.xml' ), - (u'Cine' , u'http://www.hola.com/cine/rss.xml' ), - (u'Música' , u'http://www.hola.com/musica/rss.xml' ), - (u'Moda y modelos' , u'http://www.hola.com/moda/portada/rss.xml' ), - (u'Belleza y salud', u'http://www.hola.com/belleza/portada/rss.xml' ), - (u'Niños' , u'http://www.hola.com/ninos/rss.xml' ), - (u'Todas las noticias', u'http://int2.hola.com/app/feeds/rss_hola.php'), + (u'Famosos' , u'http://www.hola.com/famosos/rss.xml' ) + ,(u'Realeza' , u'http://www.hola.com/realeza/rss.xml' ) + ,(u'Cine' , u'http://www.hola.com/cine/rss.xml' ) + ,(u'M\xfasica' , u'http://www.hola.com/musica/rss.xml' ) + ,(u'Moda y modelos' , u'http://www.hola.com/moda/portada/rss.xml' ) + ,(u'Belleza y salud', u'http://www.hola.com/belleza/portada/rss.xml' ) + ,(u'Ni\xf1os' , u'http://www.hola.com/ninos/rss.xml' ) + ] + + + + + keep_only_tags = [dict(name='div', attrs={'id':['cuerpo','com']})] + + remove_tags = [dict(name='div', attrs={'id':['relacionadas','slide-enlaces-patrocinados','comentarios']}), + dict(name='div', attrs={'class':['slide-enlaces-patricinados-tit','compartir']}) ] + remove_tags_after = dict(name='div' , attrs={'id':'comentarios'}) + + + # Recuperamos la portada de papel (la imagen 520 tiene mayor resolucion) + + def get_cover_url(self): + index = 'http://www.hola.com/abono/ediciondigital/' + soup = self.index_to_soup(index) + for image in soup.findAll('img',src=True): + if image['src'].endswith('portada-revista-hola-520.jpg'): + return 'http://www.hola.com/' + image['src'] + return None + + def get_article_url(self, article): url = article.get('guid', None) return url + + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal; font-style:italic; font-size:18px;} + '''