#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '30 June 2012, desUBIKado'
__author__ = 'desUBIKado'
__description__ = 'Diario de actualidad, moda y belleza'
__version__ = 'v0.01'
__date__ = '30, June 2012'
'''
http://www.hola.com/
'''

from calibre.web.feeds.news import BasicNewsRecipe


class hola_es(BasicNewsRecipe):
    """Calibre recipe that builds an e-book from the hola.com RSS feeds.

    The recipe downloads the section feeds listed in ``feeds``, keeps only
    the article body containers, strips related-links / sponsored / sharing
    / comment boxes, and uses the current print-edition cover as the cover.
    """

    # --- Publication metadata -------------------------------------------
    __author__ = 'desUBIKado'
    description = 'Diario de actualidad, moda y belleza'
    title = u'¡Hola!'
    publisher = 'Hola S.L.'
    category = 'Spanish celebrities, Entertainment News, Royalty, Daily Variety, Hollywood'
    language = 'es'
    masthead_url = 'http://imagenes.hola.com/comunes/2008/logo-holacom.gif'
    timefmt = '[%a, %d %b, %Y]'

    # --- Download behaviour ---------------------------------------------
    oldest_article = 7            # maximum article age, in days
    delay = 1                     # pause between consecutive downloads, in seconds
    encoding = 'utf-8'
    max_articles_per_feed = 100
    use_embedded_content = False  # always fetch the full article page
    remove_empty_feeds = True
    remove_javascript = True
    no_stylesheets = True

    # (section title, RSS URL) pairs, in the order they appear in the book.
    feeds = [
        (u'Famosos', u'http://www.hola.com/famosos/rss.xml'),
        (u'Realeza', u'http://www.hola.com/realeza/rss.xml'),
        (u'Cine', u'http://www.hola.com/cine/rss.xml'),
        (u'M\xfasica', u'http://www.hola.com/musica/rss.xml'),
        (u'Moda y modelos', u'http://www.hola.com/moda/portada/rss.xml'),
        (u'Belleza y salud', u'http://www.hola.com/belleza/portada/rss.xml'),
        (u'Ni\xf1os', u'http://www.hola.com/ninos/rss.xml'),
    ]

    # --- Page clean-up ----------------------------------------------------
    # Only these containers are kept from each article page.
    keep_only_tags = [dict(name='div', attrs={'id': ['cuerpo', 'com']})]

    # Boxes dropped from inside the kept containers. The class name
    # 'slide-enlaces-patricinados-tit' is reproduced exactly as the recipe
    # has always matched it; do not "correct" its spelling.
    remove_tags = [
        dict(name='div', attrs={'id': ['relacionadas', 'slide-enlaces-patrocinados', 'comentarios']}),
        dict(name='div', attrs={'class': ['slide-enlaces-patricinados-tit', 'compartir']}),
    ]

    # Everything that follows the comments container is discarded.
    remove_tags_after = dict(name='div', attrs={'id': 'comentarios'})

    # Fetch the print-edition cover (the "520" image has the higher resolution).
    def get_cover_url(self):
        """Return the URL of the current print-edition cover, or ``None``.

        Scans the digital-edition page for the first ``<img>`` whose ``src``
        ends with ``portada-revista-hola-520.jpg`` and turns that ``src``
        into an absolute URL:

        * already absolute (``http://`` / ``https://``) -> returned as is;
        * protocol-relative (``//host/...``) -> prefixed with ``http:``;
        * anything else -> resolved against ``http://www.hola.com/``, with
          leading slashes stripped so the result never contains ``//`` in
          its path.

        Returns ``None`` when no matching image is found.
        """
        index = 'http://www.hola.com/abono/ediciondigital/'
        soup = self.index_to_soup(index)
        for image in soup.findAll('img', src=True):
            src = image['src']
            if not src.endswith('portada-revista-hola-520.jpg'):
                continue
            if src.startswith(('http://', 'https://')):
                return src
            if src.startswith('//'):
                return 'http:' + src
            return 'http://www.hola.com/' + src.lstrip('/')
        return None

    def get_article_url(self, article):
        """Return the URL to download for a parsed feed entry.

        The entry's ``guid`` is preferred. When an entry carries no ``guid``
        the entry's ``link`` is used instead, so the article is not lost;
        ``None`` is returned only when both are missing.
        """
        return article.get('guid', None) or article.get('link', None)

    # Heading styles applied to the generated book.
    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;}
                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal; font-style:italic; font-size:18px;}
                '''