__license__ = 'GPL v3'
__author__ = 'Luis Hernandez'
__copyright__ = 'Luis Hernandez'
__version__ = 'v0.85'
__date__ = '31 January 2011'

'''
www.20minutos.es
'''

import re

from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1294946868(BasicNewsRecipe):
    """Calibre news recipe for the Spanish newspaper 20 Minutos.

    The recipe is purely declarative: it sets ``BasicNewsRecipe`` class
    attributes (metadata, markup selectors, CSS and the RSS feed list) and
    defines no methods of its own.
    """

    # --- Publication metadata -------------------------------------------
    title = u'20 Minutos'
    publisher = u'Grupo 20 Minutos'
    __author__ = 'Luis Hernandez'
    description = 'Free spanish newspaper'
    cover_url = 'http://estaticos.20minutos.es/mmedia/especiales/corporativo/css/img/logotipos_grupo20minutos.gif'

    # --- Download / conversion options ------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100

    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    encoding = 'ISO-8859-1'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    remove_empty_feeds = True

    # --- Markup selection ---------------------------------------------------
    # Elements to keep from each article page.
    keep_only_tags = [
        dict(name='div', attrs={'id': ['content', 'vinetas']}),
        dict(name='div', attrs={'class': [
            'boxed',
            'description',
            'lead',
            'article-content',
            'cuerpo estirar',
        ]}),
        dict(name='span', attrs={'class': ['photo-bar']}),
        dict(name='ul', attrs={'class': ['article-author']}),
    ]

    # Boundary markers: content before / after these elements is dropped.
    remove_tags_before = dict(name='ul', attrs={'class': ['servicios-sub']})
    remove_tags_after = dict(
        name='div', attrs={'class': ['related-news', 'col']})

    # Elements stripped from the kept content (navigation, sharing widgets,
    # comments, galleries, calendars and similar page furniture).
    remove_tags = [
        dict(name='ol', attrs={'class': ['navigation']}),
        dict(name='span', attrs={'class': ['action']}),
        dict(name='div', attrs={'class': [
            'twitter comments-list hidden',
            'related-news',
            'col',
            'photo-gallery',
            'photo-gallery side-art-block',
            'calendario',
            'article-comment',
            'postto estirar',
            'otras_vinetas estirar',
            'kment',
            'user-actions',
        ]}),
        dict(name='div', attrs={'id': [
            'twitter-destacados',
            'eco-tabs',
            'inner',
            'vineta_calendario',
            'vinetistas clearfix',
            'otras_vinetas estirar',
            'MIN1',
            'main',
            'SUP1',
            'INT',
        ]}),
        dict(name='ul', attrs={'class': ['article-user-actions', 'stripped-list']}),
        dict(name='ul', attrs={'id': ['site-links']}),
        dict(name='li', attrs={'class': ['puntuacion', 'enviar', 'compartir']}),
    ]

    # --- Styling ------------------------------------------------------------
    # Built from adjacent literals so the resulting value (including the
    # embedded newline) is exactly the stylesheet text the recipe carried.
    extra_css = (
        ' p{text-align: justify; font-size: 100%}'
        ' body{ text-align: left; \nfont-size:100% }'
        ' h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; } '
    )

    # NOTE(review): the pattern is an empty string, which only ever matches
    # empty positions; replacing those with '' leaves the page unchanged.
    # Presumably the intended pattern was lost at some point -- TODO confirm
    # against the upstream recipe before relying on this rule.
    preprocess_regexps = [
        (re.compile(r'', re.DOTALL), lambda m: ''),
    ]

    # --- Feeds --------------------------------------------------------------
    # (section title, RSS URL) pairs.
    feeds = [
        (u'Portada', u'http://www.20minutos.es/rss/'),
        (u'Nacional', u'http://www.20minutos.es/rss/nacional/'),
        (u'Internacional', u'http://www.20minutos.es/rss/internacional/'),
        (u'Economia', u'http://www.20minutos.es/rss/economia/'),
        (u'Deportes', u'http://www.20minutos.es/rss/deportes/'),
        (u'Tecnologia', u'http://www.20minutos.es/rss/tecnologia/'),
        (u'Gente - TV', u'http://www.20minutos.es/rss/gente-television/'),
        (u'Motor', u'http://www.20minutos.es/rss/motor/'),
        (u'Salud', u'http://www.20minutos.es/rss/belleza-y-salud/'),
        (u'Viajes', u'http://www.20minutos.es/rss/viajes/'),
        (u'Vivienda', u'http://www.20minutos.es/rss/vivienda/'),
        (u'Empleo', u'http://www.20minutos.es/rss/empleo/'),
        (u'Cine', u'http://www.20minutos.es/rss/cine/'),
        (u'Musica', u'http://www.20minutos.es/rss/musica/'),
        (u'Vinetas', u'http://www.20minutos.es/rss/vinetas/'),
        (u'Comunidad20', u'http://www.20minutos.es/rss/zona20/'),
    ]