__license__ = 'GPL v3'
__author__ = 'Luis Hernandez'
__copyright__ = 'Luis Hernandez'
__version__ = 'v0.85'
__date__ = '31 January 2011'

'''
www.20minutos.es
'''

import re

from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1294946868(BasicNewsRecipe):
    """Recipe for the Spanish newspaper 20 Minutos (www.20minutos.es).

    The class is purely declarative: it only sets the configuration
    attributes that ``BasicNewsRecipe`` reads (metadata, download options,
    HTML filters, styling and the list of RSS feeds).
    """

    # --- Publication metadata -------------------------------------------
    title = u'20 Minutos new'
    publisher = u'Grupo 20 Minutos'
    __author__ = 'Luis Hernandez'
    description = 'Free spanish newspaper'
    cover_url = 'http://estaticos.20minutos.es/mmedia/especiales/corporativo/css/img/logotipos_grupo20minutos.gif'

    # --- Download / conversion options ----------------------------------
    oldest_article = 2
    max_articles_per_feed = 100

    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    encoding = 'ISO-8859-1'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    remove_empty_feeds = True

    # --- HTML filtering --------------------------------------------------
    # Tag specifications for the parts of the page that hold the article.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': ['content', 'vinetas']}},
        {
            'name': 'div',
            'attrs': {
                'class': [
                    'boxed',
                    'description',
                    'lead',
                    'article-content',
                    'cuerpo estirar',
                ],
            },
        },
        {'name': 'span', 'attrs': {'class': ['photo-bar']}},
        {'name': 'ul', 'attrs': {'class': ['article-author']}},
    ]

    # Boundary markers used to trim content around the article.
    remove_tags_before = {'name': 'ul', 'attrs': {'class': ['servicios-sub']}}
    remove_tags_after = {
        'name': 'div',
        'attrs': {'class': ['related-news', 'col']},
    }

    # Tag specifications for page furniture (navigation, comments, sharing
    # widgets, galleries, ...) to strip.
    remove_tags = [
        {'name': 'ol', 'attrs': {'class': ['navigation']}},
        {'name': 'span', 'attrs': {'class': ['action']}},
        {
            'name': 'div',
            'attrs': {
                'class': [
                    'twitter comments-list hidden',
                    'related-news',
                    'col',
                    'photo-gallery',
                    'photo-gallery side-art-block',
                    'calendario',
                    'article-comment',
                    'postto estirar',
                    'otras_vinetas estirar',
                    'kment',
                    'user-actions',
                ],
            },
        },
        {
            'name': 'div',
            'attrs': {
                'id': [
                    'twitter-destacados',
                    'eco-tabs',
                    'inner',
                    'vineta_calendario',
                    'vinetistas clearfix',
                    'otras_vinetas estirar',
                    'MIN1',
                    'main',
                    'SUP1',
                    'INT',
                ],
            },
        },
        {
            'name': 'ul',
            'attrs': {'class': ['article-user-actions', 'stripped-list']},
        },
        {'name': 'ul', 'attrs': {'id': ['site-links']}},
        {
            'name': 'li',
            'attrs': {'class': ['puntuacion', 'enviar', 'compartir']},
        },
    ]

    # --- Styling ---------------------------------------------------------
    # Built by implicit concatenation; the resulting value (including the
    # embedded newline) is the same as the original triple-quoted literal.
    extra_css = (
        ' p{text-align: justify; font-size: 100%} '
        'body{ text-align: left; font-size:100% } '
        'h3{font-family: sans-serif; \n'
        'font-size:150%; font-weight:bold; text-align: justify; } '
    )

    # NOTE(review): the pattern below is the empty string, so replacing its
    # matches with '' leaves the markup unchanged. The intended pattern may
    # have been lost at some point -- confirm against the upstream recipe.
    preprocess_regexps = [
        (re.compile(r'', re.DOTALL), lambda m: ''),
    ]

    # --- Feeds: (section title, RSS URL) pairs ---------------------------
    feeds = [
        (u'Portada', u'http://www.20minutos.es/rss/'),
        (u'Nacional', u'http://www.20minutos.es/rss/nacional/'),
        (u'Internacional', u'http://www.20minutos.es/rss/internacional/'),
        (u'Economia', u'http://www.20minutos.es/rss/economia/'),
        (u'Deportes', u'http://www.20minutos.es/rss/deportes/'),
        (u'Tecnologia', u'http://www.20minutos.es/rss/tecnologia/'),
        (u'Gente - TV', u'http://www.20minutos.es/rss/gente-television/'),
        (u'Motor', u'http://www.20minutos.es/rss/motor/'),
        (u'Salud', u'http://www.20minutos.es/rss/belleza-y-salud/'),
        (u'Viajes', u'http://www.20minutos.es/rss/viajes/'),
        (u'Vivienda', u'http://www.20minutos.es/rss/vivienda/'),
        (u'Empleo', u'http://www.20minutos.es/rss/empleo/'),
        (u'Cine', u'http://www.20minutos.es/rss/cine/'),
        (u'Musica', u'http://www.20minutos.es/rss/musica/'),
        (u'Vinetas', u'http://www.20minutos.es/rss/vinetas/'),
        (u'Comunidad20', u'http://www.20minutos.es/rss/zona20/'),
    ]