mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-10-24 23:38:55 -04:00 
			
		
		
		
	
		
			
				
	
	
		
			91 lines
		
	
	
		
			3.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			91 lines
		
	
	
		
			3.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env python
 | |
| # vim:fileencoding=utf-8
 | |
| __license__ = 'GPL v3'
 | |
| __author__ = 'Luis Hernandez'
 | |
| __copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
 | |
| __version__ = 'v0.85'
 | |
| __date__ = '31 January 2011'
 | |
| 
 | |
| '''
 | |
| www.20minutos.es
 | |
| '''
 | |
| import re
 | |
| 
 | |
| from calibre.web.feeds.news import BasicNewsRecipe
 | |
| 
 | |
| 
 | |
class AdvancedUserRecipe1294946868(BasicNewsRecipe):
    '''
    Recipe for the free Spanish newspaper 20 Minutos (www.20minutos.es).

    Articles are collected from the section RSS feeds listed in ``feeds``.
    The article age limit (``oldest_article``) can be overridden at run time
    through the ``days`` entry of ``recipe_specific_options``.
    '''

    title = '20 Minutos'
    publisher = 'Grupo 20 Minutos'

    __author__ = 'Luis Hernandez'
    description = 'Free spanish newspaper'
    cover_url = 'http://estaticos.20minutos.es/mmedia/especiales/corporativo/css/img/logotipos_grupo20minutos.gif'

    # Default age limit, in days; also used as the default of the 'days'
    # option declared in recipe_specific_options below.
    oldest_article = 2
    max_articles_per_feed = 100

    remove_javascript = True
    no_stylesheets = True
    use_embedded_content = False

    encoding = 'ISO-8859-1'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    remove_empty_feeds = True

    # Tag matchers, one per entry: (tag name, attribute -> accepted values).
    keep_only_tags = [
        dict(name='div', attrs={'id': ['content', 'vinetas']}),
        dict(name='div', attrs={'class': [
            'boxed',
            'description',
            'lead',
            'article-content',
            'cuerpo estirar',
        ]}),
        dict(name='span', attrs={'class': ['photo-bar']}),
        dict(name='ul', attrs={'class': ['article-author']}),
    ]

    remove_tags_before = dict(name='ul', attrs={'class': ['servicios-sub']})
    remove_tags_after = dict(name='div', attrs={'class': ['related-news', 'col']})

    remove_tags = [
        dict(name='ol', attrs={'class': ['navigation']}),
        dict(name='span', attrs={'class': ['action']}),
        dict(name='div', attrs={'class': [
            'twitter comments-list hidden',
            'related-news',
            'col',
            'photo-gallery',
            'photo-gallery side-art-block',
            'calendario',
            'article-comment',
            'postto estirar',
            'otras_vinetas estirar',
            'kment',
            'user-actions',
        ]}),
        dict(name='div', attrs={'id': [
            'twitter-destacados',
            'eco-tabs',
            'inner',
            'vineta_calendario',
            'vinetistas clearfix',
            'otras_vinetas estirar',
            'MIN1',
            'main',
            'SUP1',
            'INT',
        ]}),
        dict(name='ul', attrs={'class': ['article-user-actions', 'stripped-list']}),
        dict(name='ul', attrs={'id': ['site-links']}),
        dict(name='li', attrs={'class': ['puntuacion', 'enviar', 'compartir']}),
    ]

    extra_css = '''
                p{text-align: justify; font-size: 100%}
                body{ text-align: left; font-size:100% }
                h3{font-family: sans-serif; font-size:150%; font-weight:bold; text-align: justify; }
                '''

    # Replace the opening <a ...> tag of links to the "estaticos" host with an
    # empty string. The class is written [0-9] (one digit): the lazy '.*?'
    # before it already consumes any preceding digits of the pixel value, and
    # a wider-looking range such as [0-999] would still match a single digit.
    preprocess_regexps = [
        (
            re.compile(r'<a href="http://estaticos.*?[0-9]px;" target="_blank">', re.DOTALL),
            lambda m: '',
        ),
    ]

    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article)
        }
    }

    # (section title, RSS feed URL) pairs.
    feeds = [
        ('Portada', 'http://www.20minutos.es/rss/'),
        ('Nacional', 'http://www.20minutos.es/rss/nacional/'),
        ('Internacional', 'http://www.20minutos.es/rss/internacional/'),
        ('Economia', 'http://www.20minutos.es/rss/economia/'),
        ('Deportes', 'http://www.20minutos.es/rss/deportes/'),
        ('Tecnologia', 'http://www.20minutos.es/rss/tecnologia/'),
        ('Gente - TV', 'http://www.20minutos.es/rss/gente-television/'),
        ('Motor', 'http://www.20minutos.es/rss/motor/'),
        ('Salud', 'http://www.20minutos.es/rss/belleza-y-salud/'),
        ('Viajes', 'http://www.20minutos.es/rss/viajes/'),
        ('Vivienda', 'http://www.20minutos.es/rss/vivienda/'),
        ('Empleo', 'http://www.20minutos.es/rss/empleo/'),
        ('Cine', 'http://www.20minutos.es/rss/cine/'),
        ('Musica', 'http://www.20minutos.es/rss/musica/'),
        ('Vinetas', 'http://www.20minutos.es/rss/vinetas/'),
        ('Comunidad20', 'http://www.20minutos.es/rss/zona20/'),
    ]

    def __init__(self, *args, **kwargs):
        '''
        Initialise the base recipe, then apply the ``days`` option.

        After the base initialiser has run, ``self.recipe_specific_options``
        is read for a ``'days'`` entry; when that entry is a non-empty string
        it is converted with ``float`` and stored as ``oldest_article``.
        A non-numeric string makes ``float`` raise ``ValueError``.
        '''
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        d = self.recipe_specific_options.get('days')
        if d and isinstance(d, str):
            self.oldest_article = float(d)
 |