diff --git a/recipes/el_mundo_today.recipe b/recipes/el_mundo_today.recipe index 77a9f331a0..7f558d10e7 100644 --- a/recipes/el_mundo_today.recipe +++ b/recipes/el_mundo_today.recipe @@ -1,3 +1,4 @@ +import re from calibre.web.feeds.news import BasicNewsRecipe class ElMundoTodayRecipe(BasicNewsRecipe): @@ -7,11 +8,32 @@ class ElMundoTodayRecipe(BasicNewsRecipe): category = 'Noticias, humor' cover_url = 'http://www.elmundotoday.com/wp-content/themes/EarthlyTouch/images/logo.png' oldest_article = 30 - max_articles_per_feed = 30 - auto_cleanup = True + max_articles_per_feed = 60 + auto_cleanup = False no_stylesheets = True + remove_javascript = True language = 'es' - use_embedded_content = True + use_embedded_content = False + + preprocess_regexps = [ + (re.compile(r'.*', re.DOTALL), + lambda match: ''), + #(re.compile(r'^\t{5}$'), lambda match: ''), + #(re.compile(r'\t{5}$'), lambda match: ''), + (re.compile(r'
', re.DOTALL), + lambda match: ''), + ] + + keep_only_tags = [ + dict(name='div', attrs={'class':'post-wrapper'}) + ] + + remove_attributes = [ 'href', 'title', 'alt' ] + + extra_css = ''' + .antetitulo{font-variant:small-caps; font-weight:bold} .articleinfo{font-size:small} + img{margin-bottom:0.4em; display:block; margin-left:auto; margin-right:auto} + ''' feeds = [('El Mundo Today', 'http://www.elmundotoday.com/feed/')]