diff --git a/recipes/daily_express.recipe b/recipes/daily_express.recipe new file mode 100644 index 0000000000..c9b620eeec --- /dev/null +++ b/recipes/daily_express.recipe @@ -0,0 +1,67 @@ +from calibre.web.feeds.news import BasicNewsRecipe +import re +class AdvancedUserRecipe1376229553(BasicNewsRecipe): + title = u'Daily Express' + __author__ = 'Dave Asbury' + encoding = 'utf-8' + remove_empty_feeds = True + #remove_javascript = True + no_stylesheets = True + oldest_article = 2 + max_articles_per_feed = 10 + #auto_cleanup = True + compress_news_images = True + compress_news_images_max_size = 25 + ignore_duplicate_articles = {'title', 'url'} + + remove_tags = [ + dict(name='footer'), + dict(attrs={'id' : 'header_addons'}), + dict(attrs={'class' : 'hoverException'}), + dict(name='_li'),dict(name='li'), + dict(attrs={'class' : 'box related-articles clear'}), + dict(attrs={'class' : 'news-list'}), + ] + keep_only_tags = [ + dict(name='h1'), + dict(attrs={'class' : 'publish-info'}), + # dict(name='h3'), + #dict(name='section',attrs={'class' : 'photo'}), + #dict(name='section',attrs={'class' : 'text-description'}), + + dict(attrs={'class' : 'clearfix hR new-style'}), + ] + + preprocess_regexps = [ + (re.compile(r'widget', re.IGNORECASE | re.DOTALL), lambda match: '')] + + preprocess_regexps = [ + (re.compile(r'