diff --git a/recipes/daily_express.recipe b/recipes/daily_express.recipe index 208fbc7172..dc55d3f0b8 100644 --- a/recipes/daily_express.recipe +++ b/recipes/daily_express.recipe @@ -1,10 +1,9 @@ -from calibre.web.feeds.news import BasicNewsRecipe import re +from calibre.web.feeds.news import BasicNewsRecipe class AdvancedUserRecipe1376229553(BasicNewsRecipe): title = u'Daily Express' __author__ = 'Dave Asbury' - language = 'en_GB' - # 13-08-17 remove quoted text from article + # 9-9-13 added article author and now use (re.compile(r'>[\w].+? News<' encoding = 'utf-8' remove_empty_feeds = True #remove_javascript = True @@ -15,33 +14,49 @@ class AdvancedUserRecipe1376229553(BasicNewsRecipe): compress_news_images = True compress_news_images_max_size = 30 ignore_duplicate_articles = {'title', 'url'} + masthead_url = 'http://cdn.images.dailyexpress.co.uk/img/page/express_logo.png' + + preprocess_regexps = [ + + (re.compile(r'widget', re.IGNORECASE | re.DOTALL), lambda match: ''), + (re.compile(r'Related articles', re.IGNORECASE | re.DOTALL), lambda match: ''), + (re.compile(r'Add Your Comment<', re.IGNORECASE | re.DOTALL), lambda match: '<'), + (re.compile(r'>More [\w].+?<', re.IGNORECASE), lambda match: '><'), + (re.compile(r'>[\w].+? News<', re.IGNORECASE), lambda match: '><'), + #(re.compile(r'Health News<', re.IGNORECASE | re.DOTALL), lambda match: '<'), + #(re.compile(r'Car News<', re.IGNORECASE | re.DOTALL), lambda match: '<'), + #(re.compile(r'TV & Radio News<', re.IGNORECASE | re.DOTALL), lambda match: '<'), + #(re.compile(r'Food & Recipe News<', re.IGNORECASE | re.DOTALL), lambda match: '<'), + #(re.compile(r'More City & Business<', re.IGNORECASE | re.DOTALL), lambda match: '<'), + #(re.compile(r'Travel News<', re.IGNORECASE | re.DOTALL), lambda match: '<'), + #(re.compile(r'Garden News<', re.IGNORECASE | re.DOTALL), lambda match: '<'), + #(re.compile(r'Fashion & Beauty News<', re.IGNORECASE | re.DOTALL), lambda match: '<'), + #(re.compile(r'More Personal Finance<', re.IGNORECASE | re.DOTALL), lambda match: '<'), + #(re.compile(r'