diff --git a/recipes/daily_express.recipe b/recipes/daily_express.recipe index 3a74f733f0..af7b237e7f 100644 --- a/recipes/daily_express.recipe +++ b/recipes/daily_express.recipe @@ -1,72 +1,38 @@ # vim:fileencoding=UTF-8 from __future__ import unicode_literals + import re + from calibre.web.feeds.news import BasicNewsRecipe -class AdvancedUserRecipe1376229553(BasicNewsRecipe): - title = 'Daily Express' +class AdvancedUserRecipe1390132023(BasicNewsRecipe): + title = u'Daily Express' __author__ = 'Dave Asbury' - # 9-9-13 added article author and now use (re.compile(r'>[\w].+? News<' - # 16-11-13 cover adjustment - # 19.1.14 changes due to website changes breaking recipe - encoding = 'utf-8' - language = 'en_GB' - remove_empty_feeds = True - remove_javascript = True - no_stylesheets = True + # 21.11.14 written due to website changes oldest_article = 1 max_articles_per_feed = 10 - #auto_cleanup = True compress_news_images = True compress_news_images_max_size = 30 ignore_duplicate_articles = {'title', 'url'} masthead_url = 'http://cdn.images.dailyexpress.co.uk/img/page/express_logo.png' - #conversion_options = { 'linearize_tables' : True } + auto_cleanup_keep = '//*[@class="author"]|//section[@class="photo changeSpace"]' + auto_cleanup = True + no_stylesheets = False - remove_tags = [ - dict(attrs={'class' : 'quote'}), - dict(attrs={'class' : 'mainFooter cf'}), - dict(name='footer'), - dict(attrs={'id' : 'header_addons'}), - dict(attrs={'class' : 'hoverException'}), - dict(name='_li'),dict(name='li'), - dict(attrs={'class' : 'box related-articles clear'}), - dict(attrs={'class' : 'news-list'}), - dict(attrs={'class' : 'sponsored-section'}), - dict(attrs={'class' : 'pull-quote on-right'}), - dict(attrs={'class' : 'pull-quote on-left'}), - - ] - remove_tags_after = [dict(attrs={'class' : 'clearfix hR new-style'})] - extra_css = ''' - h1{font-weight:bold;font-size:175%;} - h2{font-weight:normal;font-size:75%;} - #p{font-size:14px;} - #body{font-size:14px;} - .photo-caption {display: block;margin-left: auto;margin-right: auto;width:100%;font-size:40%;} - .publish-info {font-size:50%;} - .photo img {display: block;margin-left: auto;margin-right: auto;width:100%;} - ''' + preprocess_regexps = [ + (re.compile(r'\| [\w].+?\| [\w].+?\| Daily Express', re.IGNORECASE | re.DOTALL), lambda match: ''), + ] feeds = [ - ('UK News', 'http://www.express.co.uk/posts/rss/1/uk'), - ('World News','http://www.express.co.uk/posts/rss/78/world'), - ('Finance','http://www.express.co.uk/posts/rss/21/finance'), - ('Sport','http://www.express.co.uk/posts/rss/65/sport'), - ('Entertainment','http://www.express.co.uk/posts/rss/18/entertainment'), - ('Lifestyle','http://www.express.co.uk/posts/rss/8/life&style'), - ('Fun','http://www.express.co.uk/posts/rss/110/fun'), + (u'UK News', u'http://www.express.co.uk/posts/rss/1/uk'), + (u'World News',u'http://www.express.co.uk/posts/rss/78/world'), + (u'Finance',u'http://www.express.co.uk/posts/rss/21/finance'), + (u'Sport',u'http://www.express.co.uk/posts/rss/65/sport'), + (u'Entertainment',u'http://www.express.co.uk/posts/rss/18/entertainment'), + (u'Lifestyle',u'http://www.express.co.uk/posts/rss/8/life&style'), + (u'Fun',u'http://www.express.co.uk/posts/rss/110/fun'), ] - def preprocess_raw_html(self, raw_html, url): - for pat, f in [ - (re.compile(r'', re.DOTALL), lambda m: ''), - (re.compile(r'',re.DOTALL), lambda m: ''), - (re.compile(r'',re.DOTALL), lambda m: ''), - ]: - raw_html = pat.sub(f, raw_html) - return raw_html - def get_cover_url(self): soup = self.index_to_soup('http://www.express.co.uk/ourpaper/') cov = soup.find(attrs={'src' : re.compile('http://cdn.images.express.co.uk/img/covers/')}) @@ -76,6 +42,7 @@ class AdvancedUserRecipe1376229553(BasicNewsRecipe): cov=str(cov2) cov=cov[2:len(cov)-2] + # cover_url=cov br = self.get_browser() br.set_handle_redirect(False) try: @@ -86,4 +53,12 @@ class AdvancedUserRecipe1376229553(BasicNewsRecipe): return cover_url - + extra_css = ''' + #h1{font-weight:bold;font-size:175%;} + h2{display: block;margin-left: auto;margin-right: auto;width:100%;font-weight:bold;font-size:175%;} + #p{font-size:14px;} + #body{font-size:14px;} + .newsCaption {display: block;margin-left: auto;margin-right: auto;width:100%;font-size:40%;} + .publish-info {font-size:50%;} + .photo img {display: block;margin-left: auto;margin-right: auto;width:100%;} + '''