From 717e8ccdd467bb0a72b0e153410c69c167280c18 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 19 Dec 2013 21:27:14 +0530 Subject: [PATCH] Update The Daily Mail --- recipes/daily_mail.recipe | 39 +++++++++++++-------------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/recipes/daily_mail.recipe b/recipes/daily_mail.recipe index ac2dfd1777..0a576f7012 100644 --- a/recipes/daily_mail.recipe +++ b/recipes/daily_mail.recipe @@ -8,6 +8,8 @@ class TheDailyMail(BasicNewsRecipe): author = 'RufusA and Sujata Raman' simultaneous_downloads= 1 max_articles_per_feed = 50 + use_embedded_content = False + auto_cleanup = True extra_css = '''#js-article-text{font-family:Arial,Helvetica,sans-serif;} h1{font-size:x-large; font-weight:bold;} @@ -18,33 +20,18 @@ class TheDailyMail(BasicNewsRecipe): ''' - remove_tags = [ dict(name='div', attrs={'class':['article-icon-links-container','print-or-mail-links cleared', - 'social-links cleared','explore-links','relatedItems','intellicrumbs box','travel','moduleHalf']}), - dict(name='div', attrs={'id':['link-unit-wrapper','pushdown-ad','reader-comments','googleAds',]}), - dict(name='h3', attrs={'class':['social-links-title']}), - dict(name='span', attrs={'class':['clickToEnlargeTop']}), - ] - #remove_tags_before = dict(name='div', attrs={'id':'content'}) - keep_only_tags = [dict(name='div', attrs={'id':'js-article-text'})] - no_stylesheets = True feeds = [ - (u'Home', u'http://www.dailymail.co.uk/home/index.rss'), - (u'News', u'http://www.dailymail.co.uk/news/index.rss'), - (u'Sport', u'http://www.dailymail.co.uk/sport/index.rss'), - (u'TV and Showbiz', u'http://www.dailymail.co.uk/tvshowbiz/index.rss'), - (u'Femail', u'http://www.dailymail.co.uk/femail/index.rss'), - (u'Health', u'http://www.dailymail.co.uk/health/index.rss'), - (u'Science and Technology', u'http://www.dailymail.co.uk/sciencetech/index.rss'), - (u'Money', u'http://www.dailymail.co.uk/money/index.rss'), - (u'Property', u'http://www.dailymail.co.uk/property/index.rss'), - (u'Motoring', u'http://www.dailymail.co.uk/motoring/index.rss'), - (u'Travel', u'http://www.dailymail.co.uk/travel/index.rss') + (u'Home', u'http://www.dailymail.co.uk/home/index.rss'), + (u'News', u'http://www.dailymail.co.uk/news/index.rss'), + (u'Sport', u'http://www.dailymail.co.uk/sport/index.rss'), + (u'TV and Showbiz', u'http://www.dailymail.co.uk/tvshowbiz/index.rss'), + (u'Femail', u'http://www.dailymail.co.uk/femail/index.rss'), + (u'Health', u'http://www.dailymail.co.uk/health/index.rss'), + (u'Science and Technology', u'http://www.dailymail.co.uk/sciencetech/index.rss'), + (u'Money', u'http://www.dailymail.co.uk/money/index.rss'), + (u'Property', u'http://www.dailymail.co.uk/property/index.rss'), + (u'Motoring', u'http://www.dailymail.co.uk/motoring/index.rss'), + (u'Travel', u'http://www.dailymail.co.uk/travel/index.rss') ] - - #def print_version(self, url): - # main = url.partition('?')[0] - # return main + '?printingPage=true' - -