diff --git a/recipes/daily_mail.recipe b/recipes/daily_mail.recipe index ac2dfd1777..0a576f7012 100644 --- a/recipes/daily_mail.recipe +++ b/recipes/daily_mail.recipe @@ -8,6 +8,8 @@ class TheDailyMail(BasicNewsRecipe): author = 'RufusA and Sujata Raman' simultaneous_downloads= 1 max_articles_per_feed = 50 + use_embedded_content = False + auto_cleanup = True extra_css = '''#js-article-text{font-family:Arial,Helvetica,sans-serif;} h1{font-size:x-large; font-weight:bold;} @@ -18,33 +20,18 @@ class TheDailyMail(BasicNewsRecipe): ''' - remove_tags = [ dict(name='div', attrs={'class':['article-icon-links-container','print-or-mail-links cleared', - 'social-links cleared','explore-links','relatedItems','intellicrumbs box','travel','moduleHalf']}), - dict(name='div', attrs={'id':['link-unit-wrapper','pushdown-ad','reader-comments','googleAds',]}), - dict(name='h3', attrs={'class':['social-links-title']}), - dict(name='span', attrs={'class':['clickToEnlargeTop']}), - ] - #remove_tags_before = dict(name='div', attrs={'id':'content'}) - keep_only_tags = [dict(name='div', attrs={'id':'js-article-text'})] - no_stylesheets = True feeds = [ - (u'Home', u'http://www.dailymail.co.uk/home/index.rss'), - (u'News', u'http://www.dailymail.co.uk/news/index.rss'), - (u'Sport', u'http://www.dailymail.co.uk/sport/index.rss'), - (u'TV and Showbiz', u'http://www.dailymail.co.uk/tvshowbiz/index.rss'), - (u'Femail', u'http://www.dailymail.co.uk/femail/index.rss'), - (u'Health', u'http://www.dailymail.co.uk/health/index.rss'), - (u'Science and Technology', u'http://www.dailymail.co.uk/sciencetech/index.rss'), - (u'Money', u'http://www.dailymail.co.uk/money/index.rss'), - (u'Property', u'http://www.dailymail.co.uk/property/index.rss'), - (u'Motoring', u'http://www.dailymail.co.uk/motoring/index.rss'), - (u'Travel', u'http://www.dailymail.co.uk/travel/index.rss') + (u'Home', u'http://www.dailymail.co.uk/home/index.rss'), + (u'News', u'http://www.dailymail.co.uk/news/index.rss'), + (u'Sport', u'http://www.dailymail.co.uk/sport/index.rss'), + (u'TV and Showbiz', u'http://www.dailymail.co.uk/tvshowbiz/index.rss'), + (u'Femail', u'http://www.dailymail.co.uk/femail/index.rss'), + (u'Health', u'http://www.dailymail.co.uk/health/index.rss'), + (u'Science and Technology', u'http://www.dailymail.co.uk/sciencetech/index.rss'), + (u'Money', u'http://www.dailymail.co.uk/money/index.rss'), + (u'Property', u'http://www.dailymail.co.uk/property/index.rss'), + (u'Motoring', u'http://www.dailymail.co.uk/motoring/index.rss'), + (u'Travel', u'http://www.dailymail.co.uk/travel/index.rss') ] - - #def print_version(self, url): - # main = url.partition('?')[0] - # return main + '?printingPage=true' - -