Update The Daily Mail

2025-06-23 15:30:45 -04:00 · 2017-05-22 09:33:53 +05:30 · 2017-05-22 09:33:53 +05:30 · b273f97cff
commit b273f97cff
parent ea87f0ba68
1 changed file with 31 additions and 13 deletions
--- a/recipes/daily_mail.recipe
+++ b/recipes/daily_mail.recipe
@ -1,30 +1,40 @@
 #!/usr/bin/env python2
 # vim:fileencoding=utf-8
 # License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 from calibre.web.feeds.news import BasicNewsRecipe
 def classes(classes):
    """Build tag-matching criteria keyed on CSS class names.

    ``classes`` is a string of class names separated by single spaces.
    The result is a dict with one ``attrs`` entry that maps ``'class'``
    to a predicate. The predicate is truthy when the class string it is
    given shares at least one whitespace-separated name with ``classes``,
    and falsy for an empty or missing class string.
    """
    wanted = frozenset(classes.split(' '))

    def shares_a_class(value):
        # An empty/None value is passed straight through as the (falsy)
        # result; otherwise the overlap set decides the match.
        return value and frozenset(value.split()).intersection(wanted)

    return dict(attrs={'class': shares_a_class})
 class TheDailyMail(BasicNewsRecipe):
    title = u'The Daily Mail'
    oldest_article = 2
    language = 'en_GB'
-    author = 'RufusA and Sujata Raman'
+    __author__ = 'Kovid Goyal'
    simultaneous_downloads = 1
    max_articles_per_feed = 50
    use_embedded_content = False
    auto_cleanup = True
    compress_news_images = True
-    compress_news_images_max_size = 15
+    compress_news_images_auto_size = 8
    extra_css = '''#js-article-text{font-family:Arial,Helvetica,sans-serif;}
                    h1{font-size:x-large; font-weight:bold;}
                    a.author{color:#003580;}
                    .js-article-text{font-size:50%;}
                    .imageCaption{font-size:x-small; font-weight:bold}
                '''
    no_stylesheets = True
    keep_only_tags = [
        dict(name='h1'),
        classes('author-section byline-section'),
        dict(itemprop='articleBody'),
    ]
    remove_tags = [
        classes('related-carousel')
    ]
    feeds = [
        (u'Home', u'http://www.dailymail.co.uk/home/index.rss'),
        (u'News', u'http://www.dailymail.co.uk/news/index.rss'),
@ -38,3 +48,11 @@ class TheDailyMail(BasicNewsRecipe):
        (u'Motoring', u'http://www.dailymail.co.uk/motoring/index.rss'),
        (u'Travel', u'http://www.dailymail.co.uk/travel/index.rss')
    ]
    def preprocess_html(self, soup):
        """Tidy the parsed article before further processing.

        Every element carrying a ``data-src`` attribute gets that value
        copied into ``src``, and every ``<h1>`` after the first one is
        removed from the tree. The same ``soup`` object is returned.
        """
        # Copy the deferred source into the attribute that is actually used.
        for tag in soup.findAll(attrs={'data-src':True}):
            tag['src'] = tag['data-src']
        # Keep only the first heading; findAll has already collected the
        # matches, so extracting while looping does not disturb iteration.
        for position, heading in enumerate(soup.findAll('h1')):
            if position > 0:
                heading.extract()
        return soup