calibre/recipes/debunkingdenialism.recipe

#!/usr/bin/env python
# vim:fileencoding=utf-8

from calibre.web.feeds.news import BasicNewsRecipe


class Bellingcat(BasicNewsRecipe):
    title = 'Debunking Denialism'
    __author__ = 'bugmen00t'
    description = 'Debunking Denialism is a website dedicated to the refutation of pseudoscience and denialism by applying scientific skepticism and defending evidence-based science. Fighting pseudoscience and quackery with reason and evidence.'  # noqa: E501
    publisher = 'Debunking Denialism'
    category = 'blog'
    cover_url = u'https://i0.wp.com/debunkingdenialism.com/wp-content/uploads/2017/06/cropped-newestblavatar.jpg'
    language = 'en'
    no_stylesheets = True
    remove_javascript = False
    auto_cleanup = False
    oldest_article = 60
    max_articles_per_feed = 10

    remove_tags_before = dict(name='article')

    remove_tags_after = dict(name='article')

    remove_tags = [
        dict(name='div', attrs={'class': 'above-entry-meta'}),
        dict(name='time', attrs={'class': 'updated'}),
        dict(name='p', attrs={'class': 'ddtag'}),
        dict(name='div', attrs={'class': 'sharedaddy sd-sharing-enabled'}),
        dict(
            name='div',
            attrs={
                'class':
                'sharedaddy sd-block sd-like jetpack-likes-widget-wrapper jetpack-likes-widget-unloaded'
            }
        )
    ]

    feeds = [('Debunking Denialism', 'https://debunkingdenialism.com/feed/')]

    def preprocess_html(self, soup):
        for img in soup.findAll('img', attrs={'data-orig-file': True}):
            img['src'] = img['data-orig-file']
        return soup