calibre/recipes/barrons.recipe

#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2019, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals

from calibre.web.feeds.news import BasicNewsRecipe


def classes(classes):
    """Return a matcher dict selecting elements by CSS class name.

    ``classes`` is a string of class names separated by single spaces.
    The result is ``{'attrs': {'class': predicate}}``, where ``predicate``
    takes a ``class`` attribute value and returns the (possibly empty)
    frozenset of wanted names it contains. Falsy attribute values such as
    ``None`` or ``''`` are returned unchanged.
    """
    wanted = frozenset(classes.split(' '))

    def shares_wanted_class(value):
        # Pass falsy values straight through, mirroring ``x and ...``.
        if not value:
            return value
        present = frozenset(value.split())
        return present.intersection(wanted)

    return {'attrs': {'class': shares_wanted_class}}


class BarronsMagazine(BasicNewsRecipe):
    """Recipe that builds an article list from the Barron's magazine page."""

    title = 'Barron\'s Magazine'
    __author__ = 'Kovid Goyal'
    description = 'Financial news from the publisher of the WSJ'
    language = 'en'

    # Only the headline and the element with the article-body id are kept.
    keep_only_tags = [
        dict(name='h1'),
        dict(id='js-article__body'),
    ]

    def parse_index(self):
        """Scrape the magazine landing page and return its article list.

        Every ``<article>`` element that has an ``<h2>``/``<h3>`` heading
        containing a link with an ``href`` contributes one entry. Elements
        missing any of those parts are skipped, so a single malformed
        ``<article>`` does not abort the whole download.

        Returns a one-element list holding the tuple
        ``('Articles', articles)``, where ``articles`` is a list of dicts
        with the keys ``title``, ``url`` and ``description``.
        """
        soup = self.index_to_soup('https://www.barrons.com/magazine')
        articles = []
        for art in soup.findAll('article'):
            heading = art.find(['h2', 'h3'])
            if heading is None:
                # No headline in this <article>; nothing to link to.
                continue
            link = heading.find('a')
            if link is None:
                continue
            url = link.get('href')
            if not url:
                # An anchor without a target cannot be downloaded.
                continue
            title = self.tag_to_string(link)
            # Use the first <p> whose class contains a summary or byline
            # marker as the description, when one is present.
            blurb = art.find('p', attrs={'class': lambda x: x and ('_summary_' in x or '_byline_' in x)})
            desc = self.tag_to_string(blurb) if blurb else ''
            articles.append({'title': title, 'url': url, 'description': desc})
        return [('Articles', articles)]