#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal

from __future__ import absolute_import, division, print_function, unicode_literals

from calibre.web.feeds.recipes import BasicNewsRecipe


def classes(classes):
    """Build keyword arguments that match tags by CSS class.

    ``classes`` is a string of class names separated by single spaces. The
    returned dict has a single ``attrs`` key whose ``class`` matcher is truthy
    when a tag's class attribute shares at least one name with ``classes``.
    It is meant to be unpacked into ``find``/``findAll`` calls or used as a
    tag specification, as done further down in this file.
    """
    q = frozenset(classes.split(' '))
    return dict(attrs={
        # ``x`` is the tag's class attribute; it may be missing (falsy), in
        # which case the tag does not match.
        'class': lambda x: x and frozenset(x.split()).intersection(q)})


class E1843(BasicNewsRecipe):
    """Recipe for 1843, the Economist's ideas, culture and lifestyle magazine."""

    title = '1843'
    __author__ = 'Kovid Goyal'
    description = 'The ideas, culture and lifestyle magazine from The Economist'
    language = 'en_GB'
    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'

    # Only the element with id="content" is retained from each article page.
    keep_only_tags = [
        dict(id='content')
    ]
    # Adverts, share widgets and other non-article elements to strip.
    remove_tags = [
        classes('advert ad ds-share-list article__wordmark related-articles newsletter-signup')
    ]

    def parse_index(self):
        """Collect the articles linked from the 1843 front page.

        Returns a single-section index: ``[('Articles', [article, ...])]``
        where each article is a dict with ``title``, ``url`` and
        ``description`` keys.
        """
        soup = self.index_to_soup('https://economist.com/1843')
        ans = []

        for a in soup.findAll(**classes('headline-link')):
            url = a.get('href')
            if not url:
                # A matched element without a link target cannot be
                # downloaded; skip it instead of raising KeyError.
                continue
            if url.startswith('/'):
                # Site-relative link: make it absolute.
                url = 'https://economist.com' + url
            title = self.tag_to_string(a)
            self.log(title, ' at ', url)

            # The summary, when present, is a following sibling of the
            # headline link's parent marked with itemprop="description".
            desc = ''
            d = a.parent.findNextSibling(itemprop='description')
            if d is not None:
                desc = self.tag_to_string(d)

            ans.append({'title': title, 'url': url, 'description': desc})

        return [('Articles', ans)]

    def postprocess_html(self, soup, *a):
        """Move the article header to the top of the content element.

        If either the ``content`` element or the ``article__header`` element
        is missing, the soup is returned unchanged rather than failing with
        an AttributeError on ``None``.
        """
        main = soup.find(id='content')
        header = soup.find(**classes('article__header'))
        if main is not None and header is not None:
            header.extract()
            main.insert(0, header)
        return soup