#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal

from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

from calibre.web.feeds.recipes import BasicNewsRecipe


def classes(classes):
    """Build ``find()``/``findAll()`` keyword arguments matching CSS classes.

    *classes* is a string of class names separated by single spaces. The
    returned dict has one ``attrs`` entry whose ``class`` matcher is truthy
    when a tag's class attribute shares at least one whitespace-separated
    name with *classes*; it is meant to be splatted into a soup search, e.g.
    ``soup.find(**classes('a b'))``.
    """
    q = frozenset(classes.split(' '))
    return dict(attrs={
        'class': lambda x: x and frozenset(x.split()).intersection(q)})


class E1843(BasicNewsRecipe):
    """Recipe that builds its index from the 1843 magazine print edition page."""

    title = '1843'
    __author__ = 'Kovid Goyal'
    language = 'en'
    no_stylesheets = True
    remove_javascript = True
    oldest_article = 365
    encoding = 'utf-8'
    # feeds = [
    #     'https://www.1843magazine.com/rss/content',
    # ]
    # Matchers for the parts of an article page to keep: any <h1> whose class
    # list contains 'title', plus any element carrying one of the listed
    # classes.
    keep_only_tags = [
        dict(name='h1', attrs={'class': lambda x: x and 'title' in x.split()}),
        classes(
            'field-name-field-rubric-summary'
            ' article-header__overlay-main-image'
            ' meta-info__author article__body'
        ),
    ]

    def parse_index(self):
        """Scrape the current print edition and return its table of contents.

        Follows the 'Print edition' link on the site front page, then walks
        the edition page in document order, grouping article nodes under the
        most recent section header.

        Side effects: sets ``self.timefmt`` from the cover heading text and
        ``self.cover_url`` from the cover image ``src``.

        Returns a list of ``(section_title, articles)`` tuples, where
        ``articles`` is a list of dicts with the keys ``title``, ``url`` and
        ``description``. Sections without any article are omitted.
        """
        soup = self.index_to_soup('https://www.1843magazine.com')
        edition_link = soup.find(text='Print edition').parent
        soup = self.index_to_soup(edition_link['href'])

        h1 = soup.find(**classes('cover-image__main'))
        self.timefmt = ' [%s]' % self.tag_to_string(h1)
        img = soup.find(**classes('cover-image__image')).find('img')
        self.cover_url = img['src']

        ans = []
        current_section = articles = None
        for div in soup.findAll(
                **classes('field-name-field-header node-article')):
            if 'field-header' in ''.join(div['class']):
                # A new section header: flush the previous section first.
                if current_section and articles:
                    ans.append((current_section, articles))
                current_section = self.tag_to_string(div)
                self.log(current_section)
                articles = []
                continue

            if articles is None:
                # No section header has been seen yet, so there is no list to
                # add this article to; skip it instead of crashing on None.
                self.log('\tSkipping article found before any section header')
                continue

            link = div.find('a', href=True)
            if link is None:
                # Without a link there is no URL to download; skip the node
                # rather than aborting the whole index.
                self.log('\tSkipping article node without a link')
                continue

            title = self.tag_to_string(link)
            url = link['href']
            self.log('\t', title, ' at ', url)
            desc = ''
            r = div.find(**classes('article-rubric'))
            if r is not None:
                desc = self.tag_to_string(r)
            articles.append(
                {'title': title, 'url': url, 'description': desc})

        # Flush the final section, which has no following header to trigger it.
        if current_section and articles:
            ans.append((current_section, articles))
        return ans