#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal '

try:
    from urllib.parse import urljoin
except ImportError:  # Python 2 fallback; the file still targets both versions
    from urlparse import urljoin

from calibre.web.feeds.news import BasicNewsRecipe


class EntrepeneurMagRecipe(BasicNewsRecipe):
    """Recipe that builds a single 'Articles' feed from entrepreneur.com.

    The article list is scraped from the ``/latest`` page under ``INDEX``.
    """

    __author__ = 'Kovid Goyal'
    language = 'en'
    title = u'Entrepeneur Magazine'
    publisher = u'Entrepreneur Media, Inc'
    category = u'Business'
    description = u'Online magazine on business, small business, management and economy'
    encoding = 'utf-8'
    no_stylesheets = True
    remove_javascript = True

    # Article pages: keep elements that carry a data-word-count attribute.
    keep_only_tags = [
        dict(attrs={'data-word-count': True}),
    ]
    # Drop "related content" blocks.
    remove_tags = [
        dict(attrs={'class': ['related-content']}),
    ]
    remove_attributes = ['style']

    # Site root; article links found on the index page are resolved against it.
    INDEX = 'https://www.entrepreneur.com'

    def parse_index(self):
        """Scrape the ``/latest`` page and return the list of articles.

        Every ``<h3>`` whose direct parent is an ``<a>`` with a non-empty
        ``href`` is treated as an article headline. A ``<p>`` immediately
        following that link, if present, supplies the description.

        Returns:
            A one-element list ``[('Articles', articles)]`` where each
            article is a dict with ``title``, ``url`` and ``description``.
        """
        soup = self.index_to_soup(self.INDEX + '/latest')
        articles = []
        for h3 in soup.findAll('h3'):
            a = h3.parent
            if a.name != 'a' or not a.get('href'):
                continue
            # urljoin leaves absolute hrefs untouched and inserts the
            # missing slash for relative ones; plain concatenation would
            # produce a malformed URL in both of those cases.
            url = urljoin(self.INDEX + '/', a['href'])
            title = self.tag_to_string(h3)
            desc = ''
            sibling = a.next_sibling
            # The sibling may be absent or a text node rather than a tag,
            # so do not rely on it exposing ``.name``.
            if getattr(sibling, 'name', None) == 'p':
                desc = self.tag_to_string(sibling)
            articles.append({'title': title, 'url': url, 'description': desc})
            self.log(title, url)
        return [('Articles', articles)]

    def preprocess_html(self, soup):
        """Copy ``data-src`` into ``src`` on every image that has ``data-src``.

        Presumably the site lazy-loads images and keeps the real URL in
        ``data-src`` -- TODO confirm against current page markup.

        Returns:
            The same ``soup`` object, modified in place.
        """
        for img in soup.findAll('img', attrs={'data-src': True}):
            img['src'] = img['data-src']
        return soup