From 1f82d44d29861c23660f3b39c4db56f92b131926 Mon Sep 17 00:00:00 2001 From: bobbysteel Date: Mon, 29 Jan 2018 15:39:38 +0000 Subject: [PATCH] Delete - CAPTCHA blocked now --- recipes/financial_times_us.recipe | 97 ------------------------------- 1 file changed, 97 deletions(-) delete mode 100644 recipes/financial_times_us.recipe diff --git a/recipes/financial_times_us.recipe b/recipes/financial_times_us.recipe deleted file mode 100644 index bfc57aac53..0000000000 --- a/recipes/financial_times_us.recipe +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env python2 -# -*- mode: python -*- -# -*- coding: utf-8 -*- - -__license__ = 'GPL v3' -__copyright__ = '2010-2017, Darko Miletic ' -''' -www.ft.com/todaysnewspaper/international -''' - -from calibre.web.feeds.news import BasicNewsRecipe -from urllib import unquote - - -def classes(classes): - q = frozenset(classes.split(' ')) - return dict(attrs={ - 'class': lambda x: x and frozenset(x.split()).intersection(q)}) - - -class FinancialTimes(BasicNewsRecipe): - title = 'Financial Times (International) printed edition' - __author__ = 'Darko Miletic' - description = "The Financial Times (FT) is one of the world's leading business news and information organisations, recognised internationally for its authority, integrity and accuracy." # noqa - publisher = 'The Financial Times Ltd.' - category = 'news, finances, politics, World' - oldest_article = 2 - language = 'en' - max_articles_per_feed = 250 - no_stylesheets = True - use_embedded_content = False - needs_subscription = True - encoding = 'utf8' - publication_type = 'newspaper' - handle_gzip = True - compress_news_images = True - scale_news_images_to_device = True - ignore_duplicate_articles = {'url'} - LOGIN = 'https://accounts.ft.com/login?location=https%3A%2F%2Fwww.ft.com%2F' - LOGOUT = 'https://myaccount.ft.com/logout' - INDEX = 'https://www.ft.com/todaysnewspaper/international' - PREFIX = 'https://www.ft.com' - - keep_only_tags = [ - classes('topper__headline topper__standfirst n-content-image--full article__time-byline article__body') - ] - - remove_tags = [ - classes('n-content-related-box tour-tip n-content-recommended n-content-video'), - dict(name=['aside']) - ] - - extra_css = ''' - body {font-family: Georgia,serif;} - img {display:block;} - ''' - - def get_browser(self): - br = BasicNewsRecipe.get_browser(self) - br.open(self.INDEX) - if self.username is not None and self.password is not None: - br.open(self.LOGIN) - br.select_form(name='enter-email-form') - br['email'] = self.username - br.submit() - br.select_form(name='enter-password-form') - br['password'] = self.password - br.submit() - return br - - def parse_index(self): - articles = [] - soup = self.index_to_soup(self.INDEX) - totalfeeds = [] - current_section = [] - div = [] - for div in soup.findAll('div', attrs={'data-trackable': 'list'}): - articles = [] - current_section = self.tag_to_string(div.find('h2')) - self.log('in section: ', current_section) - for article in div.findAll('a', href=True, attrs={'data-trackable':'main-link'}): - url = self.PREFIX + article['href'] - title = self.tag_to_string(article) - articles.append({'title': title, 'url': url, 'description': '', 'date': ''}) - self.log('title: ', title, ' url: ', url) - totalfeeds.append((current_section,articles)) - return totalfeeds - - def preprocess_html(self, soup): - for img in soup.findAll('img', srcset=True): - src = img['srcset'].split(',')[0].strip() - src = unquote(src.rpartition('/')[2].partition('?')[0]) - img['src'] = src - return soup - - def cleanup(self): - self.browser.open(self.LOGOUT)