mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-04 03:27:00 -05:00 
			
		
		
		
	Merge branch 'patch-1' of https://github.com/bobbysteel/calibre
This commit is contained in:
		
						commit
						50ca78b7a5
					
				@ -1,113 +0,0 @@
 | 
				
			|||||||
#!/usr/bin/env  python2
 | 
					 | 
				
			||||||
# -*- mode: python -*-
 | 
					 | 
				
			||||||
# -*- coding: utf-8 -*-
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
__license__ = 'GPL v3'
 | 
					 | 
				
			||||||
__copyright__ = '2010-2017, Darko Miletic <darko.miletic at gmail.com>'
 | 
					 | 
				
			||||||
'''
 | 
					 | 
				
			||||||
www.ft.com/todaysnewspaper/uk
 | 
					 | 
				
			||||||
'''
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from calibre.web.feeds.news import BasicNewsRecipe
 | 
					 | 
				
			||||||
from urllib import unquote
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def classes(classes):
    '''
    Build a tag matcher keyed on CSS class names.

    `classes` is a space separated string of class names. The result is a
    dict of the form {'attrs': {'class': <callable>}}; the callable receives
    the value of a tag's class attribute and returns a truthy value only
    when that value shares at least one class name with `classes`.
    '''
    wanted = frozenset(classes.split(' '))

    def shares_a_class(value):
        # A missing or empty class attribute is handed back unchanged
        # (it is already falsy), exactly like `x and ...` would do.
        if not value:
            return value
        present = frozenset(value.split())
        return present.intersection(wanted)

    return {'attrs': {'class': shares_a_class}}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class FinancialTimes(BasicNewsRecipe):
    '''
    Recipe for the UK edition of the Financial Times.

    The list of sections and articles is read from the page at INDEX, and
    the user's credentials are submitted through the two-step form at LOGIN
    before any article is fetched.
    '''
    title = 'Financial Times (UK)'
    __author__ = 'Darko Miletic'
    description = "The Financial Times (FT) is one of the world's leading business news and information organisations, recognised internationally for its authority, integrity and accuracy."  # noqa
    publisher = 'The Financial Times Ltd.'
    category = 'news, finances, politics, UK, World'
    oldest_article = 2
    language = 'en_GB'
    max_articles_per_feed = 250
    no_stylesheets = True
    use_embedded_content = False
    needs_subscription = True
    encoding = 'utf8'
    publication_type = 'newspaper'
    handle_gzip = True
    compress_news_images = True
    scale_news_images_to_device = True
    ignore_duplicate_articles = {'url'}
    LOGIN = 'https://accounts.ft.com/login?location=https%3A%2F%2Fwww.ft.com%2F'
    LOGOUT = 'https://myaccount.ft.com/logout'
    INDEX = 'https://www.ft.com/todaysnewspaper/uk'
    PREFIX = 'https://www.ft.com'

    # Only the headline, standfirst, lead image, byline and body are kept.
    keep_only_tags = [
        classes('topper__headline topper__standfirst n-content-image--full article__time-byline article__body')
    ]

    # Related-content boxes, tips, recommendations, videos and asides are
    # stripped from whatever keep_only_tags retained.
    remove_tags = [
        classes('n-content-related-box tour-tip n-content-recommended n-content-video'),
        dict(name=['aside'])
    ]

    extra_css = '''
                body {font-family: Georgia,serif;}
                img {display:block;}
                '''

    def get_browser(self):
        '''
        Return a browser that has visited INDEX and, when both a username
        and a password are set, has submitted them through the login forms.
        '''
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            # The login is a two-step flow: e-mail first, then password.
            br.select_form(name='enter-email-form')
            br['email'] = self.username
            br.submit()
            br.select_form(name='enter-password-form')
            br['password'] = self.password
            br.submit()
        return br

    def get_cover_url(self):
        '''
        Return the URL of today's front page image on kiosko.net.

        If the dated image URL cannot be opened, the newspaper's page on
        en.kiosko.net is searched for the first image whose src ends in
        '750.jpg'. Returns None when no such image is found either.
        '''
        from datetime import date
        # Read the clock once so the year, month and day parts of the URL
        # cannot disagree when the call straddles midnight.
        today = date.today()
        cover = 'http://img.kiosko.net/{}/{:02d}/{:02d}/uk/ft_uk.750.jpg'.format(
            today.year, today.month, today.day)
        br = BasicNewsRecipe.get_browser(self)
        try:
            br.open(cover)
        except Exception:
            # `except Exception` rather than a bare except, so that
            # KeyboardInterrupt and SystemExit still propagate.
            index = 'http://en.kiosko.net/uk/np/ft_uk.html'
            soup = self.index_to_soup(index)
            for image in soup.findAll('img', src=True):
                if image['src'].endswith('750.jpg'):
                    return image['src']
            self.log("\nCover unavailable")
            return None
        return cover

    def parse_index(self):
        '''
        Return a list of (section title, articles) tuples built from INDEX.

        Every div marked data-trackable="list" is one section, titled by its
        first h2. Every link marked data-trackable="main-link" inside it
        becomes an article dict with the keys title, url, description and
        date (the last two are always empty strings).
        '''
        feeds = []
        soup = self.index_to_soup(self.INDEX)
        for div in soup.findAll('div', attrs={'data-trackable': 'list'}):
            section = self.tag_to_string(div.find('h2'))
            self.log('in section: ', section)
            articles = []
            for link in div.findAll('a', href=True, attrs={'data-trackable': 'main-link'}):
                href = link['href']
                # Prefixing an already absolute link would yield a broken
                # URL such as 'https://www.ft.comhttps://...'.
                if href.startswith(('http://', 'https://')):
                    url = href
                else:
                    url = self.PREFIX + href
                title = self.tag_to_string(link)
                articles.append({'title': title, 'url': url, 'description': '', 'date': ''})
                self.log('title: ', title, ' url: ', url)
            feeds.append((section, articles))
        return feeds

    def preprocess_html(self, soup):
        '''
        Point every image that has a srcset at the first srcset candidate.

        The new src is the last path segment of that candidate with any
        query string removed and percent-escapes decoded. Returns the same
        soup object, modified in place.
        '''
        for img in soup.findAll('img', srcset=True):
            # A srcset candidate is "URL [descriptor]"; splitting on
            # whitespace drops the descriptor (e.g. "700w") so it cannot
            # leak into src when the URL carries no query string.
            first_candidate = img['srcset'].split(',')[0].split()
            if not first_candidate:
                # Empty srcset: keep whatever src the tag already has
                # instead of overwriting it with an empty string.
                continue
            src = first_candidate[0]
            img['src'] = unquote(src.rpartition('/')[2].partition('?')[0])
        return soup

    def cleanup(self):
        '''
        Request the LOGOUT page with the recipe's browser.

        The logout is best-effort: a failure is logged rather than raised.
        '''
        try:
            self.browser.open(self.LOGOUT)
        except Exception as err:
            self.log('Failed to log out: %s' % err)
 | 
					 | 
				
			||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user