mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-10-25 15:52:25 -04:00 
			
		
		
		
	Merge branch 'patch-3' of https://github.com/bobbysteel/calibre
This commit is contained in:
		
						commit
						c3d16d3a47
					
				| @ -1,97 +0,0 @@ | ||||
| #!/usr/bin/env  python2 | ||||
| # -*- mode: python -*- | ||||
| # -*- coding: utf-8 -*- | ||||
| 
 | ||||
| __license__ = 'GPL v3' | ||||
| __copyright__ = '2010-2017, Darko Miletic <darko.miletic at gmail.com>' | ||||
| ''' | ||||
| www.ft.com/todaysnewspaper/international | ||||
| ''' | ||||
| 
 | ||||
| from calibre.web.feeds.news import BasicNewsRecipe | ||||
| from urllib import unquote | ||||
| 
 | ||||
| 
 | ||||
| def classes(classes): | ||||
|     q = frozenset(classes.split(' ')) | ||||
|     return dict(attrs={ | ||||
|         'class': lambda x: x and frozenset(x.split()).intersection(q)}) | ||||
| 
 | ||||
| 
 | ||||
| class FinancialTimes(BasicNewsRecipe): | ||||
|     title = 'Financial Times (International) printed edition' | ||||
|     __author__ = 'Darko Miletic' | ||||
|     description = "The Financial Times (FT) is one of the world's leading business news and information organisations, recognised internationally for its authority, integrity and accuracy."  # noqa | ||||
|     publisher = 'The Financial Times Ltd.' | ||||
|     category = 'news, finances, politics, World' | ||||
|     oldest_article = 2 | ||||
|     language = 'en' | ||||
|     max_articles_per_feed = 250 | ||||
|     no_stylesheets = True | ||||
|     use_embedded_content = False | ||||
|     needs_subscription = True | ||||
|     encoding = 'utf8' | ||||
|     publication_type = 'newspaper' | ||||
|     handle_gzip = True | ||||
|     compress_news_images = True | ||||
|     scale_news_images_to_device = True | ||||
|     ignore_duplicate_articles = {'url'} | ||||
|     LOGIN = 'https://accounts.ft.com/login?location=https%3A%2F%2Fwww.ft.com%2F' | ||||
|     LOGOUT = 'https://myaccount.ft.com/logout' | ||||
|     INDEX = 'https://www.ft.com/todaysnewspaper/international' | ||||
|     PREFIX = 'https://www.ft.com' | ||||
| 
 | ||||
|     keep_only_tags = [ | ||||
|         classes('topper__headline topper__standfirst n-content-image--full article__time-byline article__body') | ||||
|     ] | ||||
| 
 | ||||
|     remove_tags = [ | ||||
|         classes('n-content-related-box tour-tip n-content-recommended n-content-video'), | ||||
|         dict(name=['aside']) | ||||
|     ] | ||||
| 
 | ||||
|     extra_css = ''' | ||||
|                 body {font-family: Georgia,serif;} | ||||
|                 img {display:block;} | ||||
|                 ''' | ||||
| 
 | ||||
|     def get_browser(self): | ||||
|         br = BasicNewsRecipe.get_browser(self) | ||||
|         br.open(self.INDEX) | ||||
|         if self.username is not None and self.password is not None: | ||||
|             br.open(self.LOGIN) | ||||
|             br.select_form(name='enter-email-form') | ||||
|             br['email'] = self.username | ||||
|             br.submit() | ||||
|             br.select_form(name='enter-password-form') | ||||
|             br['password'] = self.password | ||||
|             br.submit() | ||||
|         return br | ||||
| 
 | ||||
|     def parse_index(self): | ||||
|         articles = [] | ||||
|         soup = self.index_to_soup(self.INDEX) | ||||
|         totalfeeds = [] | ||||
|         current_section = [] | ||||
|         div = [] | ||||
|         for div in soup.findAll('div', attrs={'data-trackable': 'list'}): | ||||
|             articles = [] | ||||
|             current_section = self.tag_to_string(div.find('h2')) | ||||
|             self.log('in section: ', current_section) | ||||
|             for article in div.findAll('a', href=True, attrs={'data-trackable':'main-link'}): | ||||
|                 url = self.PREFIX + article['href'] | ||||
|                 title = self.tag_to_string(article) | ||||
|                 articles.append({'title': title, 'url': url, 'description': '', 'date': ''}) | ||||
|                 self.log('title: ', title, ' url: ', url) | ||||
|             totalfeeds.append((current_section,articles)) | ||||
|         return totalfeeds | ||||
| 
 | ||||
|     def preprocess_html(self, soup): | ||||
|         for img in soup.findAll('img', srcset=True): | ||||
|             src = img['srcset'].split(',')[0].strip() | ||||
|             src = unquote(src.rpartition('/')[2].partition('?')[0]) | ||||
|             img['src'] = src | ||||
|         return soup | ||||
| 
 | ||||
|     def cleanup(self): | ||||
|         self.browser.open(self.LOGOUT) | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user