Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)
Update Financial Times
Fixes #1711934 [Financial times download uk/us is broken](https://bugs.launchpad.net/calibre/+bug/1711934)
commit e25426ad5a
parent 331190c369
The UK-edition recipe (index moved from www.ft.com/uk-edition to www.ft.com/todaysnewspaper/uk):

@@ -5,11 +5,10 @@
 __license__ = 'GPL v3'
 __copyright__ = '2010-2017, Darko Miletic <darko.miletic at gmail.com>'
 '''
-www.ft.com/uk-edition
+www.ft.com/todaysnewspaper/uk
 '''

 from calibre.web.feeds.news import BasicNewsRecipe
-from collections import OrderedDict
 from urllib import unquote

@@ -34,22 +33,26 @@ class FinancialTimes(BasicNewsRecipe):
     encoding = 'utf8'
     publication_type = 'newspaper'
     handle_gzip = True
+    compress_news_images = True
+    scale_news_images_to_device = True
+    ignore_duplicate_articles = {'url'}
     LOGIN = 'https://accounts.ft.com/login?location=https%3A%2F%2Fwww.ft.com%2F'
     LOGOUT = 'https://myaccount.ft.com/logout'
-    INDEX = 'http://www.ft.com/uk-edition'
-    PREFIX = 'http://www.ft.com'
+    INDEX = 'https://www.ft.com/todaysnewspaper/uk'
+    PREFIX = 'https://www.ft.com'

     keep_only_tags = [
-        classes(
-            'article__header--wrapper article__time-byline article__body'
-            'n-content-image barrier-grid__heading article__time-byline topper__headline topper__standfirst')
+        classes('topper__headline topper__standfirst n-content-image--full article__time-byline article__body')
     ]

     remove_tags = [
-        classes('n-content-related-box tour-tip')
+        classes('n-content-related-box tour-tip n-content-recommended n-content-video')
     ]

-    remove_attributes = ['width', 'height', 'lang', 'style']
+    extra_css = '''
+        body {font-family: Georgia,serif;}
+        img {display:block;}
+    '''

     def get_browser(self):
         br = BasicNewsRecipe.get_browser(self)
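Note: classes() here is the helper exported by calibre.web.feeds.news alongside BasicNewsRecipe. It turns a space-separated list of CSS class names into a BeautifulSoup attrs query that matches any tag carrying at least one of those classes. A minimal sketch of that behavior (an approximation, not calibre's exact source):

    def classes(cls):
        # Build an attrs query: match tags whose class list intersects
        # the given space-separated names.
        q = frozenset(cls.split(' '))
        return dict(attrs={
            'class': lambda x: x and frozenset(x.split()).intersection(q)})

With this, the keep_only_tags entry above selects the headline, standfirst, full-width image, byline and body blocks of the new article markup and discards everything else.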
@@ -81,33 +84,14 @@ class FinancialTimes(BasicNewsRecipe):
         return cover

     def parse_index(self):
-        feeds = OrderedDict()
+        articles = []
         soup = self.index_to_soup(self.INDEX)
-        # dates= self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
-        # self.timefmt = ' [%s]'%dates
-        section_title = 'Untitled'
-
-        for column in soup.findAll('div', attrs={'class': 'feedBoxes clearfix'}):
-            for section in column.findAll('div', attrs={'class': 'feedBox'}):
-                sectiontitle = self.tag_to_string(section.find('h4'))
-                if '...' not in sectiontitle:
-                    section_title = sectiontitle
-                self.log('Found section:', sectiontitle)
-                for article in section.ul.findAll('li'):
-                    articles = []
-                    title = self.tag_to_string(article.a)
-                    url = article.a['href']
-                    articles.append(
-                        {'title': title, 'url': url, 'description': '', 'date': ''})
-                    self.log('\tFound article:', title)
-
-                if articles:
-                    if section_title not in feeds:
-                        feeds[section_title] = []
-                    feeds[section_title] += articles
-
-        ans = [(key, val) for key, val in feeds.iteritems()]
-        return ans
+        for article in soup.findAll('a', href=True, attrs={'data-trackable':'main-link'}):
+            url = self.PREFIX + article['href']
+            title = self.tag_to_string(article)
+            articles.append({'title': title, 'url': url, 'description': '', 'date': ''})
+        return [("Articles", articles)]

     def preprocess_html(self, soup):
         for img in soup.findAll('img', srcset=True):
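The rewritten parse_index() is the heart of the fix: the old feedBox/feedBoxes markup presumably no longer appears on ft.com, so the recipe now collects every anchor the today's-paper page marks with data-trackable="main-link" into one flat "Articles" feed (which is also why the OrderedDict import was dropped). A standalone sketch of the same extraction, assuming BeautifulSoup and a saved copy of the index page; extract_articles is an illustrative name, not part of the recipe:

    from bs4 import BeautifulSoup

    PREFIX = 'https://www.ft.com'  # mirrors the recipe's PREFIX

    def extract_articles(page_html):
        soup = BeautifulSoup(page_html, 'html.parser')
        articles = []
        # Headline links on /todaysnewspaper/uk carry
        # data-trackable="main-link"; their hrefs are site-relative.
        for a in soup.find_all('a', href=True,
                               attrs={'data-trackable': 'main-link'}):
            articles.append({'title': a.get_text(strip=True),
                             'url': PREFIX + a['href'],
                             'description': '', 'date': ''})
        # One flat feed replaces the old per-section grouping.
        return [('Articles', articles)]

The international-edition recipe below receives the same rewrite.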
The international-edition recipe (index moved from www.ft.com/international-edition to www.ft.com/todaysnewspaper/international):

@@ -5,11 +5,10 @@
 __license__ = 'GPL v3'
 __copyright__ = '2010-2017, Darko Miletic <darko.miletic at gmail.com>'
 '''
-www.ft.com/international-edition
+www.ft.com/todaysnewspaper/international
 '''

 from calibre.web.feeds.news import BasicNewsRecipe
-from collections import OrderedDict
 from urllib import unquote


@@ -34,20 +33,26 @@ class FinancialTimes(BasicNewsRecipe):
     encoding = 'utf8'
     publication_type = 'newspaper'
     handle_gzip = True
+    compress_news_images = True
+    scale_news_images_to_device = True
+    ignore_duplicate_articles = {'url'}
     LOGIN = 'https://accounts.ft.com/login?location=https%3A%2F%2Fwww.ft.com%2F'
     LOGOUT = 'https://myaccount.ft.com/logout'
-    INDEX = 'http://www.ft.com/international-edition'
-    PREFIX = 'http://www.ft.com'
+    INDEX = 'https://www.ft.com/todaysnewspaper/international'
+    PREFIX = 'https://www.ft.com'

     keep_only_tags = [
-        classes('article__header--wrapper article__time-byline article__body n-content-image barrier-grid__heading')
+        classes('topper__headline topper__standfirst n-content-image--full article__time-byline article__body')
     ]

     remove_tags = [
-        classes('n-content-related-box tour-tip')
+        classes('n-content-related-box tour-tip n-content-recommended n-content-video')
     ]

-    remove_attributes = ['width', 'height', 'lang', 'style']
+    extra_css = '''
+        body {font-family: Georgia,serif;}
+        img {display:block;}
+    '''

     def get_browser(self):
         br = BasicNewsRecipe.get_browser(self)

@@ -63,29 +68,14 @@ class FinancialTimes(BasicNewsRecipe):
         return br

     def parse_index(self):
-        feeds = OrderedDict()
+        articles = []
         soup = self.index_to_soup(self.INDEX)
-        section_title = 'Untitled'
-
-        for column in soup.findAll('div', attrs={'class': 'feedBoxes clearfix'}):
-            for section in column.findAll('div', attrs={'class': 'feedBox'}):
-                sectiontitle = self.tag_to_string(section.find('h4'))
-                if '...' not in sectiontitle:
-                    section_title = sectiontitle
-                for article in section.ul.findAll('li'):
-                    articles = []
-                    title = self.tag_to_string(article.a)
-                    url = article.a['href']
-                    articles.append(
-                        {'title': title, 'url': url, 'description': '', 'date': ''})
-
-                if articles:
-                    if section_title not in feeds:
-                        feeds[section_title] = []
-                    feeds[section_title] += articles
-
-        ans = [(key, val) for key, val in feeds.iteritems()]
-        return ans
+        for article in soup.findAll('a', href=True, attrs={'data-trackable':'main-link'}):
+            url = self.PREFIX + article['href']
+            title = self.tag_to_string(article)
+            articles.append({'title': title, 'url': url, 'description': '', 'date': ''})
+        return [("Articles", articles)]

     def preprocess_html(self, soup):
         for img in soup.findAll('img', srcset=True):
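For anyone verifying a recipe fix like this locally, calibre's converter can run a recipe file directly; the --test flag fetches only a couple of articles per feed and -vv makes the download verbose (the .recipe filename below is an assumption, since this page does not show file paths):

    ebook-convert financial_times.recipe out.epub --test -vv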