Update The Economist

This commit is contained in:
Kovid Goyal 2017-11-26 08:47:48 +05:30
parent 0a8e26071a
commit 8f515a6cc7
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 16 additions and 22 deletions

View File

@ -6,7 +6,6 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
economist.com economist.com
''' '''
import cookielib import cookielib
import re
from collections import OrderedDict from collections import OrderedDict
from calibre.ebooks.BeautifulSoup import NavigableString, Tag from calibre.ebooks.BeautifulSoup import NavigableString, Tag
@ -40,7 +39,7 @@ class Economist(BasicNewsRecipe):
INDEX = 'https://www.economist.com/printedition' INDEX = 'https://www.economist.com/printedition'
description = ( description = (
'Global news and current affairs from a European' 'Global news and current affairs from a European'
' perspective. Best downloaded on Friday mornings (GMT)' ' perspective. Best downloaded on Saturday mornings (GMT)'
) )
extra_css = ''' extra_css = '''
.headline {font-size: x-large;} .headline {font-size: x-large;}
@ -82,7 +81,7 @@ class Economist(BasicNewsRecipe):
), ),
dict(attrs={ dict(attrs={
'class': lambda x: x and 'blog-post__siblings-list-aside' in x.split()}), 'class': lambda x: x and 'blog-post__siblings-list-aside' in x.split()}),
classes('share-links-header teaser--wrapped latest-updates-panel__container latest-updates-panel__article-link blog-post__section'), classes('share-links-header teaser--wrapped latest-updates-panel__container latest-updates-panel__article-link blog-post__section newsletter-form'),
] ]
keep_only_tags = [dict(name='article', id=lambda x: not x)] keep_only_tags = [dict(name='article', id=lambda x: not x)]
no_stylesheets = True no_stylesheets = True
@ -166,15 +165,13 @@ class Economist(BasicNewsRecipe):
return ans return ans
def economist_parse_index(self, soup): def economist_parse_index(self, soup):
img = soup.find(attrs={'src': True, 'class': 'print-edition__cover-widget__image'}) img = soup.find(attrs={'srcset': True, 'class': lambda x: x and 'print-edition__cover-widget__image' in x.split()})
if img is not None: if img is not None:
self.cover_url = process_url(img['src'], False) for part in img['srcset'].split():
else: if part.startswith('//'):
div = soup.find('div', attrs={'class': 'issue-image'}) self.cover_url = 'https:' + part
if div is not None: break
img = div.find('img', src=True)
if img is not None:
self.cover_url = re.sub('thumbnail', 'full', img['src'])
sections = soup.findAll( sections = soup.findAll(
'div', attrs={'class': 'list__title', 'div', attrs={'class': 'list__title',
'data-reactid': True} 'data-reactid': True}

View File

@ -6,7 +6,6 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
economist.com economist.com
''' '''
import cookielib import cookielib
import re
from collections import OrderedDict from collections import OrderedDict
from calibre.ebooks.BeautifulSoup import NavigableString, Tag from calibre.ebooks.BeautifulSoup import NavigableString, Tag
@ -40,7 +39,7 @@ class Economist(BasicNewsRecipe):
INDEX = 'https://www.economist.com/printedition' INDEX = 'https://www.economist.com/printedition'
description = ( description = (
'Global news and current affairs from a European' 'Global news and current affairs from a European'
' perspective. Best downloaded on Friday mornings (GMT)' ' perspective. Best downloaded on Saturday mornings (GMT)'
) )
extra_css = ''' extra_css = '''
.headline {font-size: x-large;} .headline {font-size: x-large;}
@ -82,7 +81,7 @@ class Economist(BasicNewsRecipe):
), ),
dict(attrs={ dict(attrs={
'class': lambda x: x and 'blog-post__siblings-list-aside' in x.split()}), 'class': lambda x: x and 'blog-post__siblings-list-aside' in x.split()}),
classes('share-links-header teaser--wrapped latest-updates-panel__container latest-updates-panel__article-link blog-post__section'), classes('share-links-header teaser--wrapped latest-updates-panel__container latest-updates-panel__article-link blog-post__section newsletter-form'),
] ]
keep_only_tags = [dict(name='article', id=lambda x: not x)] keep_only_tags = [dict(name='article', id=lambda x: not x)]
no_stylesheets = True no_stylesheets = True
@ -166,15 +165,13 @@ class Economist(BasicNewsRecipe):
return ans return ans
def economist_parse_index(self, soup): def economist_parse_index(self, soup):
img = soup.find(attrs={'src': True, 'class': 'print-edition__cover-widget__image'}) img = soup.find(attrs={'srcset': True, 'class': lambda x: x and 'print-edition__cover-widget__image' in x.split()})
if img is not None: if img is not None:
self.cover_url = process_url(img['src'], False) for part in img['srcset'].split():
else: if part.startswith('//'):
div = soup.find('div', attrs={'class': 'issue-image'}) self.cover_url = 'https:' + part
if div is not None: break
img = div.find('img', src=True)
if img is not None:
self.cover_url = re.sub('thumbnail', 'full', img['src'])
sections = soup.findAll( sections = soup.findAll(
'div', attrs={'class': 'list__title', 'div', attrs={'class': 'list__title',
'data-reactid': True} 'data-reactid': True}