Update Newsweek

This commit is contained in:
Kovid Goyal 2018-11-22 10:43:53 +05:30
parent 2a26bb7be4
commit fb380aaaff
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -1,7 +1,7 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from collections import defaultdict from collections import defaultdict
BASE = 'http://www.newsweek.com' BASE = 'https://www.newsweek.com'
def href_to_url(a, add_piano=False): def href_to_url(a, add_piano=False):
@@ -23,15 +23,18 @@ class Newsweek(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
requires_version = (1, 40, 0) requires_version = (1, 40, 0)
keep_only_tags = class_sels( keep_only_tags = [
'article-header', 'article-body', 'header-image') dict(id='block-nw-magazine-article-header'),
class_sels('article-header', 'article-body')
]
remove_tags = [ remove_tags = [
dict(name='meta'), dict(name=['aside', 'meta', 'source']),
class_sels( class_sels(
'block-openadstream', 'block-ibtmedia-social', 'issue-next', 'block-openadstream', 'block-ibtmedia-social', 'issue-next',
'most-popular', 'ibt-media-stories', 'user-btn-group', 'most-popular', 'ibt-media-stories', 'user-btn-group',
'trial-link', 'trc_related_container', 'trial-link', 'trc_related_container',
'block-ibtmedia-top-stories', 'videocontent', 'newsletter-signup', 'block-ibtmedia-top-stories', 'videocontent', 'newsletter-signup',
'in-text-slideshows', 'content-correction', 'article-navigation'
), ),
dict(id=['taboola-below-main-column', 'piano-root', dict(id=['taboola-below-main-column', 'piano-root',
'block-nw-magazine-magazine-more-from-issue']), 'block-nw-magazine-magazine-more-from-issue']),
@@ -46,7 +49,7 @@ class Newsweek(BasicNewsRecipe):
a = li.xpath('descendant::a[@href]')[0] a = li.xpath('descendant::a[@href]')[0]
url = href_to_url(a, add_piano=True) url = href_to_url(a, add_piano=True)
self.timefmt = self.tag_to_string(a) self.timefmt = self.tag_to_string(a)
img = li.xpath('descendant::a[@href]/img[@src]')[0] img = li.xpath('descendant::a[@href]//img[@src]')[0]
self.cover_url = img.get('src') self.cover_url = img.get('src')
root = self.index_to_soup(url, as_tree=True) root = self.index_to_soup(url, as_tree=True)
features = [] features = []