Update Washington Post

This commit is contained in:
Kovid Goyal 2018-10-29 10:15:44 +05:30
parent 9799b4f8b7
commit 0626088861
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -7,6 +7,12 @@ www.washingtonpost.com
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
def classes(classes):
    """Build a tag-matching spec that selects elements by CSS class name.

    ``classes`` is a whitespace-separated string of class names. The result
    is a dict of the form ``{'attrs': {'class': matcher}}``; ``matcher``
    takes the value of an element's ``class`` attribute and returns the
    (possibly empty) frozenset of names it shares with ``classes``. A
    missing or empty attribute value is returned unchanged, which is falsy.
    """
    # Split on any whitespace, exactly as the matcher splits the attribute
    # value; split(' ') would leave tab- or newline-separated names glued
    # together so they could never match.
    wanted = frozenset(classes.split())

    def matcher(value):
        if not value:
            return value
        return wanted.intersection(value.split())

    return {'attrs': {'class': matcher}}
class TheWashingtonPost(BasicNewsRecipe): class TheWashingtonPost(BasicNewsRecipe):
title = 'The Washington Post' title = 'The Washington Post'
__author__ = 'Darko Miletic' __author__ = 'Darko Miletic'
@ -17,7 +23,6 @@ class TheWashingtonPost(BasicNewsRecipe):
max_articles_per_feed = 200 max_articles_per_feed = 200
no_stylesheets = True no_stylesheets = True
encoding = 'utf8' encoding = 'utf8'
delay = 1
use_embedded_content = False use_embedded_content = False
language = 'en' language = 'en'
remove_empty_feeds = True remove_empty_feeds = True
@ -26,6 +31,10 @@ class TheWashingtonPost(BasicNewsRecipe):
keep_only_tags = [ keep_only_tags = [
dict(itemprop=['headline', 'articleBody']), dict(itemprop=['headline', 'articleBody']),
] ]
remove_tags = [
dict(name=['meta', 'link']),
classes('inline-video'),
]
feeds = [ feeds = [
(u'World', u'http://feeds.washingtonpost.com/rss/world'), (u'World', u'http://feeds.washingtonpost.com/rss/world'),
@ -40,3 +49,8 @@ class TheWashingtonPost(BasicNewsRecipe):
(u'Sports', u'http://feeds.washingtonpost.com/rss/sports'), (u'Sports', u'http://feeds.washingtonpost.com/rss/sports'),
(u'Redskins', u'http://feeds.washingtonpost.com/rss/sports/redskins'), (u'Redskins', u'http://feeds.washingtonpost.com/rss/sports/redskins'),
] ]
def preprocess_html(self, soup, *a):
    """Point each image at its low-resolution source, then return ``soup``.

    Every ``img`` element that ``soup.findAll`` reports as carrying a
    ``data-low-res-src`` attribute has its ``src`` set to that attribute's
    value. Extra positional arguments are accepted and ignored.
    """
    for img in soup.findAll('img', attrs={'data-low-res-src': True}):
        low_res = img['data-low-res-src']
        # An empty attribute carries no usable URL; keep the existing src
        # instead of blanking it.
        if low_res:
            img['src'] = low_res
    return soup