mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Update Washington Post
This commit is contained in:
parent
9799b4f8b7
commit
0626088861
@ -7,6 +7,12 @@ www.washingtonpost.com
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
def classes(classes):
|
||||||
|
q = frozenset(classes.split(' '))
|
||||||
|
return dict(attrs={
|
||||||
|
'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||||
|
|
||||||
|
|
||||||
class TheWashingtonPost(BasicNewsRecipe):
|
class TheWashingtonPost(BasicNewsRecipe):
|
||||||
title = 'The Washington Post'
|
title = 'The Washington Post'
|
||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
@ -17,7 +23,6 @@ class TheWashingtonPost(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 200
|
max_articles_per_feed = 200
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
delay = 1
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
language = 'en'
|
language = 'en'
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
@ -26,6 +31,10 @@ class TheWashingtonPost(BasicNewsRecipe):
|
|||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(itemprop=['headline', 'articleBody']),
|
dict(itemprop=['headline', 'articleBody']),
|
||||||
]
|
]
|
||||||
|
remove_tags = [
|
||||||
|
dict(name=['meta', 'link']),
|
||||||
|
classes('inline-video'),
|
||||||
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'World', u'http://feeds.washingtonpost.com/rss/world'),
|
(u'World', u'http://feeds.washingtonpost.com/rss/world'),
|
||||||
@ -40,3 +49,8 @@ class TheWashingtonPost(BasicNewsRecipe):
|
|||||||
(u'Sports', u'http://feeds.washingtonpost.com/rss/sports'),
|
(u'Sports', u'http://feeds.washingtonpost.com/rss/sports'),
|
||||||
(u'Redskins', u'http://feeds.washingtonpost.com/rss/sports/redskins'),
|
(u'Redskins', u'http://feeds.washingtonpost.com/rss/sports/redskins'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup, *a):
|
||||||
|
for img in soup.findAll('img', attrs={'data-low-res-src': True}):
|
||||||
|
img['src'] = img['data-low-res-src']
|
||||||
|
return soup
|
||||||
|
Loading…
x
Reference in New Issue
Block a user