mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Update Washington Post
This commit is contained in:
parent
876623ab2e
commit
450b64a6f8
@ -29,11 +29,13 @@ class TheWashingtonPost(BasicNewsRecipe):
|
|||||||
publication_type = 'newspaper'
|
publication_type = 'newspaper'
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(itemprop=['headline', 'articleBody']),
|
dict(name=['h1', 'figure']),
|
||||||
|
classes('byline article-body'),
|
||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['meta', 'link']),
|
dict(name=['meta', 'link']),
|
||||||
classes('inline-video'),
|
classes('inline-video author-tooltip author-image'),
|
||||||
|
dict(attrs={'data-qa': 'article-body-ad'}),
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
@ -51,6 +53,6 @@ class TheWashingtonPost(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def preprocess_html(self, soup, *a):
|
def preprocess_html(self, soup, *a):
|
||||||
for img in soup.findAll('img', attrs={'data-low-res-src': True}):
|
for img in soup.findAll('img', srcset=True):
|
||||||
img['src'] = img['data-low-res-src']
|
img['src'] = img['srcset'].split()[0]
|
||||||
return soup
|
return soup
|
||||||
|
Loading…
x
Reference in New Issue
Block a user