Update WSJ

This commit is contained in:
Kovid Goyal 2023-09-15 16:13:24 +05:30
parent c093659f84
commit 6ed414c1ca
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 10 additions and 6 deletions

View File

@@ -66,13 +66,15 @@ class WSJ(BasicNewsRecipe):
'''
keep_only_tags = [
-classes('wsj-article-headline-wrap articleLead bylineWrap bigTop-hero'),
+classes('wsj-article-headline-wrap articleLead bylineWrap bigTop-hero article-container'),
dict(name='section', attrs={'subscriptions-section':'content'})
]
remove_tags = [
classes('wsj-ad newsletter-inset media-object-video media-object-podcast podcast--iframe dynamic-inset-overflow-button'),
-dict(name='amp-iframe') # interactive graphics
+dict(role=["toolbar", "complementary"]),
+dict(attrs={"aria-label": ["Sponsored Offers", "What to Read Next"]}),
+dict(name='amp-iframe'), # interactive graphics
]
def preprocess_html(self, soup):
@@ -91,7 +93,7 @@ class WSJ(BasicNewsRecipe):
def get_cover_url(self):
from datetime import date
cover = 'https://img.kiosko.net/' + date.today().strftime('%Y/%m/%d') + '/us/wsj.750.jpg'
-br = BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False)
+br = BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False)
try:
br.open(cover)
except:

View File

@@ -66,13 +66,15 @@ class WSJ(BasicNewsRecipe):
'''
keep_only_tags = [
-classes('wsj-article-headline-wrap articleLead bylineWrap bigTop-hero'),
+classes('wsj-article-headline-wrap articleLead bylineWrap bigTop-hero article-container'),
dict(name='section', attrs={'subscriptions-section':'content'})
]
remove_tags = [
classes('wsj-ad newsletter-inset media-object-video media-object-podcast podcast--iframe dynamic-inset-overflow-button'),
-dict(name='amp-iframe') # interactive graphics
+dict(role=["toolbar", "complementary"]),
+dict(attrs={"aria-label": ["Sponsored Offers", "What to Read Next"]}),
+dict(name='amp-iframe'), # interactive graphics
]
def preprocess_html(self, soup):
@@ -91,7 +93,7 @@ class WSJ(BasicNewsRecipe):
def get_cover_url(self):
from datetime import date
cover = 'https://img.kiosko.net/' + date.today().strftime('%Y/%m/%d') + '/us/wsj.750.jpg'
-br = BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False)
+br = BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False)
try:
br.open(cover)
except: