Update WSJ

This commit is contained in:
Kovid Goyal 2023-09-15 16:13:24 +05:30
parent c093659f84
commit 6ed414c1ca
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 10 additions and 6 deletions

View File

@@ -66,13 +66,15 @@ class WSJ(BasicNewsRecipe):
''' '''
keep_only_tags = [ keep_only_tags = [
classes('wsj-article-headline-wrap articleLead bylineWrap bigTop-hero'), classes('wsj-article-headline-wrap articleLead bylineWrap bigTop-hero article-container'),
dict(name='section', attrs={'subscriptions-section':'content'}) dict(name='section', attrs={'subscriptions-section':'content'})
] ]
remove_tags = [ remove_tags = [
classes('wsj-ad newsletter-inset media-object-video media-object-podcast podcast--iframe dynamic-inset-overflow-button'), classes('wsj-ad newsletter-inset media-object-video media-object-podcast podcast--iframe dynamic-inset-overflow-button'),
dict(name='amp-iframe') # interactive graphics dict(role=["toolbar", "complementary"]),
dict(attrs={"aria-label": ["Sponsored Offers", "What to Read Next"]}),
dict(name='amp-iframe'), # interactive graphics
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):
@@ -91,7 +93,7 @@ class WSJ(BasicNewsRecipe):
def get_cover_url(self): def get_cover_url(self):
from datetime import date from datetime import date
cover = 'https://img.kiosko.net/' + date.today().strftime('%Y/%m/%d') + '/us/wsj.750.jpg' cover = 'https://img.kiosko.net/' + date.today().strftime('%Y/%m/%d') + '/us/wsj.750.jpg'
br = BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False) br = BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False)
try: try:
br.open(cover) br.open(cover)
except: except:

View File

@@ -66,13 +66,15 @@ class WSJ(BasicNewsRecipe):
''' '''
keep_only_tags = [ keep_only_tags = [
classes('wsj-article-headline-wrap articleLead bylineWrap bigTop-hero'), classes('wsj-article-headline-wrap articleLead bylineWrap bigTop-hero article-container'),
dict(name='section', attrs={'subscriptions-section':'content'}) dict(name='section', attrs={'subscriptions-section':'content'})
] ]
remove_tags = [ remove_tags = [
classes('wsj-ad newsletter-inset media-object-video media-object-podcast podcast--iframe dynamic-inset-overflow-button'), classes('wsj-ad newsletter-inset media-object-video media-object-podcast podcast--iframe dynamic-inset-overflow-button'),
dict(name='amp-iframe') # interactive graphics dict(role=["toolbar", "complementary"]),
dict(attrs={"aria-label": ["Sponsored Offers", "What to Read Next"]}),
dict(name='amp-iframe'), # interactive graphics
] ]
def preprocess_html(self, soup): def preprocess_html(self, soup):
@@ -91,7 +93,7 @@ class WSJ(BasicNewsRecipe):
def get_cover_url(self): def get_cover_url(self):
from datetime import date from datetime import date
cover = 'https://img.kiosko.net/' + date.today().strftime('%Y/%m/%d') + '/us/wsj.750.jpg' cover = 'https://img.kiosko.net/' + date.today().strftime('%Y/%m/%d') + '/us/wsj.750.jpg'
br = BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False) br = BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False)
try: try:
br.open(cover) br.open(cover)
except: except: