This commit is contained in:
unkn0w7n 2023-10-23 12:56:53 +05:30
parent d9059b3899
commit f908d8ce19
2 changed files with 15 additions and 0 deletions

View File

@@ -52,6 +52,7 @@ class WSJ(BasicNewsRecipe):
#big-top-caption { font-size:small; text-align:center; } #big-top-caption { font-size:small; text-align:center; }
[data-type:"tagline"], em { font-style:italic; color:#202020; } [data-type:"tagline"], em { font-style:italic; color:#202020; }
.auth { font-size:small; } .auth { font-size:small; }
.sub, em, i { color: #202020; }
''' '''
keep_only_tags = [ keep_only_tags = [
@@ -102,6 +103,8 @@ class WSJ(BasicNewsRecipe):
for h2 in soup.findAll('h2'): for h2 in soup.findAll('h2'):
if self.tag_to_string(h2).startswith(('What to Read Next', 'Conversation')): if self.tag_to_string(h2).startswith(('What to Read Next', 'Conversation')):
h2.extract() h2.extract()
h2.name = 'h3'
h2['class'] = 'sub'
for ph in soup.findAll('a', attrs={'data-type':['phrase', 'link']}): for ph in soup.findAll('a', attrs={'data-type':['phrase', 'link']}):
if div := ph.findParent('div'): if div := ph.findParent('div'):
div.name = 'span' div.name = 'span'

View File

@@ -53,6 +53,8 @@ class WSJ(BasicNewsRecipe):
extra_css = ''' extra_css = '''
#big-top-caption { font-size:small; text-align:center; } #big-top-caption { font-size:small; text-align:center; }
[data-type:"tagline"] { font-style:italic; color:#202020; } [data-type:"tagline"] { font-style:italic; color:#202020; }
.auth { font-size:small; }
.sub, em, i { color: #202020; }
''' '''
keep_only_tags = [ keep_only_tags = [
@@ -102,6 +104,16 @@ class WSJ(BasicNewsRecipe):
for h2 in soup.findAll('h2'): for h2 in soup.findAll('h2'):
if self.tag_to_string(h2).startswith(('What to Read Next', 'Conversation')): if self.tag_to_string(h2).startswith(('What to Read Next', 'Conversation')):
h2.extract() h2.extract()
h2.name = 'h3'
h2['class'] = 'sub'
for ph in soup.findAll('a', attrs={'data-type':['phrase', 'link']}):
if div := ph.findParent('div'):
div.name = 'span'
for auth in soup.findAll('a', attrs={'aria-label': lambda x: x and x.startswith('Author page')}):
if div := auth.find_previous_sibling('div'):
div.name = 'span'
if parent := auth.findParent('div'):
parent['class'] = 'auth'
return soup return soup