...

2025-07-09 03:04:10 -04:00 · 2023-10-23 12:56:53 +05:30 · 2023-10-23 12:56:53 +05:30 · f908d8ce19
commit f908d8ce19
parent d9059b3899
2 changed files with 15 additions and 0 deletions
--- a/recipes/wsj.recipe
+++ b/recipes/wsj.recipe
@ -52,6 +52,7 @@ class WSJ(BasicNewsRecipe):
        #big-top-caption { font-size:small; text-align:center; }
        [data-type:"tagline"], em { font-style:italic; color:#202020; }
        .auth { font-size:small; }
        .sub, em, i { color: #202020; }
    '''
    keep_only_tags = [
@ -102,6 +103,8 @@ class WSJ(BasicNewsRecipe):
        for h2 in soup.findAll('h2'):
            if self.tag_to_string(h2).startswith(('What to Read Next', 'Conversation')):
                h2.extract()
            h2.name = 'h3'
            h2['class'] = 'sub'
        for ph in soup.findAll('a', attrs={'data-type':['phrase', 'link']}):
            if div := ph.findParent('div'):
                div.name = 'span'
--- a/recipes/wsj_free.recipe
+++ b/recipes/wsj_free.recipe
@ -53,6 +53,8 @@ class WSJ(BasicNewsRecipe):
    # NOTE: CSS attribute selectors are written [attr="value"]; the previous
    # [data-type:"tagline"] form is invalid and makes a CSS parser discard the
    # whole rule, so the tagline styling was never applied.
    extra_css = '''
        #big-top-caption { font-size:small; text-align:center; }
        [data-type="tagline"] { font-style:italic; color:#202020; }
        .auth { font-size:small; }
        .sub, em, i { color: #202020; }
    '''
    keep_only_tags = [
@ -102,6 +104,16 @@ class WSJ(BasicNewsRecipe):
        for h2 in soup.findAll('h2'):
            if self.tag_to_string(h2).startswith(('What to Read Next', 'Conversation')):
                h2.extract()
            h2.name = 'h3'
            h2['class'] = 'sub'
        for ph in soup.findAll('a', attrs={'data-type':['phrase', 'link']}):
            if div := ph.findParent('div'):
                div.name = 'span'
        for auth in soup.findAll('a', attrs={'aria-label': lambda x: x and x.startswith('Author page')}):
            if div := auth.find_previous_sibling('div'):
                div.name = 'span'
            if parent := auth.findParent('div'):
                parent['class'] = 'auth'
        return soup