From 6d3865ca107384cc0530b62ef90b2613a28fb2f0 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Mon, 11 Mar 2024 09:27:33 +0530 Subject: [PATCH] update livemint --- recipes/livemint.recipe | 10 +++++++--- recipes/natgeo.recipe | 10 +++++----- recipes/natgeohis.recipe | 10 +++++----- recipes/natgeomag.recipe | 10 +++++----- recipes/phillosophy_now.recipe | 2 +- 5 files changed, 23 insertions(+), 19 deletions(-) diff --git a/recipes/livemint.recipe b/recipes/livemint.recipe index 9010d0f2b8..b59bfb6b12 100644 --- a/recipes/livemint.recipe +++ b/recipes/livemint.recipe @@ -84,6 +84,7 @@ class LiveMint(BasicNewsRecipe): extra_css = ''' img {margin:0 auto;} + .psTopLogoItem img, .ecologoStory { width:100; } #img-cap {font-size:small; text-align:center;} .summary, .highlights, .synopsis { font-weight:normal !important; font-style:italic; color:#202020; @@ -129,7 +130,11 @@ class LiveMint(BasicNewsRecipe): def preprocess_raw_html(self, raw, *a): # remove empty p tags - raw = re.sub(r'(

\s* \s*<\/p>)|(

\s*<\/p>)', '', raw) + raw = re.sub( + r'(

\s*)(<[^(\/|a|i|b|em|strong)])', '\g<2>', re.sub( + r'(

\s* \s*<\/p>)|(

\s*<\/p>)|( \s*<\/p>)', '', raw + ) + ) if '' in raw: m = re.search(r'type="application/ld\+json">[^<]+?"@type": "NewsArticle"', raw) raw1 = raw[m.start():] @@ -141,8 +146,7 @@ class LiveMint(BasicNewsRecipe): body = '

' + body + '

' raw2 = re.sub(r'
([^}]*)
', body, raw) return raw2 - else: - return raw + return raw def preprocess_html(self, soup): for strong in soup.findAll('strong'): diff --git a/recipes/natgeo.recipe b/recipes/natgeo.recipe index 3580693ad9..4f902af9cc 100644 --- a/recipes/natgeo.recipe +++ b/recipes/natgeo.recipe @@ -53,8 +53,8 @@ def parse_inline(inl): yield '
'.format(props['image']['src']) if 'caption' in props: yield '
{}{}
'.format( - props['caption']['text'], ' ' + props['caption']['credit'] - ) + props['caption']['text'], ' ' + props['caption']['credit'] + ) yield '

' if inl.get('content', {}).get('name', '') == 'ImageGroup': if 'images' in inl['content']['props']: @@ -64,8 +64,8 @@ def parse_inline(inl): yield '
'.format(imgs['src']) if 'caption' in imgs: yield '
{}{}
'.format( - imgs['caption']['text'], ' ' + imgs['caption']['credit'] - ) + imgs['caption']['text'], ' ' + imgs['caption']['credit'] + ) yield '

' @@ -83,7 +83,7 @@ def parse_body(x): tag = x['type'] if tag == 'inline': yield ''.join(parse_inline(x)) - elif 'attrs' in x and 'href' in x.get('attrs', {}): + elif 'attrs' in x and 'href' in x.get('attrs', ''): yield '<' + tag + ' href = "{}">'.format(x['attrs']['href']) for yld in parse_cont(x): yield yld diff --git a/recipes/natgeohis.recipe b/recipes/natgeohis.recipe index 89a2b59ea0..d52a63f7c5 100644 --- a/recipes/natgeohis.recipe +++ b/recipes/natgeohis.recipe @@ -52,8 +52,8 @@ def parse_inline(inl): yield '
'.format(props['image']['src']) if 'caption' in props: yield '
{}{}
'.format( - props['caption']['text'], ' ' + props['caption']['credit'] - ) + props['caption']['text'], ' ' + props['caption']['credit'] + ) yield '

' if inl.get('content', {}).get('name', '') == 'ImageGroup': if 'images' in inl['content']['props']: @@ -63,8 +63,8 @@ def parse_inline(inl): yield '
'.format(imgs['src']) if 'caption' in imgs: yield '
{}{}
'.format( - imgs['caption']['text'], ' ' + imgs['caption']['credit'] - ) + imgs['caption']['text'], ' ' + imgs['caption']['credit'] + ) yield '

' @@ -82,7 +82,7 @@ def parse_body(x): tag = x['type'] if tag == 'inline': yield ''.join(parse_inline(x)) - elif 'attrs' in x and 'href' in x.get('attrs', {}): + elif 'attrs' in x and 'href' in x.get('attrs', ''): yield '<' + tag + ' href = "{}">'.format(x['attrs']['href']) for yld in parse_cont(x): yield yld diff --git a/recipes/natgeomag.recipe b/recipes/natgeomag.recipe index 537263eb65..d4c179c88c 100644 --- a/recipes/natgeomag.recipe +++ b/recipes/natgeomag.recipe @@ -57,8 +57,8 @@ def parse_inline(inl): yield '
'.format(props['image']['src']) if 'caption' in props: yield '
{}{}
'.format( - props['caption']['text'], ' ' + props['caption']['credit'] - ) + props['caption']['text'], ' ' + props['caption']['credit'] + ) yield '

' if inl.get('content', {}).get('name', '') == 'ImageGroup': if 'images' in inl['content']['props']: @@ -68,8 +68,8 @@ def parse_inline(inl): yield '
'.format(imgs['src']) if 'caption' in imgs: yield '
{}{}
'.format( - imgs['caption']['text'], ' ' + imgs['caption']['credit'] - ) + imgs['caption']['text'], ' ' + imgs['caption']['credit'] + ) yield '

' @@ -87,7 +87,7 @@ def parse_body(x): tag = x['type'] if tag == 'inline': yield ''.join(parse_inline(x)) - elif 'attrs' in x and 'href' in x.get('attrs', {}): + elif 'attrs' in x and 'href' in x.get('attrs', ''): yield '<' + tag + ' href = "{}">'.format(x['attrs']['href']) for yld in parse_cont(x): yield yld diff --git a/recipes/phillosophy_now.recipe b/recipes/phillosophy_now.recipe index 1fb4810ddc..350a57b4f0 100644 --- a/recipes/phillosophy_now.recipe +++ b/recipes/phillosophy_now.recipe @@ -27,7 +27,7 @@ class PhilosophyNow(BasicNewsRecipe): remove_tags = [dict(name='div', attrs={'id':'welcome_box'})] extra_css = ''' img {display:block; margin:0 auto;} - .articleImage { font-size:small; text-align:center; } + .articleImageCaption { font-size:small; text-align:center; } em, blockquote { color:#202020; } '''