diff --git a/recipes/livemint.recipe b/recipes/livemint.recipe
index 9010d0f2b8..b59bfb6b12 100644
--- a/recipes/livemint.recipe
+++ b/recipes/livemint.recipe
@@ -84,6 +84,7 @@ class LiveMint(BasicNewsRecipe):
extra_css = '''
img {margin:0 auto;}
+ .psTopLogoItem img, .ecologoStory { width:100; }
#img-cap {font-size:small; text-align:center;}
.summary, .highlights, .synopsis {
font-weight:normal !important; font-style:italic; color:#202020;
@@ -129,7 +130,11 @@ class LiveMint(BasicNewsRecipe):
def preprocess_raw_html(self, raw, *a):
# remove empty p tags
- raw = re.sub(r'(
\s* \s*<\/p>)|(
\s*<\/p>)', '', raw)
+ raw = re.sub(
+ r'(
\s*)(<[^(\/|a|i|b|em|strong)])', '\g<2>', re.sub(
+ r'(
\s* \s*<\/p>)|(
\s*<\/p>)|(
\s*<\/p>)', '', raw
+ )
+ )
if '' in raw:
m = re.search(r'type="application/ld\+json">[^<]+?"@type": "NewsArticle"', raw)
raw1 = raw[m.start():]
@@ -141,8 +146,7 @@ class LiveMint(BasicNewsRecipe):
body = '
'
raw2 = re.sub(r'([^}]*)
', body, raw)
return raw2
- else:
- return raw
+ return raw
def preprocess_html(self, soup):
for strong in soup.findAll('strong'):
diff --git a/recipes/natgeo.recipe b/recipes/natgeo.recipe
index 3580693ad9..4f902af9cc 100644
--- a/recipes/natgeo.recipe
+++ b/recipes/natgeo.recipe
@@ -53,8 +53,8 @@ def parse_inline(inl):
yield ''.format(props['image']['src'])
if 'caption' in props:
yield '{}{}
'.format(
- props['caption']['text'], ' ' + props['caption']['credit']
- )
+ props['caption']['text'], ' ' + props['caption']['credit']
+ )
yield ''
if inl.get('content', {}).get('name', '') == 'ImageGroup':
if 'images' in inl['content']['props']:
@@ -64,8 +64,8 @@ def parse_inline(inl):
yield ''.format(imgs['src'])
if 'caption' in imgs:
yield '{}{}
'.format(
- imgs['caption']['text'], ' ' + imgs['caption']['credit']
- )
+ imgs['caption']['text'], ' ' + imgs['caption']['credit']
+ )
yield ''
@@ -83,7 +83,7 @@ def parse_body(x):
tag = x['type']
if tag == 'inline':
yield ''.join(parse_inline(x))
- elif 'attrs' in x and 'href' in x.get('attrs', {}):
+ elif 'attrs' in x and 'href' in x.get('attrs', ''):
yield '<' + tag + ' href = "{}">'.format(x['attrs']['href'])
for yld in parse_cont(x):
yield yld
diff --git a/recipes/natgeohis.recipe b/recipes/natgeohis.recipe
index 89a2b59ea0..d52a63f7c5 100644
--- a/recipes/natgeohis.recipe
+++ b/recipes/natgeohis.recipe
@@ -52,8 +52,8 @@ def parse_inline(inl):
yield ''.format(props['image']['src'])
if 'caption' in props:
yield '{}{}
'.format(
- props['caption']['text'], ' ' + props['caption']['credit']
- )
+ props['caption']['text'], ' ' + props['caption']['credit']
+ )
yield ''
if inl.get('content', {}).get('name', '') == 'ImageGroup':
if 'images' in inl['content']['props']:
@@ -63,8 +63,8 @@ def parse_inline(inl):
yield ''.format(imgs['src'])
if 'caption' in imgs:
yield '{}{}
'.format(
- imgs['caption']['text'], ' ' + imgs['caption']['credit']
- )
+ imgs['caption']['text'], ' ' + imgs['caption']['credit']
+ )
yield ''
@@ -82,7 +82,7 @@ def parse_body(x):
tag = x['type']
if tag == 'inline':
yield ''.join(parse_inline(x))
- elif 'attrs' in x and 'href' in x.get('attrs', {}):
+ elif 'attrs' in x and 'href' in x.get('attrs', ''):
yield '<' + tag + ' href = "{}">'.format(x['attrs']['href'])
for yld in parse_cont(x):
yield yld
diff --git a/recipes/natgeomag.recipe b/recipes/natgeomag.recipe
index 537263eb65..d4c179c88c 100644
--- a/recipes/natgeomag.recipe
+++ b/recipes/natgeomag.recipe
@@ -57,8 +57,8 @@ def parse_inline(inl):
yield ''.format(props['image']['src'])
if 'caption' in props:
yield '{}{}
'.format(
- props['caption']['text'], ' ' + props['caption']['credit']
- )
+ props['caption']['text'], ' ' + props['caption']['credit']
+ )
yield ''
if inl.get('content', {}).get('name', '') == 'ImageGroup':
if 'images' in inl['content']['props']:
@@ -68,8 +68,8 @@ def parse_inline(inl):
yield ''.format(imgs['src'])
if 'caption' in imgs:
yield '{}{}
'.format(
- imgs['caption']['text'], ' ' + imgs['caption']['credit']
- )
+ imgs['caption']['text'], ' ' + imgs['caption']['credit']
+ )
yield ''
@@ -87,7 +87,7 @@ def parse_body(x):
tag = x['type']
if tag == 'inline':
yield ''.join(parse_inline(x))
- elif 'attrs' in x and 'href' in x.get('attrs', {}):
+ elif 'attrs' in x and 'href' in x.get('attrs', ''):
yield '<' + tag + ' href = "{}">'.format(x['attrs']['href'])
for yld in parse_cont(x):
yield yld
diff --git a/recipes/phillosophy_now.recipe b/recipes/phillosophy_now.recipe
index 1fb4810ddc..350a57b4f0 100644
--- a/recipes/phillosophy_now.recipe
+++ b/recipes/phillosophy_now.recipe
@@ -27,7 +27,7 @@ class PhilosophyNow(BasicNewsRecipe):
remove_tags = [dict(name='div', attrs={'id':'welcome_box'})]
extra_css = '''
img {display:block; margin:0 auto;}
- .articleImage { font-size:small; text-align:center; }
+ .articleImageCaption { font-size:small; text-align:center; }
em, blockquote { color:#202020; }
'''