mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
update livemint
This commit is contained in:
parent
88d926143e
commit
6d3865ca10
@ -84,6 +84,7 @@ class LiveMint(BasicNewsRecipe):
|
|||||||
|
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
img {margin:0 auto;}
|
img {margin:0 auto;}
|
||||||
|
.psTopLogoItem img, .ecologoStory { width:100; }
|
||||||
#img-cap {font-size:small; text-align:center;}
|
#img-cap {font-size:small; text-align:center;}
|
||||||
.summary, .highlights, .synopsis {
|
.summary, .highlights, .synopsis {
|
||||||
font-weight:normal !important; font-style:italic; color:#202020;
|
font-weight:normal !important; font-style:italic; color:#202020;
|
||||||
@ -129,7 +130,11 @@ class LiveMint(BasicNewsRecipe):
|
|||||||
|
|
||||||
def preprocess_raw_html(self, raw, *a):
|
def preprocess_raw_html(self, raw, *a):
|
||||||
# remove empty p tags
|
# remove empty p tags
|
||||||
raw = re.sub(r'(<p>\s* \s*<\/p>)|(<p>\s*<\/p>)', '', raw)
|
raw = re.sub(
|
||||||
|
r'(<p>\s*)(<[^(\/|a|i|b|em|strong)])', '\g<2>', re.sub(
|
||||||
|
r'(<p>\s* \s*<\/p>)|(<p>\s*<\/p>)|(<p\s*\S+> \s*<\/p>)', '', raw
|
||||||
|
)
|
||||||
|
)
|
||||||
if '<script>var wsjFlag=true;</script>' in raw:
|
if '<script>var wsjFlag=true;</script>' in raw:
|
||||||
m = re.search(r'type="application/ld\+json">[^<]+?"@type": "NewsArticle"', raw)
|
m = re.search(r'type="application/ld\+json">[^<]+?"@type": "NewsArticle"', raw)
|
||||||
raw1 = raw[m.start():]
|
raw1 = raw[m.start():]
|
||||||
@ -141,7 +146,6 @@ class LiveMint(BasicNewsRecipe):
|
|||||||
body = '<div class="FirstEle"> <p>' + body + '</p> </div>'
|
body = '<div class="FirstEle"> <p>' + body + '</p> </div>'
|
||||||
raw2 = re.sub(r'<div class="FirstEle">([^}]*)</div>', body, raw)
|
raw2 = re.sub(r'<div class="FirstEle">([^}]*)</div>', body, raw)
|
||||||
return raw2
|
return raw2
|
||||||
else:
|
|
||||||
return raw
|
return raw
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
|
@ -83,7 +83,7 @@ def parse_body(x):
|
|||||||
tag = x['type']
|
tag = x['type']
|
||||||
if tag == 'inline':
|
if tag == 'inline':
|
||||||
yield ''.join(parse_inline(x))
|
yield ''.join(parse_inline(x))
|
||||||
elif 'attrs' in x and 'href' in x.get('attrs', {}):
|
elif 'attrs' in x and 'href' in x.get('attrs', ''):
|
||||||
yield '<' + tag + ' href = "{}">'.format(x['attrs']['href'])
|
yield '<' + tag + ' href = "{}">'.format(x['attrs']['href'])
|
||||||
for yld in parse_cont(x):
|
for yld in parse_cont(x):
|
||||||
yield yld
|
yield yld
|
||||||
|
@ -82,7 +82,7 @@ def parse_body(x):
|
|||||||
tag = x['type']
|
tag = x['type']
|
||||||
if tag == 'inline':
|
if tag == 'inline':
|
||||||
yield ''.join(parse_inline(x))
|
yield ''.join(parse_inline(x))
|
||||||
elif 'attrs' in x and 'href' in x.get('attrs', {}):
|
elif 'attrs' in x and 'href' in x.get('attrs', ''):
|
||||||
yield '<' + tag + ' href = "{}">'.format(x['attrs']['href'])
|
yield '<' + tag + ' href = "{}">'.format(x['attrs']['href'])
|
||||||
for yld in parse_cont(x):
|
for yld in parse_cont(x):
|
||||||
yield yld
|
yield yld
|
||||||
|
@ -87,7 +87,7 @@ def parse_body(x):
|
|||||||
tag = x['type']
|
tag = x['type']
|
||||||
if tag == 'inline':
|
if tag == 'inline':
|
||||||
yield ''.join(parse_inline(x))
|
yield ''.join(parse_inline(x))
|
||||||
elif 'attrs' in x and 'href' in x.get('attrs', {}):
|
elif 'attrs' in x and 'href' in x.get('attrs', ''):
|
||||||
yield '<' + tag + ' href = "{}">'.format(x['attrs']['href'])
|
yield '<' + tag + ' href = "{}">'.format(x['attrs']['href'])
|
||||||
for yld in parse_cont(x):
|
for yld in parse_cont(x):
|
||||||
yield yld
|
yield yld
|
||||||
|
@ -27,7 +27,7 @@ class PhilosophyNow(BasicNewsRecipe):
|
|||||||
remove_tags = [dict(name='div', attrs={'id':'welcome_box'})]
|
remove_tags = [dict(name='div', attrs={'id':'welcome_box'})]
|
||||||
extra_css = '''
|
extra_css = '''
|
||||||
img {display:block; margin:0 auto;}
|
img {display:block; margin:0 auto;}
|
||||||
.articleImage { font-size:small; text-align:center; }
|
.articleImageCaption { font-size:small; text-align:center; }
|
||||||
em, blockquote { color:#202020; }
|
em, blockquote { color:#202020; }
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user