mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update economist
This commit is contained in:
parent
712dadf69b
commit
3ebc50d03a
@ -85,6 +85,14 @@ def load_article_from_json(raw, root):
|
||||
for node in data.get('text') or ():
|
||||
process_node(node, article)
|
||||
|
||||
def process_web_list(li_node):
    """Render the entries of a web list node as a run of ``<li>`` elements.

    Each entry of ``li_node['items']`` becomes one ``<li>``: its ``textHtml``
    value is used when present and truthy, otherwise its ``text`` value
    (an empty string when that key is absent as well).

    Returns the concatenated ``<li>`` markup with no enclosing list tag.
    A missing, null or empty ``items`` value yields ``''``.
    """
    # ``or ()`` tolerates a missing or null ``items`` entry instead of raising.
    items = li_node.get('items') or ()
    return ''.join(
        f'<li>{li.get("textHtml") or li.get("text", "")}</li>' for li in items
    )
|
||||
|
||||
def process_web_node(node):
|
||||
ntype = node.get('type', '')
|
||||
@ -92,7 +100,7 @@ def process_web_node(node):
|
||||
if node.get('textHtml'):
|
||||
return f'<h4>{node.get("textHtml")}</h4>'
|
||||
return f'<h4>{node.get("text", "")}</h4>'
|
||||
elif ntype == 'PARAGRAPH':
|
||||
elif ntype in ['PARAGRAPH', 'BOOK_INFO']:
|
||||
if node.get('textHtml'):
|
||||
return f'<p>{node.get("textHtml")}</p>'
|
||||
return f'<p>{node.get("text", "")}</p>'
|
||||
@ -109,9 +117,15 @@ def process_web_node(node):
|
||||
return f'<blockquote>{node.get("text", "")}</blockquote>'
|
||||
elif ntype == 'DIVIDER':
|
||||
return '<hr>'
|
||||
elif ntype == 'INFOGRAPHIC':
|
||||
if node.get('fallback'):
|
||||
return process_web_node(node['fallback'])
|
||||
elif ntype == 'INFOBOX':
|
||||
for x in safe_dict(node, 'components'):
|
||||
return f'<blockquote>{process_web_node(x)}</blockquote>'
|
||||
elif ntype == 'UNORDERED_LIST':
|
||||
if node.get('items'):
|
||||
return process_web_list(node)
|
||||
elif ntype:
|
||||
print('** ', ntype)
|
||||
return ''
|
||||
@ -120,7 +134,10 @@ def process_web_node(node):
|
||||
def load_article_from_web_json(raw):
|
||||
# open('/t/raw.json', 'w').write(raw)
|
||||
body = ''
|
||||
data = json.loads(raw)['props']['pageProps']['content']
|
||||
try:
|
||||
data = json.loads(raw)['props']['pageProps']['cp2Content']
|
||||
except Exception:
|
||||
data = json.loads(raw)['props']['pageProps']['content']
|
||||
body += f'<div style="color: red; font-size:small; font-weight:bold;">{data.get("flyTitle", "")}</div>'
|
||||
body += f'<h1>{data["headline"]}</h1>'
|
||||
if data.get('rubric') and data.get('rubric') is not None:
|
||||
@ -182,7 +199,7 @@ def process_url(url):
|
||||
|
||||
class Economist(BasicNewsRecipe):
|
||||
title = 'The Economist'
|
||||
language = 'en'
|
||||
language = 'en_GB'
|
||||
encoding = 'utf-8'
|
||||
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
|
||||
|
||||
@ -274,7 +291,7 @@ class Economist(BasicNewsRecipe):
|
||||
|
||||
def economist_test_article(self):
|
||||
return [('Articles', [{'title':'test',
|
||||
'url':'https://www.economist.com/the-americas/2024/04/14/elon-musk-is-feuding-with-brazils-powerful-supreme-court'
|
||||
'url':'https://www.economist.com/leaders/2025/03/13/americas-bullied-allies-need-to-toughen-up'
|
||||
}])]
|
||||
|
||||
def economist_return_index(self, ans):
|
||||
|
@ -85,6 +85,14 @@ def load_article_from_json(raw, root):
|
||||
for node in data.get('text') or ():
|
||||
process_node(node, article)
|
||||
|
||||
def process_web_list(li_node):
    """Render the entries of a web list node as a run of ``<li>`` elements.

    Each entry of ``li_node['items']`` becomes one ``<li>``: its ``textHtml``
    value is used when present and truthy, otherwise its ``text`` value
    (an empty string when that key is absent as well).

    Returns the concatenated ``<li>`` markup with no enclosing list tag.
    A missing, null or empty ``items`` value yields ``''``.
    """
    # ``or ()`` tolerates a missing or null ``items`` entry instead of raising.
    items = li_node.get('items') or ()
    return ''.join(
        f'<li>{li.get("textHtml") or li.get("text", "")}</li>' for li in items
    )
|
||||
|
||||
def process_web_node(node):
|
||||
ntype = node.get('type', '')
|
||||
@ -92,7 +100,7 @@ def process_web_node(node):
|
||||
if node.get('textHtml'):
|
||||
return f'<h4>{node.get("textHtml")}</h4>'
|
||||
return f'<h4>{node.get("text", "")}</h4>'
|
||||
elif ntype == 'PARAGRAPH':
|
||||
elif ntype in ['PARAGRAPH', 'BOOK_INFO']:
|
||||
if node.get('textHtml'):
|
||||
return f'<p>{node.get("textHtml")}</p>'
|
||||
return f'<p>{node.get("text", "")}</p>'
|
||||
@ -109,9 +117,15 @@ def process_web_node(node):
|
||||
return f'<blockquote>{node.get("text", "")}</blockquote>'
|
||||
elif ntype == 'DIVIDER':
|
||||
return '<hr>'
|
||||
elif ntype == 'INFOGRAPHIC':
|
||||
if node.get('fallback'):
|
||||
return process_web_node(node['fallback'])
|
||||
elif ntype == 'INFOBOX':
|
||||
for x in safe_dict(node, 'components'):
|
||||
return f'<blockquote>{process_web_node(x)}</blockquote>'
|
||||
elif ntype == 'UNORDERED_LIST':
|
||||
if node.get('items'):
|
||||
return process_web_list(node)
|
||||
elif ntype:
|
||||
print('** ', ntype)
|
||||
return ''
|
||||
@ -120,7 +134,10 @@ def process_web_node(node):
|
||||
def load_article_from_web_json(raw):
|
||||
# open('/t/raw.json', 'w').write(raw)
|
||||
body = ''
|
||||
data = json.loads(raw)['props']['pageProps']['content']
|
||||
try:
|
||||
data = json.loads(raw)['props']['pageProps']['cp2Content']
|
||||
except Exception:
|
||||
data = json.loads(raw)['props']['pageProps']['content']
|
||||
body += f'<div style="color: red; font-size:small; font-weight:bold;">{data.get("flyTitle", "")}</div>'
|
||||
body += f'<h1>{data["headline"]}</h1>'
|
||||
if data.get('rubric') and data.get('rubric') is not None:
|
||||
@ -182,7 +199,7 @@ def process_url(url):
|
||||
|
||||
class Economist(BasicNewsRecipe):
|
||||
title = 'The Economist'
|
||||
language = 'en'
|
||||
language = 'en_GB'
|
||||
encoding = 'utf-8'
|
||||
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
|
||||
|
||||
@ -274,7 +291,7 @@ class Economist(BasicNewsRecipe):
|
||||
|
||||
def economist_test_article(self):
|
||||
return [('Articles', [{'title':'test',
|
||||
'url':'https://www.economist.com/the-americas/2024/04/14/elon-musk-is-feuding-with-brazils-powerful-supreme-court'
|
||||
'url':'https://www.economist.com/leaders/2025/03/13/americas-bullied-allies-need-to-toughen-up'
|
||||
}])]
|
||||
|
||||
def economist_return_index(self, ans):
|
||||
|
@ -21,7 +21,6 @@ class TheHindufeeds(BasicNewsRecipe):
|
||||
.author, .dateLine, .publish-time {font-size:small; font-weight:bold;}
|
||||
.subhead, .subhead_lead, .bold {font-weight:bold;}
|
||||
.update-publish-time, .publish-time-new {font-size:small; }
|
||||
img {display:block; margin:0 auto;}
|
||||
.italic {font-style:italic; color:#202020;}
|
||||
'''
|
||||
|
||||
@ -55,7 +54,7 @@ class TheHindufeeds(BasicNewsRecipe):
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for cap in soup.findAll('p', attrs={'class': 'caption'}):
|
||||
cap.name = 'figcaption'
|
||||
cap.name = 'div'
|
||||
for img in soup.findAll('img', attrs={'data-original': True}):
|
||||
if img['data-original'].endswith('1x1_spacer.png'):
|
||||
source = img.findPrevious('source', srcset=True)
|
||||
@ -91,7 +90,7 @@ class TheHindufeeds(BasicNewsRecipe):
|
||||
('Business', 'https://www.thehindu.com/business/feeder/default.rss'),
|
||||
('World', 'https://www.thehindu.com/news/international/feeder/default.rss'),
|
||||
# ('Sport', 'https://www.thehindu.com/sport/feeder/default.rss'),
|
||||
('Entertainment', 'https://www.thehindu.com/entertainment/feeder/default.rss'),
|
||||
# ('Entertainment', 'https://www.thehindu.com/entertainment/feeder/default.rss'),
|
||||
# ('Crossword', 'https://crossword.thehindu.com/?utm_source=thehindu&utm_medium=mainmenufeeder/default.rss'),
|
||||
('Science', 'https://www.thehindu.com/sci-tech/science/feeder/default.rss'),
|
||||
('Life and Style', 'https://www.thehindu.com/life-and-style/feeder/default.rss'),
|
||||
|
Loading…
x
Reference in New Issue
Block a user