mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
...
This commit is contained in:
parent
3ebc50d03a
commit
26bb850d62
@ -11,13 +11,23 @@ from calibre.ebooks.BeautifulSoup import NavigableString, Tag
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
def process_list(li_node):
|
||||||
|
li_html = ''
|
||||||
|
for li in li_node['items']:
|
||||||
|
if li.get('textHtml'):
|
||||||
|
li_html += f'<li>{li.get("textHtml")}</li>'
|
||||||
|
else:
|
||||||
|
li_html += f'<li>{li.get("text", "")}</li>'
|
||||||
|
return li_html
|
||||||
|
|
||||||
|
|
||||||
def process_node(node):
|
def process_node(node):
|
||||||
ntype = node.get('type', '')
|
ntype = node.get('type', '')
|
||||||
if ntype == 'CROSSHEAD':
|
if ntype == 'CROSSHEAD':
|
||||||
if node.get('textHtml'):
|
if node.get('textHtml'):
|
||||||
return f'<h4>{node.get("textHtml")}</h4>'
|
return f'<h4>{node.get("textHtml")}</h4>'
|
||||||
return f'<h4>{node.get("text", "")}</h4>'
|
return f'<h4>{node.get("text", "")}</h4>'
|
||||||
elif ntype == 'PARAGRAPH':
|
elif ntype in ['PARAGRAPH', 'BOOK_INFO']:
|
||||||
if node.get('textHtml'):
|
if node.get('textHtml'):
|
||||||
return f'<p>{node.get("textHtml")}</p>'
|
return f'<p>{node.get("textHtml")}</p>'
|
||||||
return f'<p>{node.get("text", "")}</p>'
|
return f'<p>{node.get("text", "")}</p>'
|
||||||
@ -34,9 +44,15 @@ def process_node(node):
|
|||||||
return f'<blockquote>{node.get("text", "")}</blockquote>'
|
return f'<blockquote>{node.get("text", "")}</blockquote>'
|
||||||
elif ntype == 'DIVIDER':
|
elif ntype == 'DIVIDER':
|
||||||
return '<hr>'
|
return '<hr>'
|
||||||
|
elif ntype == 'INFOGRAPHIC':
|
||||||
|
if node.get('fallback'):
|
||||||
|
return process_node(node['fallback'])
|
||||||
elif ntype == 'INFOBOX':
|
elif ntype == 'INFOBOX':
|
||||||
for x in safe_dict(node, 'components'):
|
for x in safe_dict(node, 'components'):
|
||||||
return f'<blockquote>{process_node(x)}</blockquote>'
|
return f'<blockquote>{process_node(x)}</blockquote>'
|
||||||
|
elif ntype == 'UNORDERED_LIST':
|
||||||
|
if node.get('items'):
|
||||||
|
return process_list(node)
|
||||||
elif ntype:
|
elif ntype:
|
||||||
print('** ', ntype)
|
print('** ', ntype)
|
||||||
return ''
|
return ''
|
||||||
@ -121,7 +137,7 @@ def process_url(url):
|
|||||||
class Econ1843(BasicNewsRecipe):
|
class Econ1843(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'Economist 1843'
|
title = 'Economist 1843'
|
||||||
language = 'en'
|
language = 'en_GB'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
|
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
|
||||||
|
|
||||||
|
@ -12,13 +12,23 @@ from calibre.ebooks.BeautifulSoup import NavigableString, Tag
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
def process_list(li_node):
|
||||||
|
li_html = ''
|
||||||
|
for li in li_node['items']:
|
||||||
|
if li.get('textHtml'):
|
||||||
|
li_html += f'<li>{li.get("textHtml")}</li>'
|
||||||
|
else:
|
||||||
|
li_html += f'<li>{li.get("text", "")}</li>'
|
||||||
|
return li_html
|
||||||
|
|
||||||
|
|
||||||
def process_node(node):
|
def process_node(node):
|
||||||
ntype = node.get('type', '')
|
ntype = node.get('type', '')
|
||||||
if ntype == 'CROSSHEAD':
|
if ntype == 'CROSSHEAD':
|
||||||
if node.get('textHtml'):
|
if node.get('textHtml'):
|
||||||
return f'<h4>{node.get("textHtml")}</h4>'
|
return f'<h4>{node.get("textHtml")}</h4>'
|
||||||
return f'<h4>{node.get("text", "")}</h4>'
|
return f'<h4>{node.get("text", "")}</h4>'
|
||||||
elif ntype == 'PARAGRAPH':
|
elif ntype in ['PARAGRAPH', 'BOOK_INFO']:
|
||||||
if node.get('textHtml'):
|
if node.get('textHtml'):
|
||||||
return f'<p>{node.get("textHtml")}</p>'
|
return f'<p>{node.get("textHtml")}</p>'
|
||||||
return f'<p>{node.get("text", "")}</p>'
|
return f'<p>{node.get("text", "")}</p>'
|
||||||
@ -35,9 +45,15 @@ def process_node(node):
|
|||||||
return f'<blockquote>{node.get("text", "")}</blockquote>'
|
return f'<blockquote>{node.get("text", "")}</blockquote>'
|
||||||
elif ntype == 'DIVIDER':
|
elif ntype == 'DIVIDER':
|
||||||
return '<hr>'
|
return '<hr>'
|
||||||
|
elif ntype == 'INFOGRAPHIC':
|
||||||
|
if node.get('fallback'):
|
||||||
|
return process_node(node['fallback'])
|
||||||
elif ntype == 'INFOBOX':
|
elif ntype == 'INFOBOX':
|
||||||
for x in safe_dict(node, 'components'):
|
for x in safe_dict(node, 'components'):
|
||||||
return f'<blockquote>{process_node(x)}</blockquote>'
|
return f'<blockquote>{process_node(x)}</blockquote>'
|
||||||
|
elif ntype == 'UNORDERED_LIST':
|
||||||
|
if node.get('items'):
|
||||||
|
return process_list(node)
|
||||||
elif ntype:
|
elif ntype:
|
||||||
print('** ', ntype)
|
print('** ', ntype)
|
||||||
return ''
|
return ''
|
||||||
@ -57,7 +73,10 @@ class JSONHasNoContent(ValueError):
|
|||||||
def load_article_from_json(raw):
|
def load_article_from_json(raw):
|
||||||
# open('/t/raw.json', 'w').write(raw)
|
# open('/t/raw.json', 'w').write(raw)
|
||||||
body = ''
|
body = ''
|
||||||
data = json.loads(raw)['props']['pageProps']['cp2Content']
|
try:
|
||||||
|
data = json.loads(raw)['props']['pageProps']['cp2Content']
|
||||||
|
except Exception:
|
||||||
|
data = json.loads(raw)['props']['pageProps']['content']
|
||||||
body += f'<div style="color: red; font-size:small; font-weight:bold;">{data.get("flyTitle", "")}</div>'
|
body += f'<div style="color: red; font-size:small; font-weight:bold;">{data.get("flyTitle", "")}</div>'
|
||||||
body += f'<h1>{data["headline"]}</h1>'
|
body += f'<h1>{data["headline"]}</h1>'
|
||||||
body += f'<div style="font-style: italic; color:#202020;">{data.get("rubric", "")}</div>'
|
body += f'<div style="font-style: italic; color:#202020;">{data.get("rubric", "")}</div>'
|
||||||
@ -118,7 +137,7 @@ def process_url(url):
|
|||||||
|
|
||||||
class EconomistWorld(BasicNewsRecipe):
|
class EconomistWorld(BasicNewsRecipe):
|
||||||
title = 'The Economist World Ahead'
|
title = 'The Economist World Ahead'
|
||||||
language = 'en'
|
language = 'en_GB'
|
||||||
encoding = 'utf-8'
|
encoding = 'utf-8'
|
||||||
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
|
masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user