diff --git a/recipes/economist_news.recipe b/recipes/economist_news.recipe index e08f7ba91d..f2b50f57fa 100644 --- a/recipes/economist_news.recipe +++ b/recipes/economist_news.recipe @@ -121,7 +121,7 @@ def process_url(url): class EconomistNews(BasicNewsRecipe): title = 'The Economist News' - language = 'en' + language = 'en_GB' encoding = 'utf-8' masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png' diff --git a/recipes/economist_search.recipe b/recipes/economist_search.recipe index 5ac61ad57f..0a3725bfcc 100644 --- a/recipes/economist_search.recipe +++ b/recipes/economist_search.recipe @@ -12,13 +12,23 @@ from calibre.ebooks.BeautifulSoup import NavigableString, Tag from calibre.web.feeds.news import BasicNewsRecipe +def process_list(li_node): + li_html = '' + for li in li_node['items']: + if li.get('textHtml'): + li_html += f'
  • {li.get("textHtml")}
  • ' + else: + li_html += f'
  • {li.get("text", "")}
  • ' + return li_html + + def process_node(node): ntype = node.get('type', '') if ntype == 'CROSSHEAD': if node.get('textHtml'): return f'

    {node.get("textHtml")}

    ' return f'

    {node.get("text", "")}

    ' - elif ntype == 'PARAGRAPH': + elif ntype in ['PARAGRAPH', 'BOOK_INFO']: if node.get('textHtml'): return f'

    {node.get("textHtml")}

    ' return f'

    {node.get("text", "")}

    ' @@ -35,9 +45,15 @@ def process_node(node): return f'
    {node.get("text", "")}
    ' elif ntype == 'DIVIDER': return '
    ' + elif ntype == 'INFOGRAPHIC': + if node.get('fallback'): + return process_node(node['fallback']) elif ntype == 'INFOBOX': for x in safe_dict(node, 'components'): return f'
    {process_node(x)}
    ' + elif ntype == 'UNORDERED_LIST': + if node.get('items'): + return process_list(node) elif ntype: print('** ', ntype) return '' @@ -57,7 +73,10 @@ class JSONHasNoContent(ValueError): def load_article_from_json(raw): # open('/t/raw.json', 'w').write(raw) body = '' - data = json.loads(raw)['props']['pageProps']['cp2Content'] + try: + data = json.loads(raw)['props']['pageProps']['cp2Content'] + except Exception: + data = json.loads(raw)['props']['pageProps']['content'] body += f'
    {data.get("flyTitle", "")}
    ' body += f'

    {data["headline"]}

    ' body += f'
    {data.get("rubric", "")}
    ' @@ -114,7 +133,7 @@ def process_url(url): class econ_search(BasicNewsRecipe): title = 'The Economist - Search' - language = 'en' + language = 'en_GB' encoding = 'utf-8' __author__ = 'unkn0wn' description = (