diff --git a/recipes/1843.recipe b/recipes/1843.recipe
index 75c7017f09..c698f2c67c 100644
--- a/recipes/1843.recipe
+++ b/recipes/1843.recipe
@@ -11,13 +11,30 @@ from calibre.ebooks.BeautifulSoup import NavigableString, Tag
 from calibre.web.feeds.news import BasicNewsRecipe
 
 
+def process_list(li_node):
+    li_html = ''
+    for li in li_node['items']:
+        if li.get('textHtml'):
+            li_html += f'<li>{li.get("textHtml")}</li>'
+        else:
+            li_html += f'<li>{li.get("text", "")}</li>'
+    return li_html
+
+
+def process_info_box(bx):
+    info = ''
+    for x in safe_dict(bx, 'components'):
+        info += f'<blockquote>{process_node(x)}</blockquote>'
+    return info
+
+
 def process_node(node):
     ntype = node.get('type', '')
     if ntype == 'CROSSHEAD':
         if node.get('textHtml'):
             return f'<h4>{node.get("textHtml")}</h4>'
         return f'<h4>{node.get("text", "")}</h4>'
-    elif ntype == 'PARAGRAPH':
+    elif ntype in ['PARAGRAPH', 'BOOK_INFO']:
         if node.get('textHtml'):
             return f'<p>{node.get("textHtml")}</p>'
         return f'<p>{node.get("text", "")}</p>'
@@ -34,9 +51,14 @@ def process_node(node):
         return f'<blockquote>{node.get("text", "")}</blockquote>'
     elif ntype == 'DIVIDER':
         return '<hr>'
+    elif ntype == 'INFOGRAPHIC':
+        if node.get('fallback'):
+            return process_node(node['fallback'])
     elif ntype == 'INFOBOX':
-        for x in safe_dict(node, 'components'):
-            return f'<blockquote>{process_node(x)}</blockquote>'
+        return process_info_box(node)
+    elif ntype == 'UNORDERED_LIST':
+        if node.get('items'):
+            return process_list(node)
     elif ntype:
         print('** ', ntype)
     return ''
@@ -121,7 +143,7 @@ def process_url(url):
 
 class Econ1843(BasicNewsRecipe):
     title = 'Economist 1843'
-    language = 'en'
+    language = 'en_GB'
     encoding = 'utf-8'
     masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
diff --git a/recipes/economist.recipe b/recipes/economist.recipe
index 0648e9e228..6cbed35d6d 100644
--- a/recipes/economist.recipe
+++ b/recipes/economist.recipe
@@ -86,13 +86,30 @@ def load_article_from_json(raw, root):
         process_node(node, article)
 
 
+def process_web_list(li_node):
+    li_html = ''
+    for li in li_node['items']:
+        if li.get('textHtml'):
+            li_html += f'<li>{li.get("textHtml")}</li>'
+        else:
+            li_html += f'<li>{li.get("text", "")}</li>'
+    return li_html
+
+
+def process_info_box(bx):
+    info = ''
+    for x in safe_dict(bx, 'components'):
+        info += f'<blockquote>{process_web_node(x)}</blockquote>'
+    return info
+
+
 def process_web_node(node):
     ntype = node.get('type', '')
     if ntype == 'CROSSHEAD':
         if node.get('textHtml'):
             return f'<h4>{node.get("textHtml")}</h4>'
         return f'<h4>{node.get("text", "")}</h4>'
-    elif ntype == 'PARAGRAPH':
+    elif ntype in ['PARAGRAPH', 'BOOK_INFO']:
         if node.get('textHtml'):
             return f'<p>{node.get("textHtml")}</p>'
         return f'<p>{node.get("text", "")}</p>'
@@ -109,9 +126,14 @@ def process_web_node(node):
         return f'<blockquote>{node.get("text", "")}</blockquote>'
     elif ntype == 'DIVIDER':
         return '<hr>'
+    elif ntype == 'INFOGRAPHIC':
+        if node.get('fallback'):
+            return process_web_node(node['fallback'])
     elif ntype == 'INFOBOX':
-        for x in safe_dict(node, 'components'):
-            return f'<blockquote>{process_web_node(x)}</blockquote>'
+        return process_info_box(node)
+    elif ntype == 'UNORDERED_LIST':
+        if node.get('items'):
+            return process_web_list(node)
     elif ntype:
         print('** ', ntype)
     return ''
@@ -120,7 +142,10 @@ def process_web_node(node):
 def load_article_from_web_json(raw):
     # open('/t/raw.json', 'w').write(raw)
     body = ''
-    data = json.loads(raw)['props']['pageProps']['content']
+    try:
+        data = json.loads(raw)['props']['pageProps']['cp2Content']
+    except Exception:
+        data = json.loads(raw)['props']['pageProps']['content']
     body += f'<div>{data.get("flyTitle", "")}</div>'
     body += f'<h1>{data["headline"]}</h1>'
     if data.get('rubric') and data.get('rubric') is not None:
@@ -182,7 +207,7 @@ def process_url(url):
 
 class Economist(BasicNewsRecipe):
     title = 'The Economist'
-    language = 'en'
+    language = 'en_GB'
     encoding = 'utf-8'
     masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
@@ -274,7 +299,7 @@ class Economist(BasicNewsRecipe):
 
     def economist_test_article(self):
         return [('Articles', [{'title':'test',
-            'url':'https://www.economist.com/the-americas/2024/04/14/elon-musk-is-feuding-with-brazils-powerful-supreme-court'
+            'url':'https://www.economist.com/leaders/2025/03/13/americas-bullied-allies-need-to-toughen-up'
         }])]
 
     def economist_return_index(self, ans):
diff --git a/recipes/economist_free.recipe b/recipes/economist_free.recipe
index 0648e9e228..6cbed35d6d 100644
--- a/recipes/economist_free.recipe
+++ b/recipes/economist_free.recipe
@@ -86,13 +86,30 @@ def load_article_from_json(raw, root):
         process_node(node, article)
 
 
+def process_web_list(li_node):
+    li_html = ''
+    for li in li_node['items']:
+        if li.get('textHtml'):
+            li_html += f'<li>{li.get("textHtml")}</li>'
+        else:
+            li_html += f'<li>{li.get("text", "")}</li>'
+    return li_html
+
+
+def process_info_box(bx):
+    info = ''
+    for x in safe_dict(bx, 'components'):
+        info += f'<blockquote>{process_web_node(x)}</blockquote>'
+    return info
+
+
 def process_web_node(node):
     ntype = node.get('type', '')
     if ntype == 'CROSSHEAD':
         if node.get('textHtml'):
             return f'<h4>{node.get("textHtml")}</h4>'
         return f'<h4>{node.get("text", "")}</h4>'
-    elif ntype == 'PARAGRAPH':
+    elif ntype in ['PARAGRAPH', 'BOOK_INFO']:
         if node.get('textHtml'):
             return f'<p>{node.get("textHtml")}</p>'
         return f'<p>{node.get("text", "")}</p>'
@@ -109,9 +126,14 @@ def process_web_node(node):
         return f'<blockquote>{node.get("text", "")}</blockquote>'
     elif ntype == 'DIVIDER':
         return '<hr>'
+    elif ntype == 'INFOGRAPHIC':
+        if node.get('fallback'):
+            return process_web_node(node['fallback'])
     elif ntype == 'INFOBOX':
-        for x in safe_dict(node, 'components'):
-            return f'<blockquote>{process_web_node(x)}</blockquote>'
+        return process_info_box(node)
+    elif ntype == 'UNORDERED_LIST':
+        if node.get('items'):
+            return process_web_list(node)
     elif ntype:
         print('** ', ntype)
     return ''
@@ -120,7 +142,10 @@ def process_web_node(node):
 def load_article_from_web_json(raw):
     # open('/t/raw.json', 'w').write(raw)
     body = ''
-    data = json.loads(raw)['props']['pageProps']['content']
+    try:
+        data = json.loads(raw)['props']['pageProps']['cp2Content']
+    except Exception:
+        data = json.loads(raw)['props']['pageProps']['content']
     body += f'<div>{data.get("flyTitle", "")}</div>'
     body += f'<h1>{data["headline"]}</h1>'
     if data.get('rubric') and data.get('rubric') is not None:
@@ -182,7 +207,7 @@ def process_url(url):
 
 class Economist(BasicNewsRecipe):
     title = 'The Economist'
-    language = 'en'
+    language = 'en_GB'
     encoding = 'utf-8'
     masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
@@ -274,7 +299,7 @@ class Economist(BasicNewsRecipe):
 
     def economist_test_article(self):
         return [('Articles', [{'title':'test',
-            'url':'https://www.economist.com/the-americas/2024/04/14/elon-musk-is-feuding-with-brazils-powerful-supreme-court'
+            'url':'https://www.economist.com/leaders/2025/03/13/americas-bullied-allies-need-to-toughen-up'
         }])]
 
     def economist_return_index(self, ans):
diff --git a/recipes/economist_news.recipe b/recipes/economist_news.recipe
index e08f7ba91d..f2b50f57fa 100644
--- a/recipes/economist_news.recipe
+++ b/recipes/economist_news.recipe
@@ -121,7 +121,7 @@ def process_url(url):
 
 class EconomistNews(BasicNewsRecipe):
     title = 'The Economist News'
-    language = 'en'
+    language = 'en_GB'
     encoding = 'utf-8'
     masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
diff --git a/recipes/economist_search.recipe b/recipes/economist_search.recipe
index 5ac61ad57f..f5ea674d54 100644
--- a/recipes/economist_search.recipe
+++ b/recipes/economist_search.recipe
@@ -12,13 +12,29 @@ from calibre.ebooks.BeautifulSoup import NavigableString, Tag
 from calibre.web.feeds.news import BasicNewsRecipe
 
 
+def process_list(li_node):
+    li_html = ''
+    for li in li_node['items']:
+        if li.get('textHtml'):
+            li_html += f'<li>{li.get("textHtml")}</li>'
+        else:
+            li_html += f'<li>{li.get("text", "")}</li>'
+    return li_html
+
+
+def process_info_box(bx):
+    info = ''
+    for x in safe_dict(bx, 'components'):
+        info += f'<blockquote>{process_node(x)}</blockquote>'
+    return info
+
 def process_node(node):
     ntype = node.get('type', '')
     if ntype == 'CROSSHEAD':
         if node.get('textHtml'):
             return f'<h4>{node.get("textHtml")}</h4>'
         return f'<h4>{node.get("text", "")}</h4>'
-    elif ntype == 'PARAGRAPH':
+    elif ntype in ['PARAGRAPH', 'BOOK_INFO']:
         if node.get('textHtml'):
             return f'<p>{node.get("textHtml")}</p>'
         return f'<p>{node.get("text", "")}</p>'
@@ -35,9 +51,14 @@ def process_node(node):
         return f'<blockquote>{node.get("text", "")}</blockquote>'
     elif ntype == 'DIVIDER':
         return '<hr>'
+    elif ntype == 'INFOGRAPHIC':
+        if node.get('fallback'):
+            return process_node(node['fallback'])
     elif ntype == 'INFOBOX':
-        for x in safe_dict(node, 'components'):
-            return f'<blockquote>{process_node(x)}</blockquote>'
+        return process_info_box(node)
+    elif ntype == 'UNORDERED_LIST':
+        if node.get('items'):
+            return process_list(node)
     elif ntype:
         print('** ', ntype)
     return ''
@@ -57,7 +78,10 @@ class JSONHasNoContent(ValueError):
 def load_article_from_json(raw):
     # open('/t/raw.json', 'w').write(raw)
     body = ''
-    data = json.loads(raw)['props']['pageProps']['cp2Content']
+    try:
+        data = json.loads(raw)['props']['pageProps']['cp2Content']
+    except Exception:
+        data = json.loads(raw)['props']['pageProps']['content']
     body += f'<div>{data.get("flyTitle", "")}</div>'
     body += f'<h1>{data["headline"]}</h1>'
     body += f'<div>{data.get("rubric", "")}</div>'
@@ -114,7 +138,7 @@ def process_url(url):
 
 class econ_search(BasicNewsRecipe):
     title = 'The Economist - Search'
-    language = 'en'
+    language = 'en_GB'
     encoding = 'utf-8'
     __author__ = 'unkn0wn'
     description = (
diff --git a/recipes/economist_world_ahead.recipe b/recipes/economist_world_ahead.recipe
index 6d7e2336ee..85ee8f50ce 100644
--- a/recipes/economist_world_ahead.recipe
+++ b/recipes/economist_world_ahead.recipe
@@ -12,13 +12,30 @@ from calibre.ebooks.BeautifulSoup import NavigableString, Tag
 from calibre.web.feeds.news import BasicNewsRecipe
 
 
+def process_list(li_node):
+    li_html = ''
+    for li in li_node['items']:
+        if li.get('textHtml'):
+            li_html += f'<li>{li.get("textHtml")}</li>'
+        else:
+            li_html += f'<li>{li.get("text", "")}</li>'
+    return li_html
+
+
+def process_info_box(bx):
+    info = ''
+    for x in safe_dict(bx, 'components'):
+        info += f'<blockquote>{process_node(x)}</blockquote>'
+    return info
+
+
 def process_node(node):
     ntype = node.get('type', '')
     if ntype == 'CROSSHEAD':
         if node.get('textHtml'):
             return f'<h4>{node.get("textHtml")}</h4>'
         return f'<h4>{node.get("text", "")}</h4>'
-    elif ntype == 'PARAGRAPH':
+    elif ntype in ['PARAGRAPH', 'BOOK_INFO']:
         if node.get('textHtml'):
             return f'<p>{node.get("textHtml")}</p>'
         return f'<p>{node.get("text", "")}</p>'
@@ -35,9 +52,14 @@ def process_node(node):
         return f'<blockquote>{node.get("text", "")}</blockquote>'
     elif ntype == 'DIVIDER':
         return '<hr>'
+    elif ntype == 'INFOGRAPHIC':
+        if node.get('fallback'):
+            return process_node(node['fallback'])
     elif ntype == 'INFOBOX':
-        for x in safe_dict(node, 'components'):
-            return f'<blockquote>{process_node(x)}</blockquote>'
+        return process_info_box(node)
+    elif ntype == 'UNORDERED_LIST':
+        if node.get('items'):
+            return process_list(node)
     elif ntype:
         print('** ', ntype)
     return ''
@@ -57,7 +79,10 @@ class JSONHasNoContent(ValueError):
 def load_article_from_json(raw):
     # open('/t/raw.json', 'w').write(raw)
     body = ''
-    data = json.loads(raw)['props']['pageProps']['cp2Content']
+    try:
+        data = json.loads(raw)['props']['pageProps']['cp2Content']
+    except Exception:
+        data = json.loads(raw)['props']['pageProps']['content']
     body += f'<div>{data.get("flyTitle", "")}</div>'
     body += f'<h1>{data["headline"]}</h1>'
     body += f'<div>{data.get("rubric", "")}</div>'
@@ -118,7 +143,7 @@ def process_url(url):
 
 class EconomistWorld(BasicNewsRecipe):
     title = 'The Economist World Ahead'
-    language = 'en'
+    language = 'en_GB'
    encoding = 'utf-8'
     masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
diff --git a/recipes/hindufeeds.recipe b/recipes/hindufeeds.recipe
index 73148c0ea9..1fc7e76cb0 100644
--- a/recipes/hindufeeds.recipe
+++ b/recipes/hindufeeds.recipe
@@ -21,7 +21,6 @@ class TheHindufeeds(BasicNewsRecipe):
         .author, .dateLine, .publish-time {font-size:small; font-weight:bold;}
         .subhead, .subhead_lead, .bold {font-weight:bold;}
         .update-publish-time, .publish-time-new {font-size:small; }
-        img {display:block; margin:0 auto;}
         .italic {font-style:italic; color:#202020;}
     '''
 
@@ -55,7 +54,7 @@ class TheHindufeeds(BasicNewsRecipe):
 
     def preprocess_html(self, soup):
         for cap in soup.findAll('p', attrs={'class': 'caption'}):
-            cap.name = 'figcaption'
+            cap.name = 'div'
         for img in soup.findAll('img', attrs={'data-original': True}):
             if img['data-original'].endswith('1x1_spacer.png'):
                 source = img.findPrevious('source', srcset=True)
@@ -91,7 +90,7 @@ class TheHindufeeds(BasicNewsRecipe):
        ('Business', 'https://www.thehindu.com/business/feeder/default.rss'),
        ('World', 'https://www.thehindu.com/news/international/feeder/default.rss'),
        # ('Sport', 'https://www.thehindu.com/sport/feeder/default.rss'),
-        ('Entertainment', 'https://www.thehindu.com/entertainment/feeder/default.rss'),
+        # ('Entertainment', 'https://www.thehindu.com/entertainment/feeder/default.rss'),
        # ('Crossword', 'https://crossword.thehindu.com/?utm_source=thehindu&utm_medium=mainmenufeeder/default.rss'),
        ('Science', 'https://www.thehindu.com/sci-tech/science/feeder/default.rss'),
        ('Life and Style', 'https://www.thehindu.com/life-and-style/feeder/default.rss'),
diff --git a/recipes/spectator_magazine.recipe b/recipes/spectator_magazine.recipe
index 84b29306b8..54a131a8b6 100644
--- a/recipes/spectator_magazine.recipe
+++ b/recipes/spectator_magazine.recipe
@@ -13,7 +13,7 @@ class spectator(BasicNewsRecipe):
     title = 'Spectator Magazine'
     __author__ = 'unkn0wn'
     description = 'The Spectator was established in 1828, and is the best-written and most influential weekly in the English language.'
-    language = 'en'
+    language = 'en_GB'
     no_stylesheets = True
     remove_attributes = ['height', 'width', 'style']
     ignore_duplicate_articles = {'url'}