calibre/recipes/nhk_news.recipe
un-pogaz 8d28380515 add/remove blank-line (extra-edit)
ruff 'E302,E303,E304,E305,W391'
2025-01-24 11:14:21 +01:00

39 lines
1.4 KiB
Python

#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
# feed source: https://www.nhk.or.jp/toppage/rss/index.html
class ReutersJa(BasicNewsRecipe):
    """Calibre news recipe for NHK News (Japanese).

    NOTE(review): the class is named ``ReutersJa`` although every feed below
    is an NHK feed -- presumably a leftover from another recipe; the name is
    kept unchanged so existing references keep working.
    """

    # Recipe metadata.
    title = 'NHK News'
    __author__ = 'Richard A. Steps'
    description = 'NHK News in Japanese'
    language = 'ja'

    # Download and clean-up options.
    max_articles_per_feed = 30
    use_embedded_content = False
    auto_cleanup = True
    remove_javascript = True

    # (section title, RSS URL) pairs, one entry per feed.
    feeds = [
        ('主要ニュース', 'https://www.nhk.or.jp/rss/news/cat0.xml?format=xml'),
        ('社会', 'https://www.nhk.or.jp/rss/news/cat1.xml?format=xml'),
        ('科学・医療', 'https://www.nhk.or.jp/rss/news/cat3.xml?format=xml'),
        ('政治', 'https://www.nhk.or.jp/rss/news/cat4.xml?format=xml'),
        ('経済', 'https://www.nhk.or.jp/rss/news/cat5.xml?format=xml'),
        ('国際', 'https://www.nhk.or.jp/rss/news/cat6.xml?format=xml'),
        ('スポーツ', 'https://www.nhk.or.jp/rss/news/cat7.xml?format=xml'),
        ('文化・エンタメ', 'https://www.nhk.or.jp/rss/news/cat2.xml?format=xml'),
    ]

    def get_browser(self, *args, **kwargs):
        """Return the parent class's browser with a custom user agent.

        The ``user_agent`` keyword is overwritten before delegating to the
        parent implementation; the original author's note says this was added
        to deal with bots on the site.
        """
        kwargs['user_agent'] = 'common_words/based'
        browser = super().get_browser(*args, **kwargs)
        return browser

    def preprocess_html(self, soup):
        """Copy ``data-src`` into ``src`` for every ``<img>`` that has one.

        The ``soup`` object is modified in place and then returned.
        """
        lazy_images = soup.findAll('img', attrs={'data-src': True})
        for tag in lazy_images:
            tag['src'] = tag['data-src']
        return soup