mirror of https://github.com/kovidgoyal/calibre.git

Asahi Shimbun by Albert Aparicio Isarn

commit 0f2e921ff1

recipes/asahi_shimbun_en.recipe (167 lines, new file)
@@ -0,0 +1,167 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

__license__ = "GPL v3"
__copyright__ = "2022, Albert Aparicio Isarn <aaparicio at posteo.net>"

"""
https://www.asahi.com/ajw/
"""

from datetime import datetime

from calibre.web.feeds.news import BasicNewsRecipe


class AsahiShimbunEnglishNews(BasicNewsRecipe):
    title = "The Asahi Shimbun"
    __author__ = "Albert Aparicio Isarn"

    description = ("The Asahi Shimbun is widely regarded for its journalism as the most respected daily newspaper in Japan."
                   " The English version offers selected articles from the vernacular Asahi Shimbun, as well as extensive"
                   " coverage of cool Japan, focusing on manga, travel and other timely news.")
    publisher = "The Asahi Shimbun Company"
    publication_type = "newspaper"
    category = "news, japan"
    language = "en_JP"

    index = "https://www.asahi.com"
    masthead_url = "https://p.potaufeu.asahi.com/ajw/css/images/en_logo@2x.png"

    oldest_article = 3
    max_articles_per_feed = 40
    no_stylesheets = True
    remove_javascript = True

    # Trim each downloaded article page down to the article body.
    remove_tags_before = {"id": "MainInner"}
    remove_tags_after = {"class": "ArticleText"}
    remove_tags = [{"name": "div", "class": "SnsUtilityArea"}]

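    # Each helper below scrapes one kind of section page and returns a list of
    # article dicts in the shape BasicNewsRecipe expects from parse_index:
    # the keys "title", "url", "date" and "description".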
    def get_whats_new(self):
        soup = self.index_to_soup(self.index + "/ajw/new")
        news_section = soup.find("div", attrs={"class": "specialList"})

        new_news = []

        for item in news_section.findAll("li"):
            title = item.find("p", attrs={"class": "title"}).string
            # The first child node of <p class="date"> is the date text.
            date_string = item.find("p", attrs={"class": "date"}).next
            date = date_string.strip()
            url = self.index + item.find("a")["href"]

            new_news.append(
                {
                    "title": title,
                    "date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
                    "url": url,
                    "description": "",
                }
            )

        return new_news

    def get_top6(self, soup):
        top = soup.find("ul", attrs={"class": "top6"})

        top6_news = []

        for item in top.findAll("li"):
            title = item.find("p", attrs={"class": "title"}).string
            date_string = item.find("p", attrs={"class": "date"}).next
            date = date_string.strip()
            url = self.index + item.find("a")["href"]

            top6_news.append(
                {
                    "title": title,
                    "date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
                    "url": url,
                    "description": "",
                }
            )

        return top6_news

    def get_section_news(self, soup):
        news_grid = soup.find("ul", attrs={"class": "default"})

        news = []

        for item in news_grid.findAll("li"):
            title = item.find("p", attrs={"class": "title"}).string
            date_string = item.find("p", attrs={"class": "date"}).next
            date = date_string.strip()

            url = self.index + item.find("a")["href"]

            news.append(
                {
                    "title": title,
                    "date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
                    "url": url,
                    "description": "",
                }
            )

        return news

    def get_section(self, section):
        soup = self.index_to_soup(self.index + "/ajw/" + section)

        section_news_items = self.get_top6(soup)
        section_news_items.extend(self.get_section_news(soup))

        return section_news_items

    def get_special_section(self, section):
        soup = self.index_to_soup(self.index + "/ajw/" + section)
        top = soup.find("div", attrs={"class": "Section"})

        special_news = []

        for item in top.findAll("li"):
            item_a = item.find("a")

            # The link text holds the title on its first line and the
            # description on the second.
            text_split = item_a.text.strip().split("\n")
            title = text_split[0]
            description = text_split[1].strip()

            url = self.index + item_a["href"]

            special_news.append(
                {
                    "title": title,
                    "date": "",
                    "url": url,
                    "description": description,
                }
            )

        return special_news

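    # parse_index must return a list of (feed title, list of articles) tuples;
    # each feed here is built from one of the section pages scraped above.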
    def parse_index(self):
        feeds = [
            ("What's New", self.get_whats_new()),
            ("National Report", self.get_section("national_report")),
            ("Politics", self.get_section("politics")),
            ("Business", self.get_section("business")),
            ("Asia & World - China", self.get_section("asia_world/china")),
            ("Asia & World - Korean Peninsula", self.get_section("asia_world/korean_peninsula")),
            ("Asia & World - Around Asia", self.get_section("asia_world/around_asia")),
            ("Asia & World - World", self.get_section("asia_world/world")),
            ("Sci & Tech", self.get_section("sci_tech")),
            ("Culture - Style", self.get_section("culture/style")),
            ("Culture - Cooking", self.get_section("culture/cooking")),
            ("Culture - Movies", self.get_section("culture/movies")),
            ("Culture - Manga & Anime", self.get_section("culture/manga_anime")),
            ("Travel", self.get_section("travel")),
            ("Sports", self.get_section("sports")),
            ("Opinion - Editorial", self.get_section("opinion/editorial")),
            ("Opinion - Vox Populi", self.get_section("opinion/vox")),
            ("Opinion - Views", self.get_section("opinion/views")),
            ("Special", self.get_special_section("special")),
        ]

        return feeds
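To try the recipe locally, calibre's ebook-convert can run a .recipe file directly; with --test it fetches only a couple of articles per feed, and -vv prints verbose progress (the output filename below is arbitrary):

    ebook-convert asahi_shimbun_en.recipe output.epub --test -vv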