Recipes - The Asahi Shimbun (English)
Recipe made from scratch
This commit is contained in:
parent 9d712f55de
commit 358662bf3f
165 recipes/asahi_shimbun_en.recipe (new file)
@@ -0,0 +1,165 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

__license__ = "GPL v3"
__copyright__ = "2022, Albert Aparicio Isarn <aaparicio at posteo.net>"

"""
https://www.asahi.com/ajw/
"""

from datetime import datetime

from calibre.web.feeds.news import BasicNewsRecipe


class AsahiShimbunEnglishNews(BasicNewsRecipe):
    title = "The Asahi Shimbun"
    __author__ = "Albert Aparicio Isarn"

    description = (
        "The Asahi Shimbun is widely regarded for its journalism as the most"
        " respected daily newspaper in Japan. The English version offers selected"
        " articles from the vernacular Asahi Shimbun, as well as extensive"
        " coverage of cool Japan, focusing on manga, travel and other timely news."
    )
    publisher = "The Asahi Shimbun Company"
    publication_type = "newspaper"
    category = "news, japan"
    language = "en_JP"

    index = "https://www.asahi.com"
    masthead_url = "https://p.potaufeu.asahi.com/ajw/css/images/en_logo@2x.png"

    oldest_article = 3
    max_articles_per_feed = 40
    no_stylesheets = True
    remove_javascript = True

    # Trim each page down to the article body and drop the social-sharing toolbar.
    remove_tags_before = {"id": "MainInner"}
    remove_tags_after = {"class": "ArticleText"}
    remove_tags = [{"name": "div", "class": "SnsUtilityArea"}]

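    # There are no RSS feeds here: parse_index() builds each feed by scraping
    # the section landing pages directly, using the helper methods below.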
    def get_whats_new(self):
        soup = self.index_to_soup(self.index + "/ajw/new")
        news_section = soup.find("div", attrs={"class": "specialList"})

        new_news = []

        for item in news_section.findAll("li"):
            title = item.find("p", attrs={"class": "title"}).string
            date_string = item.find("p", attrs={"class": "date"}).next
            date = date_string.strip()
            url = self.index + item.find("a")["href"]

            new_news.append(
                {
                    "title": title,
                    # Listing dates look like "January 30, 2022"; normalize to YYYY/MM/DD.
                    "date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
                    "url": url,
                    "description": "",
                }
            )

        return new_news

    def get_top6(self, soup):
        # The six highlighted stories at the top of a section page.
        top = soup.find("ul", attrs={"class": "top6"})

        top6_news = []

        for item in top.findAll("li"):
            title = item.find("p", attrs={"class": "title"}).string
            date_string = item.find("p", attrs={"class": "date"}).next
            date = date_string.strip()
            url = self.index + item.find("a")["href"]

            top6_news.append(
                {
                    "title": title,
                    "date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
                    "url": url,
                    "description": "",
                }
            )

        return top6_news

    def get_section_news(self, soup):
        # The standard article grid below the top-six block.
        news_grid = soup.find("ul", attrs={"class": "default"})

        news = []

        for item in news_grid.findAll("li"):
            title = item.find("p", attrs={"class": "title"}).string
            date_string = item.find("p", attrs={"class": "date"}).next
            date = date_string.strip()

            url = self.index + item.find("a")["href"]

            news.append(
                {
                    "title": title,
                    "date": datetime.strptime(date, "%B %d, %Y").strftime("%Y/%m/%d"),
                    "url": url,
                    "description": "",
                }
            )

        return news

    def get_section(self, section):
        soup = self.index_to_soup(self.index + "/ajw/" + section)

        # A regular section page is its top-six highlights plus the article grid.
        section_news_items = self.get_top6(soup)
        section_news_items.extend(self.get_section_news(soup))

        return section_news_items

    def get_special_section(self, section):
        soup = self.index_to_soup(self.index + "/ajw/" + section)
        top = soup.find("div", attrs={"class": "Section"})

        special_news = []

        for item in top.findAll("li"):
            item_a = item.find("a")

            # Special listings put the title and description in one link,
            # separated by a newline, and carry no date.
            text_split = item_a.text.strip().split("\n")
            title = text_split[0]
            description = text_split[1].strip()

            url = self.index + item_a["href"]

            special_news.append(
                {
                    "title": title,
                    "date": "",
                    "url": url,
                    "description": description,
                }
            )

        return special_news

    def parse_index(self):
        feeds = [
            ("What's New", self.get_whats_new()),
            ("National Report", self.get_section("national_report")),
            ("Politics", self.get_section("politics")),
            ("Business", self.get_section("business")),
            ("Asia & World - China", self.get_section("asia_world/china")),
            ("Asia & World - Korean Peninsula", self.get_section("asia_world/korean_peninsula")),
            ("Asia & World - Around Asia", self.get_section("asia_world/around_asia")),
            ("Asia & World - World", self.get_section("asia_world/world")),
            ("Sci & Tech", self.get_section("sci_tech")),
            ("Culture - Style", self.get_section("culture/style")),
            ("Culture - Cooking", self.get_section("culture/cooking")),
            ("Culture - Movies", self.get_section("culture/movies")),
            ("Culture - Manga & Anime", self.get_section("culture/manga_anime")),
            ("Travel", self.get_section("travel")),
            ("Sports", self.get_section("sports")),
            ("Opinion - Editorial", self.get_section("opinion/editorial")),
            ("Opinion - Vox Populi", self.get_section("opinion/vox")),
            ("Opinion - Views", self.get_section("opinion/views")),
            ("Special", self.get_special_section("special")),
        ]

        return feeds
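
A quick way to check a recipe like this locally (a suggested workflow, not part of the commit) is to run it through calibre's ebook-convert in test mode, which fetches only a couple of articles per feed:

    ebook-convert asahi_shimbun_en.recipe .epub --test -vv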