#!/usr/bin/env python
# -*- coding: utf-8 -*-

__license__ = "GPL v3"
__copyright__ = (
    "2008-2013, Darko Miletic . "
    "2022, Albert Aparicio Isarn "
)
"""
japantimes.co.jp
"""

from calibre.web.feeds.news import BasicNewsRecipe


class JapanTimes(BasicNewsRecipe):
    """Calibre news recipe that builds an e-book from The Japan Times feeds."""

    # --- Publication metadata -------------------------------------------
    title = "The Japan Times"
    __author__ = "Albert Aparicio Isarn (original recipe by Darko Miletic)"
    description = (
        "The latest news from Japan Times, Japan's leading English-language daily newspaper"
    )
    language = "en_JP"
    category = "news, politics, japan"
    publisher = "The Japan Times"
    publication_type = "newspaper"
    masthead_url = "https://cdn-japantimes.com/wp-content/themes/jt_theme/library/img/japantimes-logo-tagline.png"

    # --- Download / parsing behaviour -----------------------------------
    oldest_article = 2
    max_articles_per_feed = 150
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    encoding = "utf8"

    # --- Output styling and conversion ----------------------------------
    extra_css = "body{font-family: Geneva,Arial,Helvetica,sans-serif}"

    # Reuses the metadata attributes above so the two stay in sync.
    conversion_options = {
        "comment": description,
        "tags": category,
        "publisher": publisher,
        "language": language,
    }

    # --- Article clean-up rules -----------------------------------------
    remove_tags_before = {"name": "h1"}
    remove_tags_after = {"name": "ul", "attrs": {"class": "single-sns-area"}}

    keep_only_tags = [
        {"name": "div", "attrs": {"class": "padding_block"}},
        # {"name": "h5", "attrs": {"class": "writer", "role": "author"}},
        # {"name": "p", "attrs": {"class": "credit"}},
    ]

    remove_tags = [
        {"name": "div", "id": "no_js_blocker", "attrs": {"class": "padding_block"}},
        {"name": "div", "attrs": {"class": "single-upper-meta"}},
        {"name": "ul", "attrs": {"class": "single-sns-area"}},
    ]

    # --- Feeds: (section title, feed URL) -------------------------------
    feeds = [
        ("Top Stories", "https://www.japantimes.co.jp/feed/topstories/"),
        ("News", "https://www.japantimes.co.jp/news/feed/"),
        ("Opinion", "https://www.japantimes.co.jp/opinion/feed/"),
        ("Life", "https://www.japantimes.co.jp/life/feed/"),
        ("Community", "https://www.japantimes.co.jp/community/feed/"),
        ("Culture", "https://www.japantimes.co.jp/culture/feed/"),
        ("Sports", "https://www.japantimes.co.jp/sports/feed/"),
    ]

    def get_article_url(self, article):
        """Return the article URL with any query string removed.

        The URL is obtained from ``BasicNewsRecipe.get_article_url`` and
        everything from the first ``?`` onwards is dropped.

        :param article: feed entry, passed through to the base class.
        :return: the URL without its query string, or the base-class
            result unchanged when it is falsy (e.g. ``None`` or ``""``).
        """
        rurl = BasicNewsRecipe.get_article_url(self, article)
        if not rurl:
            # Nothing to trim; avoid calling .partition on a missing link.
            return rurl
        return rurl.partition("?")[0]

    def preprocess_raw_html(self, raw, url):
        """Return the raw page markup before it is parsed.

        NOTE(review): as written this is an identity transform for ``str``
        input -- ``raw.find("")`` is always 0 and the prefix is empty. Both
        string literals look like they may have lost their content (e.g.
        markup stripped somewhere along the way); confirm against the
        upstream recipe before changing or relying on this.

        :param raw: page markup as a string.
        :param url: URL the markup was fetched from (unused here).
        :return: ``raw`` from the computed offset onwards, with the prefix
            prepended.
        """
        return "" + raw[raw.find("") :]