mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-09-29 15:31:08 -04:00
70 lines
2.5 KiB
Python
70 lines
2.5 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
|
|
__license__ = "GPL v3"
|
|
__copyright__ = (
|
|
"2008-2013, Darko Miletic <darko.miletic at gmail.com>. "
|
|
"2022, Albert Aparicio Isarn <aaparicio at posteo.net>"
|
|
)
|
|
"""
|
|
japantimes.co.jp
|
|
"""
|
|
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
|
|
class JapanTimes(BasicNewsRecipe):
    """calibre news recipe for The Japan Times (japantimes.co.jp).

    Articles are collected from the section feeds listed in ``feeds``.
    Each downloaded page has its original ``<head>`` discarded (see
    ``preprocess_raw_html``) and is then trimmed to the article container
    using the tag selectors below.
    """

    # --- Publication metadata -------------------------------------------
    title = "The Japan Times"
    __author__ = "Albert Aparicio Isarn (original recipe by Darko Miletic)"
    description = (
        "The latest news from Japan Times, Japan's leading English-language daily newspaper"
    )
    language = "en_JP"
    category = "news, politics, japan"
    publisher = "The Japan Times"
    publication_type = "newspaper"
    masthead_url = "https://cdn-japantimes.com/wp-content/themes/jt_theme/library/img/japantimes-logo-tagline.png"

    # --- Download behaviour ---------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 150
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    encoding = "utf8"
    extra_css = "body{font-family: Geneva,Arial,Helvetica,sans-serif}"

    # Reuse the metadata above so the generated e-book carries the same
    # values as the recipe itself.
    conversion_options = {
        "comment": description,
        "tags": category,
        "publisher": publisher,
        "language": language,
    }

    # --- Page clean-up selectors ----------------------------------------
    remove_tags_before = {"name": "h1"}
    remove_tags_after = {"name": "ul", "attrs": {"class": "single-sns-area"}}
    keep_only_tags = [
        {"name": "div", "attrs": {"class": "padding_block"}},
        # {"name": "h5", "attrs": {"class": "writer", "role": "author"}},
        # {"name": "p", "attrs": {"class": "credit"}},
    ]
    remove_tags = [
        {"name": "div", "id": "no_js_blocker", "attrs": {"class": "padding_block"}},
        {"name": "div", "attrs": {"class": "single-upper-meta"}},
        {"name": "ul", "attrs": {"class": "single-sns-area"}},
    ]

    # (section title, feed URL) pairs.
    feeds = [
        ("Top Stories", "https://www.japantimes.co.jp/feed/topstories/"),
        ("News", "https://www.japantimes.co.jp/news/feed/"),
        ("Opinion", "https://www.japantimes.co.jp/opinion/feed/"),
        ("Life", "https://www.japantimes.co.jp/life/feed/"),
        ("Community", "https://www.japantimes.co.jp/community/feed/"),
        ("Culture", "https://www.japantimes.co.jp/culture/feed/"),
        ("Sports", "https://www.japantimes.co.jp/sports/feed/"),
    ]

    def get_article_url(self, article):
        """Return the article URL with any query string removed.

        The URL is obtained from ``BasicNewsRecipe.get_article_url`` and
        everything from the first ``?`` onwards is dropped.

        :param article: feed entry, passed straight to the base class.
        :return: the URL without its query string, or the base class's
            falsy result unchanged when it found no URL.
        """
        rurl = BasicNewsRecipe.get_article_url(self, article)
        if not rurl:
            # No link in this entry: hand the empty result back instead of
            # raising AttributeError on ``None.partition``.
            return rurl
        return rurl.partition("?")[0]

    def preprocess_raw_html(self, raw, url):
        """Replace the document's head with an empty ``<head>``.

        Everything before the first ``</head>`` is discarded and replaced
        with ``<html><head>``, so the result starts with an empty head
        followed by the original remainder of the page.

        :param raw: the downloaded page source as text.
        :param url: address the page was fetched from (unused).
        :return: the rewritten markup, or ``raw`` unchanged when it
            contains no ``</head>``.
        """
        head_end = raw.find("</head>")
        if head_end == -1:
            # str.find() signals "not found" with -1; slicing from -1 would
            # keep only the last character and throw the whole page away.
            return raw
        return "<html><head>" + raw[head_end:]
|