Donga by Minsik Cho

Merge branch 'add_donga' of https://github.com/mscho527/calibre
This commit is contained in:
Kovid Goyal 2022-05-21 20:53:35 +05:30
commit 69c406a1e4
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

57
recipes/donga.recipe Normal file
View File

@@ -0,0 +1,57 @@
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
# (feed title, feed slug) pairs, one per newspaper section.
# The slug is the file name of the section's RSS feed.
# Comment out any section you are not interested in.
sections = [
    ("정치", "politics"),
    ("사회", "national"),
    ("경제", "economy"),
    ("국제", "international"),
    ("사설칼럼", "editorials"),
    ("의학과학", "science"),
    ("문화연예", "culture"),
    ("스포츠", "sports"),
    ("사람속으로", "inmul"),
    # The following sections are optional and disabled by default.
    # Uncomment a line to enable it.
    # ("건강", "health"),
    # ("레저", "leisure"),
    # ("도서", "book"),
    # ("공연", "show"),
    # ("여성", "woman"),
    # ("여행", "travel"),
    # ("생활정보", "lifeinfo"),
]
class Donga(BasicNewsRecipe):
    """Calibre recipe for the Dong-A Ilbo (동아일보) newspaper.

    One RSS feed is registered for every entry of the module-level
    ``sections`` list, and article URLs are rewritten to the site's
    print view by ``print_version``.
    """

    language = "ko"
    title = "동아일보"
    description = "동아일보 기사"
    __author__ = "Minsik Cho"
    ignore_duplicate_articles = {"title", "url"}
    compress_news_images = True
    no_stylesheets = True
    oldest_article = 2
    encoding = "utf-8"

    # RSS feed URL syntax:
    #   https://rss.donga.com/[section].xml
    # The loop variables are deliberately not called ``title`` so they cannot
    # be confused with the class attribute of the same name defined above.
    feeds = [
        (feed_title, "https://rss.donga.com/" + slug + ".xml")
        for (feed_title, slug) in sections
    ]

    # Remove logo and print buttons
    remove_tags = [
        dict(name="div", attrs={"class": "popHeaderWrap"}),
        dict(name="div", attrs={"class": "etc"}),
    ]

    def print_version(self, url):
        """Return the print-view URL for the article at ``url``.

        Article URLs have the form
            https://www.donga.com/news/[section]/article/all/[date]/[gid]/1
        and the matching print view is
            https://www.donga.com/news/View?gid=[gid]&date=[date]

        If ``url`` does not contain an ``/all/[date]/[gid]`` segment with
        numeric date and gid, it is returned unchanged instead of raising
        ``AttributeError`` on a failed match.
        """
        # ``+`` (not ``*``) so an empty date or gid is never accepted; that
        # would otherwise yield a meaningless "View?gid=&date=" URL.
        match = re.search(r"/all/([0-9]+)/([0-9]+)", url)
        if match is None:
            return url
        date, gid = match.groups()
        return "https://www.donga.com/news/View?gid=" + gid + "&date=" + date