# Calibre news recipe for 동아일보 (Donga), built from the per-section RSS
# feeds published under https://rss.donga.com/.
import re

from calibre.web.feeds.recipes import BasicNewsRecipe

# (feed title, RSS section name) pairs; each enabled pair becomes one feed.
# Comment out sections you are not interested in.
sections = [
    ("정치", "politics"),
    ("사회", "national"),
    ("경제", "economy"),
    ("국제", "international"),
    ("사설칼럼", "editorials"),
    ("의학과학", "science"),
    ("문화연예", "culture"),
    ("스포츠", "sports"),
    ("사람속으로", "inmul"),
    # The following sections are optional and disabled by default.
    # Uncomment to enable.
    # ("건강", "health"),
    # ("레저", "leisure"),
    # ("도서", "book"),
    # ("공연", "show"),
    # ("여성", "woman"),
    # ("여행", "travel"),
    # ("생활정보", "lifeinfo"),
]


# Recipe that fetches the enabled sections and rewrites every article link
# to the site's print view before download.
class Donga(BasicNewsRecipe):
    language = "ko"
    title = "동아일보"
    description = "동아일보 기사"
    __author__ = "Minsik Cho"
    ignore_duplicate_articles = {"title", "url"}
    compress_news_images = True
    no_stylesheets = True
    oldest_article = 2
    encoding = "utf-8"

    # RSS feed URL syntax:
    #   https://rss.donga.com/[section].xml
    # The loop variable is called `name` (not `title`) so it cannot be
    # confused with the recipe's own `title` attribute above.
    feeds = [
        (name, "https://rss.donga.com/" + section + ".xml")
        for (name, section) in sections
    ]

    # Remove logo and print buttons
    remove_tags = [
        dict(name="div", attrs={"class": "popHeaderWrap"}),
        dict(name="div", attrs={"class": "etc"}),
    ]

    def print_version(self, url):
        """Return the print-view URL for an article URL.

        Article URLs have the form
            https://www.donga.com/news/[section]/article/all/[date]/[gid]/1
        and the matching print view is
            https://www.donga.com/news/View?gid=[gid]&date=[date]

        A URL that does not contain a ``/all/[date]/[gid]`` part (both
        numeric and non-empty) is returned unchanged instead of raising
        ``AttributeError`` on a failed match.
        """
        found = re.search(r"/all/([0-9]+)/([0-9]+)", url)
        if found is None:
            # Unknown URL shape: there is nothing to build a print URL from.
            return url

        date, gid = found.groups()
        return "https://www.donga.com/news/View?gid=" + gid + "&date=" + date