mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Donga by Minsik Cho
Merge branch 'add_donga' of https://github.com/mscho527/calibre
This commit is contained in:
commit
69c406a1e4
57
recipes/donga.recipe
Normal file
57
recipes/donga.recipe
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
import re
|
||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
# Feed sections as (feed title, RSS feed name) pairs.
# Comment out the sections you are not interested in.
sections = [
    ("정치", "politics"),
    ("사회", "national"),
    ("경제", "economy"),
    ("국제", "international"),
    ("사설칼럼", "editorials"),
    ("의학과학", "science"),
    ("문화연예", "culture"),
    ("스포츠", "sports"),
    ("사람속으로", "inmul"),
    # The following sections are optional and disabled by default.
    # Uncomment an entry to enable it.
    # ("건강", "health"),
    # ("레저", "leisure"),
    # ("도서", "book"),
    # ("공연", "show"),
    # ("여성", "woman"),
    # ("여행", "travel"),
    # ("생활정보", "lifeinfo"),
]
|
||||||
|
|
||||||
|
|
||||||
|
class Donga(BasicNewsRecipe):
    """Calibre news recipe for the Dong-A Ilbo (동아일보) newspaper.

    Articles are discovered through one RSS feed per entry in the
    module-level ``sections`` list, and each article URL is rewritten to
    the site's print view by :meth:`print_version`.
    """

    language = "ko"
    title = "동아일보"
    description = "동아일보 기사"
    __author__ = "Minsik Cho"
    ignore_duplicate_articles = {"title", "url"}
    compress_news_images = True
    no_stylesheets = True
    oldest_article = 2
    encoding = "utf-8"

    # RSS feed URLs follow the pattern:
    #   https://rss.donga.com/[section].xml
    # The loop variables are deliberately not called ``title`` so they cannot
    # be confused with the recipe's ``title`` attribute above.
    feeds = [
        (feed_title, "https://rss.donga.com/" + feed_name + ".xml")
        for (feed_title, feed_name) in sections
    ]

    # Remove logo and print buttons
    remove_tags = [
        dict(name="div", attrs={"class": "popHeaderWrap"}),
        dict(name="div", attrs={"class": "etc"}),
    ]

    def print_version(self, url):
        """Return the print-view URL for an article URL.

        Article URLs are expected to look like:
            https://www.donga.com/news/[section]/article/all/[date]/[gid]/1
        and are rewritten to:
            https://www.donga.com/news/View?gid=[gid]&date=[date]

        If ``url`` does not contain a ``/all/[date]/[gid]`` part with numeric
        date and gid, it is returned unchanged instead of raising an
        ``AttributeError`` on the failed match.
        """
        # Require at least one digit in each field so that an empty date or
        # gid never produces a malformed print URL.
        found = re.search(r"(?<=/all/)([0-9]+)/([0-9]+)", url)
        if found is None:
            return url

        date, gid = found.groups()
        return "https://www.donga.com/news/View?gid=" + gid + "&date=" + date
|
Loading…
x
Reference in New Issue
Block a user