calibre/recipes/donga.recipe
2025-01-24 11:14:20 +01:00

59 lines
1.8 KiB
Python

import re
from calibre.web.feeds.recipes import BasicNewsRecipe
# Sections to download, as (feed title, RSS file name) pairs.
# Comment out any section you are not interested in.
sections = [
    ('정치', 'politics'),
    ('사회', 'national'),
    ('경제', 'economy'),
    ('국제', 'international'),
    ('사설칼럼', 'editorials'),
    ('의학과학', 'science'),
    ('문화연예', 'culture'),
    ('스포츠', 'sports'),
    ('사람속으로', 'inmul'),
    # The sections below are optional and disabled by default.
    # Uncomment a line to enable that section.
    # ('건강', 'health'),
    # ('레저', 'leisure'),
    # ('도서', 'book'),
    # ('공연', 'show'),
    # ('여성', 'woman'),
    # ('여행', 'travel'),
    # ('생활정보', 'lifeinfo'),
]
class Donga(BasicNewsRecipe):
    """Calibre news recipe that builds its feeds from rss.donga.com."""

    language = 'ko'
    title = '동아일보'
    description = '동아일보 기사'
    __author__ = 'Minsik Cho'
    ignore_duplicate_articles = {'title', 'url'}
    compress_news_images = True
    no_stylesheets = True
    oldest_article = 2
    encoding = 'utf-8'

    # One feed per enabled entry in the module-level ``sections`` list.
    # RSS feed URLs follow the syntax:
    # https://rss.donga.com/[section].xml
    # The loop variables are deliberately not called ``title`` so they cannot
    # be confused with the recipe's ``title`` attribute above.
    feeds = [
        (feed_title, 'https://rss.donga.com/' + slug + '.xml')
        for (feed_title, slug) in sections
    ]

    # Remove logo and print buttons
    remove_tags = [
        dict(name='div', attrs={'class': 'popHeaderWrap'}),
        dict(name='div', attrs={'class': 'etc'}),
    ]

    def print_version(self, url):
        """Return the print-version URL for an article URL.

        Original URLs have the syntax:
            https://www.donga.com/news/[section]/article/all/[date]/[gid]/1
        and are rewritten to:
            https://www.donga.com/news/View?gid=[gid]&date=[date]

        A URL that does not contain a non-empty ``/all/[date]/[gid]`` part is
        returned unchanged, instead of raising ``AttributeError`` on the
        failed regex match.
        """
        # ``+`` (not ``*``) so an empty date or gid is never accepted.
        match = re.search(r'(?<=/all/)([0-9]+)/([0-9]+)', url)
        if match is None:
            return url
        date, gid = match.groups()
        return 'https://www.donga.com/news/View?gid=' + gid + '&date=' + date