mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
59 lines
1.8 KiB
Python
59 lines
1.8 KiB
Python
import re
|
|
|
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
|
|
# (feed title, RSS section slug) pairs; each slug is used to build a feed URL
# of the form https://rss.donga.com/<slug>.xml.
# Comment out the sections you are not interested in.
sections = [
    ('정치', 'politics'),
    ('사회', 'national'),
    ('경제', 'economy'),
    ('국제', 'international'),
    ('사설칼럼', 'editorials'),
    ('의학과학', 'science'),
    ('문화연예', 'culture'),
    ('스포츠', 'sports'),
    ('사람속으로', 'inmul'),
    # The following sections are optional and disabled by default.
    # Uncomment a line to enable that section.
    # ('건강', 'health'),
    # ('레저', 'leisure'),
    # ('도서', 'book'),
    # ('공연', 'show'),
    # ('여성', 'woman'),
    # ('여행', 'travel'),
    # ('생활정보', 'lifeinfo'),
]
|
|
|
|
|
|
class Donga(BasicNewsRecipe):
    """News recipe for 동아일보 (Dong-A Ilbo), built from its RSS feeds.

    One feed is created for every ``(title, section)`` pair in the
    module-level ``sections`` list, and each article URL is rewritten to
    its print-friendly page by :meth:`print_version`.
    """

    language = 'ko'
    title = '동아일보'
    description = '동아일보 기사'
    __author__ = 'Minsik Cho'
    ignore_duplicate_articles = {'title', 'url'}
    compress_news_images = True
    no_stylesheets = True
    oldest_article = 2
    encoding = 'utf-8'

    # RSS feed URL syntax:
    #   https://rss.donga.com/[section].xml
    # The loop variable is deliberately not called ``title`` so it cannot be
    # confused with the recipe's own ``title`` attribute above.
    feeds = [
        (feed_title, 'https://rss.donga.com/' + section + '.xml')
        for (feed_title, section) in sections
    ]

    # Remove logo and print buttons
    remove_tags = [
        dict(name='div', attrs={'class': 'popHeaderWrap'}),
        dict(name='div', attrs={'class': 'etc'}),
    ]

    def print_version(self, url):
        """Return the print-version URL for an article URL.

        Article URLs have the form::

            https://www.donga.com/news/[section]/article/all/[date]/[gid]/1

        and the matching print version is::

            https://www.donga.com/news/View?gid=[gid]&date=[date]

        If ``url`` does not contain an ``/all/[date]/[gid]`` part, it is
        returned unchanged instead of raising ``AttributeError`` on the
        failed match.
        """
        # ``+`` (not ``*``) so that an empty date or gid is never accepted.
        match = re.search(r'(?<=/all/)([0-9]+)/([0-9]+)', url)
        if match is None:
            return url

        date, gid = match.groups()
        return 'https://www.donga.com/news/View?gid=' + gid + '&date=' + date
|