mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
58 lines
2.1 KiB
Plaintext
58 lines
2.1 KiB
Plaintext
# -*- coding: utf-8 -*-
|
|
__license__ = 'GPL v3'
|
|
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
|
|
'''
|
|
Profile to download The Kyunghyang Shinmun (khan.co.kr)
|
|
'''
|
|
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
import re
|
|
|
|
|
|
class Kyungyhang(BasicNewsRecipe):
    """Calibre news recipe for the khan.co.kr RSS feeds.

    The class is purely declarative: it only assigns class-level settings
    (metadata, clean-up rules and the feed list) and overrides no methods
    of ``BasicNewsRecipe``.
    """

    # --- Recipe metadata -------------------------------------------------
    __author__ = 'Seongkyoun Yoo'
    title = u'Kyungyhang'
    description = u'The Kyungyhang Shinmun articles'
    language = 'ko'

    # --- Download limits and generic clean-up switches -------------------
    oldest_article = 7
    max_articles_per_feed = 10
    no_stylesheets = True
    remove_javascript = True

    # Everything from the opening of the 'ad_movFocus' div through the
    # last </html> is matched (DOTALL lets '.' span newlines) and replaced
    # with a bare closing </html> tag.
    preprocess_regexps = [
        (
            re.compile(
                "<div class='ad_movFocus'.*</html>",
                re.DOTALL | re.IGNORECASE,
            ),
            lambda _match: '</html>',
        ),
    ]

    # Tag matchers for the article title, header and body text containers.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': ['article_title_wrap']}},
        {'name': 'div', 'attrs': {'class': ['viewHeader']}},
        {'name': 'span', 'attrs': {'class': ['article_txt']}},
    ]

    # Matcher for the element with id 'sub_bottom'.
    remove_tags_after = {'id': {'sub_bottom'}}

    # Matchers for ad widgets, date lines, iframes and navigation blocks.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['widget_top_dable']}},
        {'name': 'div', 'attrs': {'class': ['article_bottom_ad']}},
        {'name': 'div', 'attrs': {'class': ['article_date']}},
        {'name': 'iframe'},
        {'id': {'TdHot'}},
        {
            'name': 'div',
            'attrs': {
                'class': ['btn_list', 'bline', 'linebottom', 'bestArticle'],
            },
        },
        {'name': 'dl', 'attrs': {'class': ['CL']}},
        {'name': 'ul', 'attrs': {'class': ['tab']}},
    ]

    # (section title, RSS URL) pairs, one per newspaper section.
    feeds = [
        (u'정치', 'http://www.khan.co.kr/rss/rssdata/politic_news.xml'),
        (u'경제', 'http://www.khan.co.kr/rss/rssdata/economy_news.xml'),
        (u'사회', 'http://www.khan.co.kr/rss/rssdata/society_news.xml'),
        (u'세계', 'http://www.khan.co.kr/rss/rssdata/kh_world.xml'),
        (u'스포츠', 'http://www.khan.co.kr/rss/rssdata/kh_sports.xml'),
        (u'문화', 'http://www.khan.co.kr/rss/rssdata/culture_news.xml'),
        (u'연예', 'http://www.khan.co.kr/rss/rssdata/kh_entertainment.xml'),
        (u'IT', 'http://www.khan.co.kr/rss/rssdata/it_news.xml'),
        (u'오피니언', 'http://www.khan.co.kr/rss/rssdata/opinion_news.xml'),
    ]
|