mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Various Korean news sources by Hoje Lee
Also update various existing Korean news sources. Merge branch 'master' of https://github.com/hojel/calibre
This commit is contained in:
commit
f665617c9f
28
recipes/chosun.recipe
Normal file
28
recipes/chosun.recipe
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2015, Hoje Lee <hojelei at gmail.com>'
|
||||||
|
'''
|
||||||
|
Profile to download Chosun.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class ChosunDotcom(BasicNewsRecipe):
    """Recipe that downloads Chosun.com articles from its section RSS feeds."""

    # Metadata describing the publication.
    title = u'조선일보'
    description = u'조선닷컴 기사'
    language = 'ko'
    __author__ = 'Hoje Lee'

    # Download limits (see calibre's BasicNewsRecipe API for exact semantics).
    oldest_article = 7
    max_articles_per_feed = 10

    # Let calibre extract the article body instead of hand-written tag rules.
    auto_cleanup = True

    # (section title, RSS URL) pairs; all but Chosun Biz live on www.chosun.com.
    feeds = [
        (u'정치', 'http://www.chosun.com/site/data/rss/politics.xml'),
        (u'조선비즈', 'http://biz.chosun.com/site/data/rss/rss.xml'),
        (u'사회', 'http://www.chosun.com/site/data/rss/national.xml'),
        (u'문화', 'http://www.chosun.com/site/data/rss/culture.xml'),
        (u'국제', 'http://www.chosun.com/site/data/rss/international.xml'),
        (u'오피니언', 'http://www.chosun.com/site/data/rss/editorials.xml'),
        (u'스포츠', 'http://www.chosun.com/site/data/rss/sports.xml'),
        (u'연예', 'http://www.chosun.com/site/data/rss/ent.xml'),
    ]
|
@ -1,3 +1,4 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
|
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
|
||||||
'''
|
'''
|
||||||
@ -6,42 +7,34 @@ Profile to download The Hankyoreh
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class Hankyoreh(BasicNewsRecipe):
|
class Hankyoreh(BasicNewsRecipe):
|
||||||
title = u'Hankyoreh'
|
|
||||||
language = 'ko'
|
language = 'ko'
|
||||||
|
title = u'한겨례'
|
||||||
description = u'The Hankyoreh News articles'
|
description = u'The Hankyoreh News articles'
|
||||||
__author__ = 'Seongkyoun Yoo'
|
__author__ = 'Seongkyoun Yoo'
|
||||||
oldest_article = 5
|
oldest_article = 7
|
||||||
recursions = 1
|
max_articles_per_feed = 10
|
||||||
max_articles_per_feed = 5
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='tr', attrs={'height':['60px']}),
|
dict(name='div', attrs ={'class':['article-head']}),
|
||||||
dict(id=['fontSzArea'])
|
dict(name='div', attrs ={'class':['article-text']}),
|
||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(target='_blank'),
|
dict(name='p', attrs={'class':['category']}),
|
||||||
dict(name='td', attrs={'style':['padding: 10px 8px 5px 8px;']}),
|
|
||||||
dict(name='iframe', attrs={'width':['590']}),
|
|
||||||
]
|
|
||||||
remove_tags_after = [
|
|
||||||
dict(target='_top')
|
|
||||||
]
|
]
|
||||||
|
remove_tags_after = dict(id={'ad_box01'})
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('All News','http://www.hani.co.kr/rss/'),
|
#(u'전체기사', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_all.xml'),
|
||||||
('Politics','http://www.hani.co.kr/rss/politics/'),
|
(u'정치', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_politics.xml'),
|
||||||
('Economy','http://www.hani.co.kr/rss/economy/'),
|
#(u'경제', 'http://www.hani.co.kr/rss/economy/'),
|
||||||
('Society','http://www.hani.co.kr/rss/society/'),
|
(u'사회', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_society.xml'),
|
||||||
('International','http://www.hani.co.kr/rss/international/'),
|
#(u'국제', 'http://www.hani.co.kr/rss/international/'),
|
||||||
('Culture','http://www.hani.co.kr/rss/culture/'),
|
(u'문화', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_culture.xml'),
|
||||||
('Sports','http://www.hani.co.kr/rss/sports/'),
|
(u'스포츠', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_sports.xml'),
|
||||||
('Science','http://www.hani.co.kr/rss/science/'),
|
#(u'과학', 'http://www.hani.co.kr/rss/science/'),
|
||||||
('Opinion','http://www.hani.co.kr/rss/opinion/'),
|
(u'사설·칼럼','http://www.hani.co.kr/ilram/rss/hkr_news_list_opinion.xml'),
|
||||||
('Cartoon','http://www.hani.co.kr/rss/cartoon/'),
|
(u'만화만평', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_cartoon.xml'),
|
||||||
('English Edition','http://www.hani.co.kr/rss/english_edition/'),
|
#(u'한겨례섹션','http://www.hani.co.kr/rss/specialsection/'),
|
||||||
('Specialsection','http://www.hani.co.kr/rss/specialsection/'),
|
|
||||||
('Hanionly','http://www.hani.co.kr/rss/hanionly/'),
|
|
||||||
('Hkronly','http://www.hani.co.kr/rss/hkronly/'),
|
|
||||||
('Multihani','http://www.hani.co.kr/rss/multihani/'),
|
|
||||||
('Lead','http://www.hani.co.kr/rss/lead/'),
|
|
||||||
('Newsrank','http://www.hani.co.kr/rss/newsrank/'),
|
|
||||||
]
|
]
|
||||||
|
@ -10,16 +10,29 @@ class Hankyoreh21(BasicNewsRecipe):
|
|||||||
language = 'ko'
|
language = 'ko'
|
||||||
description = u'The Hankyoreh21 Magazine articles'
|
description = u'The Hankyoreh21 Magazine articles'
|
||||||
__author__ = 'Seongkyoun Yoo'
|
__author__ = 'Seongkyoun Yoo'
|
||||||
oldest_article = 20
|
oldest_article = 30
|
||||||
recursions = 1
|
max_articles_per_feed = 10
|
||||||
max_articles_per_feed = 120
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='font', attrs={'class':'t18bk'}),
|
dict(name='header', attrs ={'class':['article_head']}),
|
||||||
dict(id=['fontSzArea'])
|
dict(name='div', attrs ={'class':['article_body']}),
|
||||||
|
]
|
||||||
|
remove_tags = [
|
||||||
|
dict(name='div', attrs ={'class':['article_tools']}),
|
||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('Hani21','http://h21.hani.co.kr/rss/ '),
|
#('전체기사', 'http://h21.hani.co.kr/rss/'),
|
||||||
|
('표지이야기','http://h21.hani.co.kr/rss/cover/'),
|
||||||
|
('특집', 'http://h21.hani.co.kr/rss/special/'),
|
||||||
|
('정치', 'http://h21.hani.co.kr/rss/politics/'),
|
||||||
|
('경제', 'http://h21.hani.co.kr/rss/economy/'),
|
||||||
|
('사회', 'http://h21.hani.co.kr/rss/society/'),
|
||||||
|
('세계', 'http://h21.hani.co.kr/rss/world/'),
|
||||||
|
('문화', 'http://h21.hani.co.kr/rss/culture/'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def get_article_url(self, article):
    """Return the article URL, making site-relative links absolute.

    The link is obtained from ``BasicNewsRecipe.get_article_url``. When it
    starts with ``/`` it is prefixed with ``http://h21.hani.co.kr``; any
    other link is returned unchanged.

    A missing or empty link (``None`` or ``''``) is passed through as-is
    instead of being indexed, which previously raised ``TypeError`` /
    ``IndexError``.
    """
    org_url = BasicNewsRecipe.get_article_url(self, article)
    if not org_url:
        # Nothing to rewrite; hand the falsy value back to the caller.
        return org_url
    if org_url.startswith('/'):
        return "http://h21.hani.co.kr" + org_url
    return org_url
|
||||||
|
35
recipes/joongang.recipe
Normal file
35
recipes/joongang.recipe
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2015, Hoje Lee <hojelei at gmail.com>'
|
||||||
|
'''
|
||||||
|
Profile to download Joongang Ilbo
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class JoongangIlbo(BasicNewsRecipe):
    """Recipe that downloads Joongang Ilbo articles from rss.joins.com feeds."""

    # Metadata describing the publication.
    title = u'중앙일보'
    description = u'중앙일보 신문 기사'
    language = 'ko'
    __author__ = 'Hoje Lee'

    # Download limits (see calibre's BasicNewsRecipe API for exact semantics).
    oldest_article = 7
    max_articles_per_feed = 5

    # Let calibre extract the article body instead of hand-written tag rules.
    auto_cleanup = True

    # (section title, RSS URL) pairs. Entries that are commented out are
    # feeds the recipe author left disabled.
    feeds = [
        # (u'전체기사', 'http://rss.joins.com/joins_news_list.xml'),
        (u'주요기사', 'http://rss.joins.com/joins_homenews_list.xml'),
        # (u'경제', 'http://rss.joins.com/joins_money_list.xml'),
        # (u'사회', 'http://rss.joins.com/joins_life_list.xml'),
        # (u'정치', 'http://rss.joins.com/joins_politics_list.xml'),

        # Most-viewed news feeds.
        (u'전체기사', 'http://rss.joins.com/sonagi/joins_sonagi_total_list.xml'),
        (u'경제', 'http://rss.joins.com/sonagi/joins_sonagi_money_list.xml'),
        (u'스포츠', 'http://rss.joins.com/sonagi/joins_sonagi_sports_list.xml'),
        (u'연예', 'http://rss.joins.com/sonagi/joins_sonagi_star_list.xml'),
        (u'사회', 'http://rss.joins.com/sonagi/joins_sonagi_life_list.xml'),
        (u'정치', 'http://rss.joins.com/sonagi/joins_sonagi_politics_list.xml'),
        (u'지구촌', 'http://rss.joins.com/sonagi/joins_sonagi_world_list.xml'),
        (u'IT과학', 'http://rss.joins.com/sonagi/joins_sonagi_it_list.xml'),
        (u'사설', 'http://rss.joins.com/sonagi/joins_sonagi_opinion_list.xml'),
    ]
|
@ -1,3 +1,4 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
|
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
|
||||||
'''
|
'''
|
||||||
@ -5,26 +6,34 @@ Profile to download The Kyungyhang
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
class Kyungyhang(BasicNewsRecipe):
|
class Kyungyhang(BasicNewsRecipe):
|
||||||
title = u'Kyungyhang'
|
title = u'Kyungyhang'
|
||||||
language = 'ko'
|
language = 'ko'
|
||||||
description = u'The Kyungyhang Shinmun articles'
|
description = u'The Kyungyhang Shinmun articles'
|
||||||
__author__ = 'Seongkyoun Yoo'
|
__author__ = 'Seongkyoun Yoo'
|
||||||
oldest_article = 20
|
oldest_article = 7
|
||||||
recursions = 2
|
max_articles_per_feed = 10
|
||||||
max_articles_per_feed = 20
|
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
|
preprocess_regexps = [
|
||||||
|
(re.compile("<div class='ad_movFocus'.*</html>", re.DOTALL|re.IGNORECASE), lambda match: '</html>'),
|
||||||
|
]
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs ={'class':['article_title_wrap']}),
|
dict(name='div', attrs ={'class':['article_title_wrap']}),
|
||||||
|
dict(name='div', attrs ={'class':['viewHeader']}),
|
||||||
dict(name='span', attrs ={'class':['article_txt']})
|
dict(name='span', attrs ={'class':['article_txt']})
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags_after = dict(id={'sub_bottom'})
|
remove_tags_after = dict(id={'sub_bottom'})
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
|
dict(name='div', attrs={'class':['widget_top_dable']}),
|
||||||
|
dict(name='div', attrs={'class':['article_bottom_ad']}),
|
||||||
|
dict(name='div', attrs={'class':['article_date']}),
|
||||||
dict(name='iframe'),
|
dict(name='iframe'),
|
||||||
dict(id={'TdHot'}),
|
dict(id={'TdHot'}),
|
||||||
dict(name='div', attrs={'class':['btn_list','bline','linebottom','bestArticle']}),
|
dict(name='div', attrs={'class':['btn_list','bline','linebottom','bestArticle']}),
|
||||||
@ -33,5 +42,14 @@ class Kyungyhang(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
('All News','http://www.khan.co.kr/rss/rssdata/total_news.xml'),
|
#(u'전체기사','http://www.khan.co.kr/rss/rssdata/total_news.xml'),
|
||||||
|
(u'정치', 'http://www.khan.co.kr/rss/rssdata/politic_news.xml'),
|
||||||
|
(u'경제', 'http://www.khan.co.kr/rss/rssdata/economy_news.xml'),
|
||||||
|
(u'사회', 'http://www.khan.co.kr/rss/rssdata/society_news.xml'),
|
||||||
|
(u'세계', 'http://www.khan.co.kr/rss/rssdata/kh_world.xml'),
|
||||||
|
(u'스포츠', 'http://www.khan.co.kr/rss/rssdata/kh_sports.xml'),
|
||||||
|
(u'문화', 'http://www.khan.co.kr/rss/rssdata/culture_news.xml'),
|
||||||
|
(u'연예', 'http://www.khan.co.kr/rss/rssdata/kh_entertainment.xml'),
|
||||||
|
(u'IT', 'http://www.khan.co.kr/rss/rssdata/it_news.xml'),
|
||||||
|
(u'오피니언','http://www.khan.co.kr/rss/rssdata/opinion_news.xml'),
|
||||||
]
|
]
|
||||||
|
34
recipes/maekyung.recipe
Normal file
34
recipes/maekyung.recipe
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2015, Hoje Lee <hojelei at gmail.com>'
|
||||||
|
'''
|
||||||
|
Profile to download Maeil Business
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class MaeilBusiness(BasicNewsRecipe):
    """Recipe that downloads Maeil Business articles from file.mk.co.kr feeds."""

    # Metadata describing the publication.
    title = u'매일경제'
    description = u'매일경제 신문 기사'
    language = 'ko'
    __author__ = 'Hoje Lee'

    # Download limits (see calibre's BasicNewsRecipe API for exact semantics).
    oldest_article = 7
    max_articles_per_feed = 10

    # Let calibre extract the article body instead of hand-written tag rules.
    auto_cleanup = True

    # (section title, RSS URL) pairs. The commented-out entry is a feed the
    # recipe author left disabled.
    feeds = [
        (u'헤드라인', 'http://file.mk.co.kr/news/rss/rss_30000001.xml'),
        # (u'전체뉴스', 'http://file.mk.co.kr/news/rss/rss_40300001.xml'),
        (u'경제', 'http://file.mk.co.kr/news/rss/rss_30100041.xml'),
        (u'정치', 'http://file.mk.co.kr/news/rss/rss_30200030.xml'),
        (u'사회', 'http://file.mk.co.kr/news/rss/rss_50400012.xml'),
        (u'국제', 'http://file.mk.co.kr/news/rss/rss_30300018.xml'),
        (u'기업ㆍ경영', 'http://file.mk.co.kr/news/rss/rss_50100032.xml'),
        (u'증권', 'http://file.mk.co.kr/news/rss/rss_50200011.xml'),
        (u'부동산', 'http://file.mk.co.kr/news/rss/rss_50300009.xml'),
        (u'문화ㆍ연예', 'http://file.mk.co.kr/news/rss/rss_30000023.xml'),
        (u'패션', 'http://file.mk.co.kr/news/rss/rss_72000001.xml'),
        (u'스포츠', 'http://file.mk.co.kr/news/rss/rss_71000001.xml'),
        (u'게임', 'http://file.mk.co.kr/news/rss/rss_50700001.xml'),
        (u'오피니언', 'http://file.mk.co.kr/news/rss/rss_30500041.xml'),
    ]
|
48
recipes/sisainlive.recipe
Normal file
48
recipes/sisainlive.recipe
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2015, Hoje Lee <hojelei at gmail.com>'
|
||||||
|
'''
|
||||||
|
Profile to download SisaIN Live
|
||||||
|
'''
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class SisaINLive(BasicNewsRecipe):
    """Recipe that downloads SisaIN Live articles from sisainlive.com feeds."""

    # Metadata describing the publication.
    title = u'시사인 라이브'
    description = u'시사인 라이브 기사'
    language = 'ko'
    __author__ = 'Hoje Lee'

    # Download limits (see calibre's BasicNewsRecipe API for exact semantics).
    oldest_article = 30
    max_articles_per_feed = 10

    # Let calibre extract the article body automatically.
    auto_cleanup = True

    # Manual cleanup rules, kept for reference but disabled (the original
    # wrapped them in a no-op string literal):
    #
    # no_stylesheets = True
    # remove_javascript = True
    #
    # keep_only_tags = [
    #     dict(name='div', attrs ={'class':['View_Title']}),
    #     dict(name='div', attrs ={'class':['View_Info']}),
    #     dict(name='div', attrs ={'class':['View_Time']}),
    #     dict(id='articleBody'),
    # ]
    # remove_tags = [
    #     dict(name='table', attrs ={'width':['320'], 'height':['265']}),
    # ]

    # (section title, RSS URL) pairs. The commented-out entry is a feed the
    # recipe author left disabled.
    feeds = [
        # (u'전체기사', 'http://www.sisainlive.com/rss.xml'),
        (u'인기기사', 'http://www.sisainlive.com/rss/clickTop.xml'),
        (u'커버스토리', 'http://www.sisainlive.com/rss/SRN121.xml'),
        (u'특집', 'http://www.sisainlive.com/rss/SRN122.xml'),
        (u'정치', 'http://www.sisainlive.com/rss/S1N15.xml'),
        (u'경제', 'http://www.sisainlive.com/rss/S1N16.xml'),
        (u'사회', 'http://www.sisainlive.com/rss/S1N17.xml'),
        (u'문화', 'http://www.sisainlive.com/rss/S1N18.xml'),
        (u'국제.한반도', 'http://www.sisainlive.com/rss/S1N4.xml'),
        (u'실용.과학', 'http://www.sisainlive.com/rss/S1N6.xml'),
        (u'휴먼&휴', 'http://www.sisainlive.com/rss/S1N19.xml'),
        (u'인터뷰.오피니언', 'http://www.sisainlive.com/rss/S1N5.xml'),
        (u'사진.만화', 'http://www.sisainlive.com/rss/S1N7.xml'),
        (u'별책부록', 'http://www.sisainlive.com/rss/S1N14.xml'),
    ]
|
Loading…
x
Reference in New Issue
Block a user