mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Various Korean news sources by Hoje Lee
Also update various existing Korean news sources. Merge branch 'master' of https://github.com/hojel/calibre
This commit is contained in:
commit
f665617c9f
28
recipes/chosun.recipe
Normal file
28
recipes/chosun.recipe
Normal file
@ -0,0 +1,28 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Hoje Lee <hojelei at gmail.com>'
|
||||
'''
|
||||
Profile to download Chosun.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class ChosunDotcom(BasicNewsRecipe):
    # Recipe settings for Chosun.com. Everything here is a plain class
    # attribute; auto_cleanup is switched on and no explicit keep/remove
    # tag filters are defined.

    # Identification / metadata.
    title = u'조선일보'
    description = u'조선닷컴 기사'
    language = 'ko'
    __author__ = 'Hoje Lee'

    # Download limits and clean-up mode.
    oldest_article = 7
    max_articles_per_feed = 10
    auto_cleanup = True

    # (section title, RSS URL) pairs, kept in their original order.
    feeds = [
        (u'정치', 'http://www.chosun.com/site/data/rss/politics.xml'),
        (u'조선비즈', 'http://biz.chosun.com/site/data/rss/rss.xml'),
        (u'사회', 'http://www.chosun.com/site/data/rss/national.xml'),
        (u'문화', 'http://www.chosun.com/site/data/rss/culture.xml'),
        (u'국제', 'http://www.chosun.com/site/data/rss/international.xml'),
        (u'오피니언', 'http://www.chosun.com/site/data/rss/editorials.xml'),
        (u'스포츠', 'http://www.chosun.com/site/data/rss/sports.xml'),
        (u'연예', 'http://www.chosun.com/site/data/rss/ent.xml'),
    ]
|
@ -1,3 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
|
||||
'''
|
||||
@ -6,42 +7,34 @@ Profile to download The Hankyoreh
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Hankyoreh(BasicNewsRecipe):
|
||||
title = u'Hankyoreh'
|
||||
language = 'ko'
|
||||
title = u'한겨례'
|
||||
description = u'The Hankyoreh News articles'
|
||||
__author__ = 'Seongkyoun Yoo'
|
||||
oldest_article = 5
|
||||
recursions = 1
|
||||
max_articles_per_feed = 5
|
||||
no_stylesheets = True
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 10
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='tr', attrs={'height':['60px']}),
|
||||
dict(id=['fontSzArea'])
|
||||
dict(name='div', attrs ={'class':['article-head']}),
|
||||
dict(name='div', attrs ={'class':['article-text']}),
|
||||
]
|
||||
remove_tags = [
|
||||
dict(target='_blank'),
|
||||
dict(name='td', attrs={'style':['padding: 10px 8px 5px 8px;']}),
|
||||
dict(name='iframe', attrs={'width':['590']}),
|
||||
]
|
||||
remove_tags_after = [
|
||||
dict(target='_top')
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name='p', attrs={'class':['category']}),
|
||||
]
|
||||
remove_tags_after = dict(id={'ad_box01'})
|
||||
|
||||
feeds = [
|
||||
('All News','http://www.hani.co.kr/rss/'),
|
||||
('Politics','http://www.hani.co.kr/rss/politics/'),
|
||||
('Economy','http://www.hani.co.kr/rss/economy/'),
|
||||
('Society','http://www.hani.co.kr/rss/society/'),
|
||||
('International','http://www.hani.co.kr/rss/international/'),
|
||||
('Culture','http://www.hani.co.kr/rss/culture/'),
|
||||
('Sports','http://www.hani.co.kr/rss/sports/'),
|
||||
('Science','http://www.hani.co.kr/rss/science/'),
|
||||
('Opinion','http://www.hani.co.kr/rss/opinion/'),
|
||||
('Cartoon','http://www.hani.co.kr/rss/cartoon/'),
|
||||
('English Edition','http://www.hani.co.kr/rss/english_edition/'),
|
||||
('Specialsection','http://www.hani.co.kr/rss/specialsection/'),
|
||||
('Hanionly','http://www.hani.co.kr/rss/hanionly/'),
|
||||
('Hkronly','http://www.hani.co.kr/rss/hkronly/'),
|
||||
('Multihani','http://www.hani.co.kr/rss/multihani/'),
|
||||
('Lead','http://www.hani.co.kr/rss/lead/'),
|
||||
('Newsrank','http://www.hani.co.kr/rss/newsrank/'),
|
||||
#(u'전체기사', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_all.xml'),
|
||||
(u'정치', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_politics.xml'),
|
||||
#(u'경제', 'http://www.hani.co.kr/rss/economy/'),
|
||||
(u'사회', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_society.xml'),
|
||||
#(u'국제', 'http://www.hani.co.kr/rss/international/'),
|
||||
(u'문화', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_culture.xml'),
|
||||
(u'스포츠', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_sports.xml'),
|
||||
#(u'과학', 'http://www.hani.co.kr/rss/science/'),
|
||||
(u'사설·칼럼','http://www.hani.co.kr/ilram/rss/hkr_news_list_opinion.xml'),
|
||||
(u'만화만평', 'http://www.hani.co.kr/ilram/rss/hkr_news_list_cartoon.xml'),
|
||||
#(u'한겨례섹션','http://www.hani.co.kr/rss/specialsection/'),
|
||||
]
|
||||
|
@ -9,17 +9,30 @@ class Hankyoreh21(BasicNewsRecipe):
|
||||
title = u'Hankyoreh21'
|
||||
language = 'ko'
|
||||
description = u'The Hankyoreh21 Magazine articles'
|
||||
__author__ = 'Seongkyoun Yoo'
|
||||
oldest_article = 20
|
||||
recursions = 1
|
||||
max_articles_per_feed = 120
|
||||
no_stylesheets = True
|
||||
__author__ = 'Seongkyoun Yoo'
|
||||
oldest_article = 30
|
||||
max_articles_per_feed = 10
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
keep_only_tags = [
|
||||
dict(name='font', attrs={'class':'t18bk'}),
|
||||
dict(id=['fontSzArea'])
|
||||
dict(name='header', attrs ={'class':['article_head']}),
|
||||
dict(name='div', attrs ={'class':['article_body']}),
|
||||
]
|
||||
remove_tags = [
|
||||
dict(name='div', attrs ={'class':['article_tools']}),
|
||||
]
|
||||
|
||||
feeds = [
|
||||
('Hani21','http://h21.hani.co.kr/rss/ '),
|
||||
#('전체기사', 'http://h21.hani.co.kr/rss/'),
|
||||
('표지이야기','http://h21.hani.co.kr/rss/cover/'),
|
||||
('특집', 'http://h21.hani.co.kr/rss/special/'),
|
||||
('정치', 'http://h21.hani.co.kr/rss/politics/'),
|
||||
('경제', 'http://h21.hani.co.kr/rss/economy/'),
|
||||
('사회', 'http://h21.hani.co.kr/rss/society/'),
|
||||
('세계', 'http://h21.hani.co.kr/rss/world/'),
|
||||
('문화', 'http://h21.hani.co.kr/rss/culture/'),
|
||||
]
|
||||
|
||||
def get_article_url(self, article):
    """Return the URL for *article*, turning site-relative links absolute.

    The URL is obtained from ``BasicNewsRecipe.get_article_url``. When it
    begins with ``/`` it is prefixed with ``http://h21.hani.co.kr``; any
    other value is returned unchanged.

    An empty or missing URL (``''`` / ``None``) is passed through as-is
    rather than being indexed, which previously raised ``IndexError`` or
    ``TypeError`` on ``org_url[0]``.
    """
    org_url = BasicNewsRecipe.get_article_url(self, article)
    # Guard first: only a non-empty URL can be inspected for a leading slash.
    if org_url and org_url.startswith('/'):
        return "http://h21.hani.co.kr" + org_url
    return org_url
|
||||
|
35
recipes/joongang.recipe
Normal file
35
recipes/joongang.recipe
Normal file
@ -0,0 +1,35 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Hoje Lee <hojelei at gmail.com>'
|
||||
'''
|
||||
Profile to download Joongang Ilbo
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class JoongangIlbo(BasicNewsRecipe):
    # Recipe settings for Joongang Ilbo. Everything here is a plain class
    # attribute; auto_cleanup is switched on and no explicit keep/remove
    # tag filters are defined.

    # Identification / metadata.
    title = u'중앙일보'
    description = u'중앙일보 신문 기사'
    language = 'ko'
    __author__ = 'Hoje Lee'

    # Download limits and clean-up mode.
    oldest_article = 7
    max_articles_per_feed = 5
    auto_cleanup = True

    # (section title, RSS URL) pairs, kept in their original order.
    # Entries that are commented out are disabled feeds.
    feeds = [
        # (u'전체기사', 'http://rss.joins.com/joins_news_list.xml'),
        (u'주요기사', 'http://rss.joins.com/joins_homenews_list.xml'),
        # (u'경제', 'http://rss.joins.com/joins_money_list.xml'),
        # (u'사회', 'http://rss.joins.com/joins_life_list.xml'),
        # (u'정치', 'http://rss.joins.com/joins_politics_list.xml'),

        # Most-viewed news
        (u'전체기사', 'http://rss.joins.com/sonagi/joins_sonagi_total_list.xml'),
        (u'경제', 'http://rss.joins.com/sonagi/joins_sonagi_money_list.xml'),
        (u'스포츠', 'http://rss.joins.com/sonagi/joins_sonagi_sports_list.xml'),
        (u'연예', 'http://rss.joins.com/sonagi/joins_sonagi_star_list.xml'),
        (u'사회', 'http://rss.joins.com/sonagi/joins_sonagi_life_list.xml'),
        (u'정치', 'http://rss.joins.com/sonagi/joins_sonagi_politics_list.xml'),
        (u'지구촌', 'http://rss.joins.com/sonagi/joins_sonagi_world_list.xml'),
        (u'IT과학', 'http://rss.joins.com/sonagi/joins_sonagi_it_list.xml'),
        (u'사설', 'http://rss.joins.com/sonagi/joins_sonagi_opinion_list.xml'),
    ]
|
@ -1,3 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Seongkyoun Yoo <seongkyoun.yoo at gmail.com>'
|
||||
'''
|
||||
@ -5,26 +6,34 @@ Profile to download The Kyungyhang
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class Kyungyhang(BasicNewsRecipe):
|
||||
title = u'Kyungyhang'
|
||||
language = 'ko'
|
||||
description = u'The Kyungyhang Shinmun articles'
|
||||
__author__ = 'Seongkyoun Yoo'
|
||||
oldest_article = 20
|
||||
recursions = 2
|
||||
max_articles_per_feed = 20
|
||||
no_stylesheets = True
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 10
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile("<div class='ad_movFocus'.*</html>", re.DOTALL|re.IGNORECASE), lambda match: '</html>'),
|
||||
]
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs ={'class':['article_title_wrap']}),
|
||||
dict(name='div', attrs ={'class':['viewHeader']}),
|
||||
dict(name='span', attrs ={'class':['article_txt']})
|
||||
]
|
||||
|
||||
remove_tags_after = dict(id={'sub_bottom'})
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['widget_top_dable']}),
|
||||
dict(name='div', attrs={'class':['article_bottom_ad']}),
|
||||
dict(name='div', attrs={'class':['article_date']}),
|
||||
dict(name='iframe'),
|
||||
dict(id={'TdHot'}),
|
||||
dict(name='div', attrs={'class':['btn_list','bline','linebottom','bestArticle']}),
|
||||
@ -33,5 +42,14 @@ class Kyungyhang(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
feeds = [
|
||||
('All News','http://www.khan.co.kr/rss/rssdata/total_news.xml'),
|
||||
#(u'전체기사','http://www.khan.co.kr/rss/rssdata/total_news.xml'),
|
||||
(u'정치', 'http://www.khan.co.kr/rss/rssdata/politic_news.xml'),
|
||||
(u'경제', 'http://www.khan.co.kr/rss/rssdata/economy_news.xml'),
|
||||
(u'사회', 'http://www.khan.co.kr/rss/rssdata/society_news.xml'),
|
||||
(u'세계', 'http://www.khan.co.kr/rss/rssdata/kh_world.xml'),
|
||||
(u'스포츠', 'http://www.khan.co.kr/rss/rssdata/kh_sports.xml'),
|
||||
(u'문화', 'http://www.khan.co.kr/rss/rssdata/culture_news.xml'),
|
||||
(u'연예', 'http://www.khan.co.kr/rss/rssdata/kh_entertainment.xml'),
|
||||
(u'IT', 'http://www.khan.co.kr/rss/rssdata/it_news.xml'),
|
||||
(u'오피니언','http://www.khan.co.kr/rss/rssdata/opinion_news.xml'),
|
||||
]
|
||||
|
34
recipes/maekyung.recipe
Normal file
34
recipes/maekyung.recipe
Normal file
@ -0,0 +1,34 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Hoje Lee <hojelei at gmail.com>'
|
||||
'''
|
||||
Profile to download Maeil Business
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class MaeilBusiness(BasicNewsRecipe):
    # Recipe settings for Maeil Business. Everything here is a plain class
    # attribute; auto_cleanup is switched on and no explicit keep/remove
    # tag filters are defined.

    # Identification / metadata.
    title = u'매일경제'
    description = u'매일경제 신문 기사'
    language = 'ko'
    __author__ = 'Hoje Lee'

    # Download limits and clean-up mode.
    oldest_article = 7
    max_articles_per_feed = 10
    auto_cleanup = True

    # (section title, RSS URL) pairs, kept in their original order.
    # The commented-out entry is a disabled feed.
    feeds = [
        (u'헤드라인', 'http://file.mk.co.kr/news/rss/rss_30000001.xml'),
        # (u'전체뉴스', 'http://file.mk.co.kr/news/rss/rss_40300001.xml'),
        (u'경제', 'http://file.mk.co.kr/news/rss/rss_30100041.xml'),
        (u'정치', 'http://file.mk.co.kr/news/rss/rss_30200030.xml'),
        (u'사회', 'http://file.mk.co.kr/news/rss/rss_50400012.xml'),
        (u'국제', 'http://file.mk.co.kr/news/rss/rss_30300018.xml'),
        (u'기업ㆍ경영', 'http://file.mk.co.kr/news/rss/rss_50100032.xml'),
        (u'증권', 'http://file.mk.co.kr/news/rss/rss_50200011.xml'),
        (u'부동산', 'http://file.mk.co.kr/news/rss/rss_50300009.xml'),
        (u'문화ㆍ연예', 'http://file.mk.co.kr/news/rss/rss_30000023.xml'),
        (u'패션', 'http://file.mk.co.kr/news/rss/rss_72000001.xml'),
        (u'스포츠', 'http://file.mk.co.kr/news/rss/rss_71000001.xml'),
        (u'게임', 'http://file.mk.co.kr/news/rss/rss_50700001.xml'),
        (u'오피니언', 'http://file.mk.co.kr/news/rss/rss_30500041.xml'),
    ]
|
48
recipes/sisainlive.recipe
Normal file
48
recipes/sisainlive.recipe
Normal file
@ -0,0 +1,48 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Hoje Lee <hojelei at gmail.com>'
|
||||
'''
|
||||
Profile to download SisaIN Live
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class SisaINLive(BasicNewsRecipe):
    # Recipe settings for SisaIN Live. Everything active here is a plain
    # class attribute; auto_cleanup is switched on.

    # Identification / metadata.
    title = u'시사인 라이브'
    description = u'시사인 라이브 기사'
    language = 'ko'
    __author__ = 'Hoje Lee'

    # Download limits and clean-up mode.
    oldest_article = 30
    max_articles_per_feed = 10
    auto_cleanup = True

    # Disabled manual clean-up configuration, retained for reference (it was
    # previously parked inside an unused string literal):
    #
    # no_stylesheets = True
    # remove_javascript = True
    #
    # keep_only_tags = [
    #     dict(name='div', attrs ={'class':['View_Title']}),
    #     dict(name='div', attrs ={'class':['View_Info']}),
    #     dict(name='div', attrs ={'class':['View_Time']}),
    #     dict(id='articleBody'),
    # ]
    # remove_tags = [
    #     dict(name='table', attrs ={'width':['320'], 'height':['265']}),
    # ]

    # (section title, RSS URL) pairs, kept in their original order.
    # The commented-out entry is a disabled feed.
    feeds = [
        # (u'전체기사', 'http://www.sisainlive.com/rss.xml'),
        (u'인기기사', 'http://www.sisainlive.com/rss/clickTop.xml'),
        (u'커버스토리', 'http://www.sisainlive.com/rss/SRN121.xml'),
        (u'특집', 'http://www.sisainlive.com/rss/SRN122.xml'),
        (u'정치', 'http://www.sisainlive.com/rss/S1N15.xml'),
        (u'경제', 'http://www.sisainlive.com/rss/S1N16.xml'),
        (u'사회', 'http://www.sisainlive.com/rss/S1N17.xml'),
        (u'문화', 'http://www.sisainlive.com/rss/S1N18.xml'),
        (u'국제.한반도', 'http://www.sisainlive.com/rss/S1N4.xml'),
        (u'실용.과학', 'http://www.sisainlive.com/rss/S1N6.xml'),
        (u'휴먼&휴', 'http://www.sisainlive.com/rss/S1N19.xml'),
        (u'인터뷰.오피니언', 'http://www.sisainlive.com/rss/S1N5.xml'),
        (u'사진.만화', 'http://www.sisainlive.com/rss/S1N7.xml'),
        (u'별책부록', 'http://www.sisainlive.com/rss/S1N14.xml'),
    ]
|
Loading…
x
Reference in New Issue
Block a user