mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Updated Ming Pao and Various Taiwanese news sources by Eddie Lau
This commit is contained in:
parent
c7c9ade376
commit
16c92f3d23
42
recipes/china_times.recipe
Normal file
42
recipes/china_times.recipe
Normal file
@ -0,0 +1,42 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
# dug from http://www.mobileread.com/forums/showthread.php?p=1012294
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1277443634(BasicNewsRecipe):
    # Recipe for the China Times (Taiwan) RSS feeds.
    # Everything below is declarative configuration read by BasicNewsRecipe.

    # --- Publication metadata -------------------------------------------
    title = u'中時電子報'
    __author__ = 'einstuerzende, updated by Eddie Lau'
    __version__ = '1.0'
    publisher = 'China Times Group'
    description = 'China Times (Taiwan)'
    category = 'News, Chinese, Taiwan'
    language = 'zh'

    # The same logo image serves as both masthead and cover.
    masthead_url = cover_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif'

    # --- Download settings ----------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'big5'

    # --- Page clean-up --------------------------------------------------
    no_stylesheets = True
    remove_javascript = True
    conversion_options = dict(linearize_tables=True)
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': ['articlebox', 'articlebox clearfix']}},
    ]
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['focus-news']}},
    ]

    # --- Sections ---------------------------------------------------------
    # Feeds left out on purpose: want-u.rss (Want Daily), plus finance-u.rss
    # and stock-u.rss, both of which were noted as having broken links.
    feeds = [
        (u'焦點', u'http://rss.chinatimes.com/rss/focus-u.rss'),          # focus
        (u'政治', u'http://rss.chinatimes.com/rss/Politic-u.rss'),        # politics
        (u'社會', u'http://rss.chinatimes.com/rss/social-u.rss'),         # society
        (u'國際', u'http://rss.chinatimes.com/rss/international-u.rss'),  # international
        (u'兩岸', u'http://rss.chinatimes.com/rss/mainland-u.rss'),       # cross-strait
        (u'地方', u'http://rss.chinatimes.com/rss/local-u.rss'),          # local
        (u'言論', u'http://rss.chinatimes.com/rss/comment-u.rss'),        # opinion
        (u'科技', u'http://rss.chinatimes.com/rss/technology-u.rss'),     # technology
        (u'運動', u'http://rss.chinatimes.com/rss/sport-u.rss'),          # sports
        (u'藝文', u'http://rss.chinatimes.com/rss/philology-u.rss'),      # arts and literature
    ]
|
||||
|
44
recipes/liberty_times.recipe
Normal file
44
recipes/liberty_times.recipe
Normal file
@ -0,0 +1,44 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
# dug from http://www.mobileread.com/forums/showthread.php?p=1012294
|
||||
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1277443634(BasicNewsRecipe):
    # Recipe for the Liberty Times (Taiwan) RSS feeds.
    # Everything below is declarative configuration read by BasicNewsRecipe.

    # --- Publication metadata -------------------------------------------
    title = u'自由電子報'
    __author__ = 'einstuerzende, updated by Eddie Lau'
    __version__ = '1.1'
    publisher = 'Liberty Times Group'
    description = 'Liberty Times (Taiwan)'
    category = 'News, Chinese, Taiwan'
    language = 'zh'

    # The same logo image serves as both masthead and cover.
    masthead_url = cover_url = 'http://www.libertytimes.com.tw/2008/images/img_auto/005/logo_new.gif'

    # --- Download settings ----------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'big5'

    # --- Page clean-up and styling --------------------------------------
    no_stylesheets = True
    remove_javascript = True
    conversion_options = dict(linearize_tables=True)
    keep_only_tags = [
        {'name': 'td', 'attrs': {'id': ['newsContent']}},
    ]
    # Enlarges and emboldens the span matched by this selector.
    extra_css = "span[class='insubject1'][id='newtitle'] {font-size:200%; font-weight:bold;}"

    # --- Sections ---------------------------------------------------------
    feeds = [
        (u'焦點新聞', u'http://www.libertytimes.com.tw/rss/fo.xml'),        # focus news
        (u'政治新聞', u'http://www.libertytimes.com.tw/rss/p.xml'),         # politics
        (u'生活新聞', u'http://www.libertytimes.com.tw/rss/life.xml'),      # life
        (u'國際新聞', u'http://www.libertytimes.com.tw/rss/int.xml'),       # international
        (u'自由廣場', u'http://www.libertytimes.com.tw/rss/o.xml'),         # opinion forum
        (u'社會新聞', u'http://www.libertytimes.com.tw/rss/so.xml'),        # society
        (u'體育新聞', u'http://www.libertytimes.com.tw/rss/sp.xml'),        # sports
        (u'財經焦點', u'http://www.libertytimes.com.tw/rss/e.xml'),         # finance focus
        (u'證券理財', u'http://www.libertytimes.com.tw/rss/stock.xml'),     # securities and investing
        (u'影視焦點', u'http://www.libertytimes.com.tw/rss/show.xml'),      # film and TV
        (u'北部新聞', u'http://www.libertytimes.com.tw/rss/north.xml'),     # northern region
        (u'中部新聞', u'http://www.libertytimes.com.tw/rss/center.xml'),    # central region
        (u'南部新聞', u'http://www.libertytimes.com.tw/rss/south.xml'),     # southern region
        (u'大台北新聞', u'http://www.libertytimes.com.tw/rss/taipei.xml'),  # greater Taipei
        (u'藝術文化', u'http://www.libertytimes.com.tw/rss/art.xml'),       # arts and culture
    ]
|
||||
|
@ -1,15 +1,18 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010-2011, Eddie Lau'
|
||||
|
||||
# Users of Kindle 3 (with limited system-level CJK support)
|
||||
# Users of Kindle 3 with limited system-level CJK support
|
||||
# please replace the following "True" with "False".
|
||||
__MakePeriodical__ = True
|
||||
# Turn it to True if your device supports display of CJK titles
|
||||
# Turn below to True if your device supports display of CJK titles
|
||||
__UseChineseTitle__ = False
|
||||
|
||||
# Turn below to True if you wish to use life.mingpao.com as the main article source
|
||||
__UseLife__ = True
|
||||
|
||||
'''
|
||||
Change Log:
|
||||
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
|
||||
2011/03/06: add new articles for finance section, also a new section "Columns"
|
||||
2011/02/28: rearrange the sections
|
||||
[Disabled until Kindle has better CJK support and can remember last (section,article) read in Sections & Articles
|
||||
@ -32,6 +35,7 @@ import os, datetime, re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from contextlib import nested
|
||||
|
||||
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
@ -60,10 +64,11 @@ class MPHKRecipe(BasicNewsRecipe):
|
||||
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
|
||||
dict(attrs={'id':['newscontent']}), # entertainment and column page content
|
||||
dict(attrs={'id':['newscontent01','newscontent02']}),
|
||||
dict(attrs={'class':['photo']})
|
||||
dict(attrs={'class':['photo']}),
|
||||
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
|
||||
]
|
||||
remove_tags = [dict(name='style'),
|
||||
dict(attrs={'id':['newscontent135']}), # for the finance page
|
||||
dict(attrs={'id':['newscontent135']}), # for the finance page from mpfinance.com
|
||||
dict(name='table')] # for content fetched from life.mingpao.com
|
||||
remove_attributes = ['width']
|
||||
preprocess_regexps = [
|
||||
@ -130,7 +135,9 @@ class MPHKRecipe(BasicNewsRecipe):
|
||||
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||
|
||||
def get_fetchday(self):
    """Return the zero-padded day of the month ("%d") of ``get_dtlocal()``.

    NOTE(review): earlier comments here described converting UTC to Hong
    Kong local time so that the whole day's news is available; presumably
    ``get_dtlocal()`` now performs that conversion -- confirm there.
    """
    local_dt = self.get_dtlocal()
    return local_dt.strftime("%d")
|
||||
|
||||
def get_cover_url(self):
|
||||
@ -146,6 +153,28 @@ class MPHKRecipe(BasicNewsRecipe):
|
||||
feeds = []
|
||||
dateStr = self.get_fetchdate()
|
||||
|
||||
if __UseLife__:
|
||||
for title, url, keystr in [(u'\u8981\u805e Headline', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalga', 'nal'),
|
||||
(u'\u6e2f\u805e Local', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgb', 'nal'),
|
||||
(u'\u6559\u80b2 Education', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalgf', 'nal'),
|
||||
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr', 'nal'),
|
||||
(u'\u8ad6\u58c7 Forum', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalfa', 'nal'),
|
||||
(u'\u4e2d\u570b China', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalca', 'nal'),
|
||||
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
|
||||
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
|
||||
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
|
||||
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
|
||||
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
|
||||
articles = self.parse_section2(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
else:
|
||||
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
|
||||
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
|
||||
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
|
||||
@ -196,6 +225,7 @@ class MPHKRecipe(BasicNewsRecipe):
|
||||
|
||||
return feeds
|
||||
|
||||
# parse from news.mingpao.com
|
||||
def parse_section(self, url):
|
||||
dateStr = self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
@ -214,6 +244,23 @@ class MPHKRecipe(BasicNewsRecipe):
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
# parse from life.mingpao.com
|
||||
def parse_section2(self, url, keystr):
    """Collect article links from a life.mingpao.com section index page.

    Every ``<a href=...>`` on the page at ``url`` is resolved against
    ``http://life.mingpao.com/cfm/``. A link is kept only when its resolved
    URL contains both ``'.txt'`` and ``keystr``. When the same URL occurs
    more than once, the last occurrence on the page is the one retained.

    :param url: address of the section index page to parse
    :param keystr: substring that a resolved article URL must contain
    :return: list of ``{'title', 'url', 'description'}`` dicts in page order
    """
    # Return value unused; call retained to match parse_ed_section.
    # NOTE(review): drop it if get_fetchdate() is confirmed side-effect free.
    self.get_fetchdate()
    soup = self.index_to_soup(url)

    seen_urls = set()  # O(1) duplicate checks
    current_articles = []
    # Walk the anchors last-to-first so that, for duplicated links, the
    # final occurrence wins; the list is flipped back to page order below.
    for anchor in reversed(soup.findAll('a', href=True)):
        # A distinct name avoids clobbering the ``url`` parameter; the ''
        # default keeps the concatenation safe if an href were ever missing.
        article_url = 'http://life.mingpao.com/cfm/' + anchor.get('href', '')
        if article_url in seen_urls:
            continue
        if '.txt' not in article_url or keystr not in article_url:
            continue
        seen_urls.add(article_url)
        current_articles.append({
            'title': self.tag_to_string(anchor),
            'url': article_url,
            'description': '',
        })
    current_articles.reverse()
    return current_articles
|
||||
|
||||
def parse_ed_section(self, url):
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
@ -426,3 +473,4 @@ class MPHKRecipe(BasicNewsRecipe):
|
||||
|
||||
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
||||
opf.render(opf_file, ncx_file)
|
||||
|
||||
|
67
recipes/united_daily.recipe
Normal file
67
recipes/united_daily.recipe
Normal file
@ -0,0 +1,67 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class UnitedDaily(BasicNewsRecipe):
    # Recipe for the United Daily (Taiwan) RSS feeds served from udn.com.
    # Everything below is declarative configuration read by BasicNewsRecipe.

    # --- Publication metadata -------------------------------------------
    title = u'聯合新聞網'
    __author__ = 'Eddie Lau'
    __version__ = '1.0'
    publisher = 'United Daily News Group'
    description = 'United Daily (Taiwan)'
    category = 'News, Chinese, Taiwan'
    language = 'zh'

    # The same logo image serves as both masthead and cover.
    masthead_url = cover_url = 'http://udn.com/NEWS/2004/images/logo_udn.gif'

    # --- Download settings ----------------------------------------------
    oldest_article = 1
    max_articles_per_feed = 100
    use_embedded_content = False
    encoding = 'big5'

    # --- Page clean-up and styling --------------------------------------
    no_stylesheets = True
    remove_javascript = True
    conversion_options = dict(linearize_tables=True)
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': ['story_title', 'story_author', 'story']}},
    ]
    remove_tags = [
        {'name': 'div', 'attrs': {'id': ['mvouter']}},
    ]
    # Enlarges and emboldens the div whose id is 'story_title'.
    extra_css = "div[id='story_title'] {font-size:200%; font-weight:bold;}"

    # --- Sections ---------------------------------------------------------
    feeds = [
        # general news
        (u'焦點', u'http://udn.com/udnrss/focus.xml'),
        (u'政治', u'http://udn.com/udnrss/politics.xml'),
        (u'社會', u'http://udn.com/udnrss/social.xml'),
        (u'生活', u'http://udn.com/udnrss/life.xml'),
        (u'綜合', u'http://udn.com/udnrss/education.xml'),
        (u'意見評論', u'http://udn.com/udnrss/opinion.xml'),
        # local_* feeds
        (u'大台北', u'http://udn.com/udnrss/local_taipei.xml'),
        (u'桃竹苗', u'http://udn.com/udnrss/local_tyhcml.xml'),
        (u'中彰投', u'http://udn.com/udnrss/local_tcchnt.xml'),
        (u'雲嘉南', u'http://udn.com/udnrss/local_ylcytn.xml'),
        (u'高屏離島', u'http://udn.com/udnrss/local_ksptisland.xml'),
        (u'基宜花東', u'http://udn.com/udnrss/local_klilhltt.xml'),
        (u'台灣百寶鄉', u'http://udn.com/udnrss/local_oddlyenough.xml'),
        # mainland and international
        (u'兩岸要聞', u'http://udn.com/udnrss/mainland.xml'),
        (u'國際焦點', u'http://udn.com/udnrss/international.xml'),
        # finance and markets
        (u'台商經貿', u'http://udn.com/udnrss/financechina.xml'),
        (u'國際財經', u'http://udn.com/udnrss/financeworld.xml'),
        (u'財經焦點', u'http://udn.com/udnrss/financesfocus.xml'),
        (u'股市要聞', u'http://udn.com/udnrss/stock.xml'),
        (u'股市快訊', u'http://udn.com/udnrss/stklatest.xml'),
        (u'稅務法務', u'http://udn.com/udnrss/tax.xml'),
        (u'房市情報', u'http://udn.com/udnrss/houses.xml'),
        # sports
        (u'棒球', u'http://udn.com/udnrss/baseball.xml'),
        (u'籃球', u'http://udn.com/udnrss/basketball.xml'),
        (u'體壇動態', u'http://udn.com/udnrss/sportsfocus.xml'),
        # entertainment
        (u'熱門星聞', u'http://udn.com/udnrss/starsfocus.xml'),
        (u'廣電港陸', u'http://udn.com/udnrss/tv.xml'),
        (u'海外星球', u'http://udn.com/udnrss/starswestern.xml'),
        (u'日韓星情', u'http://udn.com/udnrss/starsjk.xml'),
        (u'電影世界', u'http://udn.com/udnrss/movie.xml'),
        (u'流行音樂', u'http://udn.com/udnrss/music.xml'),
        (u'觀點專題', u'http://udn.com/udnrss/starssubject.xml'),
        # lifestyle and supplements
        (u'食樂指南', u'http://udn.com/udnrss/food.xml'),
        (u'折扣好康', u'http://udn.com/udnrss/shopping.xml'),
        (u'醫藥新聞', u'http://udn.com/udnrss/health.xml'),
        (u'家婦繽紛', u'http://udn.com/udnrss/benfen.xml'),
        (u'談星論命', u'http://udn.com/udnrss/astrology.xml'),
        (u'文化副刊', u'http://udn.com/udnrss/reading.xml'),
    ]
|
Loading…
x
Reference in New Issue
Block a user