Update Liberty Times, Apple Daily, China Times and House News

This commit is contained in:
Kovid Goyal 2015-04-06 10:32:56 +05:30
parent 35f565a425
commit 18d1d0d3e8
4 changed files with 58 additions and 49 deletions

View File

@@ -1,7 +1,7 @@
# vim:fileencoding=UTF-8 # vim:fileencoding=UTF-8
from __future__ import unicode_literals from __future__ import unicode_literals
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2013, Eddie Lau' __copyright__ = '2013-2015, Eddie Lau'
__Date__ = '' __Date__ = ''
from calibre import (__appname__, force_unicode, strftime) from calibre import (__appname__, force_unicode, strftime)
@@ -98,9 +98,10 @@ class AppleDaily(BasicNewsRecipe):
ul = soup.find(attrs={'class':'menu'}) ul = soup.find(attrs={'class':'menu'})
sectionList = [] sectionList = []
for li in ul.findAll('li'): for li in ul.findAll('li'):
a = 'http://hkm.appledaily.com/' + li.find('a', href=True).get('href', False) relativea = li.find('a', href=True).get('href', False)
a = 'http://hkm.appledaily.com/' + relativea
title = li.find('a', text=True).strip() title = li.find('a', text=True).strip()
if not title == u'動新聞': if (not title == u'動新聞') and (relativea.startswith('list.php')):
sectionList.append((title, a)) sectionList.append((title, a))
for title, url in sectionList: for title, url in sectionList:
articles = self.parse_section(url) articles = self.parse_section(url)
@@ -273,3 +274,4 @@ class AppleDaily(BasicNewsRecipe):
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file): with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file) opf.render(opf_file, ncx_file)

View File

@@ -9,23 +9,23 @@ class AdvancedUserRecipe1277443634(BasicNewsRecipe):
oldest_article = 1 oldest_article = 1
max_articles_per_feed = 100 max_articles_per_feed = 100
feeds = [(u'焦點', u'http://rss.chinatimes.com/rss/focus-u.rss'), feeds = [(u'焦點要聞', u'http://feeds.feedburner.com/chinatimes/chinatimes-focus'),
(u'政治', u'http://rss.chinatimes.com/rss/Politic-u.rss'), (u'生活新聞', u'http://feeds.feedburner.com/chinatimes/chinatimes-life'),
(u'社會', u'http://rss.chinatimes.com/rss/social-u.rss'), (u'社會新聞', u'http://feeds.feedburner.com/chinatimes/chinatimes-society'),
(u'國際', u'http://rss.chinatimes.com/rss/international-u.rss'), (u'兩岸國際', u'http://feeds.feedburner.com/chinatimes/chinatimes-international'),
(u'兩岸', u'http://rss.chinatimes.com/rss/mainland-u.rss'), (u'時論廣場', u'http://feeds.feedburner.com/chinatimes/chinatimes-comment'),
(u'地方', u'http://rss.chinatimes.com/rss/local-u.rss'), (u'藝文副刊', u'http://feeds.feedburner.com/chinatimes/chinatimes-philology'),
(u'言論', u'http://rss.chinatimes.com/rss/comment-u.rss'), (u'地方新聞', u'http://feeds.feedburner.com/chinatimes/chinatimes-local'),
(u'科技', u'http://rss.chinatimes.com/rss/technology-u.rss'), (u'財經焦點', u'http://feeds.feedburner.com/chinatimes/chinatimes-finance'),
(u'運動', u'http://rss.chinatimes.com/rss/sport-u.rss'), (u'運動天地', u'http://feeds.feedburner.com/chinatimes/chinatimes-sport'),
(u'藝文', u'http://rss.chinatimes.com/rss/philology-u.rss'), (u'娛樂新聞', u'http://feeds.feedburner.com/chinatimes/chinatimes-showbiz'),
#(u'旺報', u'http://rss.chinatimes.com/rss/want-u.rss'), (u'時尚消費', u'http://feeds.feedburner.com/chinatimes/chinatimes-fashion'),
#(u'財經', u'http://rss.chinatimes.com/rss/finance-u.rss'), # broken links #(u'財經', u'http://rss.chinatimes.com/rss/finance-u.rss'), # broken links
#(u'股市', u'http://rss.chinatimes.com/rss/stock-u.rss') # broken links #(u'股市', u'http://rss.chinatimes.com/rss/stock-u.rss') # broken links
] ]
__author__ = 'einstuerzende, updated by Eddie Lau' __author__ = 'einstuerzende, updated by Eddie Lau'
__version__ = '1.0' __version__ = '1.1'
language = 'zh' language = 'zh'
publisher = 'China Times Group' publisher = 'China Times Group'
description = 'China Times (Taiwan)' description = 'China Times (Taiwan)'
@@ -33,10 +33,12 @@ class AdvancedUserRecipe1277443634(BasicNewsRecipe):
remove_javascript = True remove_javascript = True
use_embedded_content = False use_embedded_content = False
no_stylesheets = True no_stylesheets = True
encoding = 'big5' auto_cleanup = True
encoding = 'utf-8'
conversion_options = {'linearize_tables':True} conversion_options = {'linearize_tables':True}
masthead_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif' masthead_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif'
cover_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif' cover_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif'
keep_only_tags = [dict(name='div', attrs={'class':['articlebox','articlebox clearfix']})] #keep_only_tags = [dict(name='div', attrs={'class':['articlebox','articlebox clearfix']})]
remove_tags = [dict(name='div', attrs={'class':['focus-news']})] #remove_tags = [dict(name='div', attrs={'class':['focus-news']})]

View File

@@ -1,30 +1,31 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2012, Eddie Lau' __copyright__ = '2012-2015, Eddie Lau'
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipeHouseNews(BasicNewsRecipe): class AdvancedUserRecipeHouseNews(BasicNewsRecipe):
title = u'House News \u4e3b\u5834\u65b0\u805e' title = u'The House News Bloggers 主場博客'
__author__ = 'Eddie Lau' __author__ = 'Eddie Lau'
publisher = 'House News' publisher = 'The House News Bloggers'
oldest_article = 1 oldest_article = 1
max_articles_per_feed = 100 max_articles_per_feed = 100
auto_cleanup = False auto_cleanup = False
no_stylesheets = True
language = 'zh' language = 'zh'
encoding = 'utf-8' encoding = 'utf-8'
description = 'http://thehousenews.com' description = 'http://thehousenewsbloggers.net'
category = 'Chinese, Blogs, Opinion, News, Hong Kong' category = 'Chinese, Blogs, Opinion, News, Hong Kong'
masthead_url = 'http://thehousenews.com/static/images/housebeta.jpg' masthead_url = 'http://thehousenewsbloggers.files.wordpress.com/2014/09/screen-shot-2014-09-11-at-8-55-13.png'
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} p[class=date] {font-size:50%;} div[class=author] {font-size:75%;} p[class=caption] {font-size:50%;}' extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} p[class=date] {font-size:50%;} div[class=author] {font-size:75%;} p[class=caption] {font-size:50%;}'
feeds = [(u'Latest', u'http://thehousenews.com/rss/')] feeds = [(u'Latest', u'http://thehousenewsbloggers.net/feed/')]
keep_only_tags = [dict(name='h1'), keep_only_tags = [dict(name='h1', attrs={'class':['title']}),
dict(name='div', attrs={'class':['photo']}), dict(name='span', attrs={'class':['author vcard']}),
dict(name='p', attrs={'class':'caption'}), dict(name='time', attrs={'class':['entry-date']}),
dict(name='div', attrs={'class':'articleTextWrap'}), dict(name='section', attrs={'class':['entry']})]
dict(name='div', attrs={'class':['author']}), remove_tags = [dict(name='div', attrs={'id':['jp-post-flair']})]
dict(name='p', attrs={'class':'date'})]
def populate_article_metadata(self, article, soup, first): def populate_article_metadata(self, article, soup, first):
if first and hasattr(self, 'add_toc_thumbnail'): if first and hasattr(self, 'add_toc_thumbnail'):
picdiv = soup.find('img') picdiv = soup.find('img')
if picdiv is not None: if picdiv is not None:
self.add_toc_thumbnail(article,picdiv['src']) self.add_toc_thumbnail(article,picdiv['src'])

View File

@@ -10,25 +10,27 @@ class AdvancedUserRecipe1277443634(BasicNewsRecipe):
oldest_article = 1 oldest_article = 1
max_articles_per_feed = 100 max_articles_per_feed = 100
feeds = [(u'焦點新聞', u'http://www.libertytimes.com.tw/rss/fo.xml'), feeds = [(u'頭版', u'http://news.ltn.com.tw/rss/focus.xml'),
(u'政治新聞', u'http://www.libertytimes.com.tw/rss/p.xml'), (u'政治', u'http://news.ltn.com.tw/rss/politics.xml'),
(u'生活新聞', u'http://www.libertytimes.com.tw/rss/life.xml'), (u'社會', u'http://news.ltn.com.tw/rss/society.xml'),
(u'國際新聞', u'http://www.libertytimes.com.tw/rss/int.xml'), (u'生活', u'http://news.ltn.com.tw/rss/life.xml'),
(u'自由廣場', u'http://www.libertytimes.com.tw/rss/o.xml'), (u'言論', u'http://news.ltn.com.tw/rss/opinion.xml'),
(u'社會新聞', u'http://www.libertytimes.com.tw/rss/so.xml'), (u'國際', u'http://news.ltn.com.tw/rss/world.xml'),
(u'體育新聞', u'http://www.libertytimes.com.tw/rss/sp.xml'), (u'財經', u'http://news.ltn.com.tw/rss/business.xml'),
(u'財經焦點', u'http://www.libertytimes.com.tw/rss/e.xml'), (u'體育', u'http://news.ltn.com.tw/rss/sports.xml'),
(u'證券理財', u'http://www.libertytimes.com.tw/rss/stock.xml'), (u'影視', u'http://news.ltn.com.tw/rss/entertainment.xml'),
(u'影視焦點', u'http://www.libertytimes.com.tw/rss/show.xml'), (u'消費', u'http://news.ltn.com.tw/rss/consumer.xml'),
(u'北部新聞', u'http://www.libertytimes.com.tw/rss/north.xml'), (u'副刊', u'http://news.ltn.com.tw/rss/supplement.xml'),
(u'中部新聞', u'http://www.libertytimes.com.tw/rss/center.xml'), (u'地方', u'http://news.ltn.com.tw/rss/local.xml'),
(u'南部新聞', u'http://www.libertytimes.com.tw/rss/south.xml'), (u'台北都會', u'http://news.ltn.com.tw/rss/taipei.xml'),
(u'大台北新聞', u'http://www.libertytimes.com.tw/rss/taipei.xml'), (u'北部新聞', u'http://news.ltn.com.tw/rss/northern.xml'),
(u'藝術文化', u'http://www.libertytimes.com.tw/rss/art.xml'), (u'中部新聞', u'http://news.ltn.com.tw/rss/central.xml'),
(u'南部新聞', u'http://news.ltn.com.tw/rss/southern.xml')
] ]
extra_css = '''span[class='insubject1'][id='newtitle'] {font-size:200%; font-weight:bold;}''' #extra_css = '''span[class='insubject1'][id='newtitle'] {font-size:200%; font-weight:bold;}'''
__author__ = 'einstuerzende, updated by Eddie Lau' __author__ = 'einstuerzende, updated by Eddie Lau'
__version__ = '1.1' __version__ = '1.2'
language = 'zh' language = 'zh'
publisher = 'Liberty Times Group' publisher = 'Liberty Times Group'
description = 'Liberty Times (Taiwan)' description = 'Liberty Times (Taiwan)'
@@ -36,9 +38,11 @@ class AdvancedUserRecipe1277443634(BasicNewsRecipe):
remove_javascript = True remove_javascript = True
use_embedded_content = False use_embedded_content = False
no_stylesheets = True no_stylesheets = True
encoding = 'big5' encoding = 'utf-8'
conversion_options = {'linearize_tables':True} conversion_options = {'linearize_tables':True}
masthead_url = 'http://www.libertytimes.com.tw/2008/images/img_auto/005/logo_new.gif' masthead_url = 'http://www.libertytimes.com.tw/2008/images/img_auto/005/logo_new.gif'
cover_url = 'http://www.libertytimes.com.tw/2008/images/img_auto/005/logo_new.gif' cover_url = 'http://www.libertytimes.com.tw/2008/images/img_auto/005/logo_new.gif'
keep_only_tags = [dict(name='td', attrs={'id':['newsContent']})] auto_cleanup = True
#keep_only_tags = [dict(name='td', attrs={'id':['newsContent']})]