From 18d1d0d3e8f83837be57c95511f6a86567cb2900 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 6 Apr 2015 10:32:56 +0530 Subject: [PATCH] Update Liberty Times, Apple Daily, China Times and House News --- recipes/apple_daily.recipe | 8 ++++--- recipes/china_times.recipe | 32 ++++++++++++++------------- recipes/house_news.recipe | 25 ++++++++++----------- recipes/liberty_times.recipe | 42 ++++++++++++++++++++---------------- 4 files changed, 58 insertions(+), 49 deletions(-) diff --git a/recipes/apple_daily.recipe b/recipes/apple_daily.recipe index 522427ed6a..0c4a13b7dd 100644 --- a/recipes/apple_daily.recipe +++ b/recipes/apple_daily.recipe @@ -1,7 +1,7 @@ # vim:fileencoding=UTF-8 from __future__ import unicode_literals __license__ = 'GPL v3' -__copyright__ = '2013, Eddie Lau' +__copyright__ = '2013-2015, Eddie Lau' __Date__ = '' from calibre import (__appname__, force_unicode, strftime) @@ -98,9 +98,10 @@ class AppleDaily(BasicNewsRecipe): ul = soup.find(attrs={'class':'menu'}) sectionList = [] for li in ul.findAll('li'): - a = 'http://hkm.appledaily.com/' + li.find('a', href=True).get('href', False) + relativea = li.find('a', href=True).get('href', False) + a = 'http://hkm.appledaily.com/' + relativea title = li.find('a', text=True).strip() - if not title == u'動新聞': + if (not title == u'動新聞') and (relativea.startswith('list.php')): sectionList.append((title, a)) for title, url in sectionList: articles = self.parse_section(url) @@ -273,3 +274,4 @@ class AppleDaily(BasicNewsRecipe): with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file): opf.render(opf_file, ncx_file) + diff --git a/recipes/china_times.recipe b/recipes/china_times.recipe index 8c1493d71f..9fc743ba23 100644 --- a/recipes/china_times.recipe +++ b/recipes/china_times.recipe @@ -9,23 +9,23 @@ class AdvancedUserRecipe1277443634(BasicNewsRecipe): oldest_article = 1 max_articles_per_feed = 100 - feeds = [(u'焦點', u'http://rss.chinatimes.com/rss/focus-u.rss'), - (u'政治', u'http://rss.chinatimes.com/rss/Politic-u.rss'), - (u'社會', u'http://rss.chinatimes.com/rss/social-u.rss'), - (u'國際', u'http://rss.chinatimes.com/rss/international-u.rss'), - (u'兩岸', u'http://rss.chinatimes.com/rss/mainland-u.rss'), - (u'地方', u'http://rss.chinatimes.com/rss/local-u.rss'), - (u'言論', u'http://rss.chinatimes.com/rss/comment-u.rss'), - (u'科技', u'http://rss.chinatimes.com/rss/technology-u.rss'), - (u'運動', u'http://rss.chinatimes.com/rss/sport-u.rss'), - (u'藝文', u'http://rss.chinatimes.com/rss/philology-u.rss'), - #(u'旺報', u'http://rss.chinatimes.com/rss/want-u.rss'), + feeds = [(u'焦點要聞', u'http://feeds.feedburner.com/chinatimes/chinatimes-focus'), + (u'生活新聞', u'http://feeds.feedburner.com/chinatimes/chinatimes-life'), + (u'社會新聞', u'http://feeds.feedburner.com/chinatimes/chinatimes-society'), + (u'兩岸國際', u'http://feeds.feedburner.com/chinatimes/chinatimes-international'), + (u'時論廣場', u'http://feeds.feedburner.com/chinatimes/chinatimes-comment'), + (u'藝文副刊', u'http://feeds.feedburner.com/chinatimes/chinatimes-philology'), + (u'地方新聞', u'http://feeds.feedburner.com/chinatimes/chinatimes-local'), + (u'財經焦點', u'http://feeds.feedburner.com/chinatimes/chinatimes-finance'), + (u'運動天地', u'http://feeds.feedburner.com/chinatimes/chinatimes-sport'), + (u'娛樂新聞', u'http://feeds.feedburner.com/chinatimes/chinatimes-showbiz'), + (u'時尚消費', u'http://feeds.feedburner.com/chinatimes/chinatimes-fashion'), #(u'財經', u'http://rss.chinatimes.com/rss/finance-u.rss'), # broken links #(u'股市', u'http://rss.chinatimes.com/rss/stock-u.rss') # broken links ] __author__ = 'einstuerzende, updated by Eddie Lau' - __version__ = '1.0' + __version__ = '1.1' language = 'zh' publisher = 'China Times Group' description = 'China Times (Taiwan)' @@ -33,10 +33,12 @@ class AdvancedUserRecipe1277443634(BasicNewsRecipe): remove_javascript = True use_embedded_content = False no_stylesheets = True - encoding = 'big5' + auto_cleanup = True + encoding = 'utf-8' conversion_options = {'linearize_tables':True} masthead_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif' cover_url = 'http://www.fcuaa.org/gif/chinatimeslogo.gif' - keep_only_tags = [dict(name='div', attrs={'class':['articlebox','articlebox clearfix']})] - remove_tags = [dict(name='div', attrs={'class':['focus-news']})] + #keep_only_tags = [dict(name='div', attrs={'class':['articlebox','articlebox clearfix']})] + #remove_tags = [dict(name='div', attrs={'class':['focus-news']})] + diff --git a/recipes/house_news.recipe b/recipes/house_news.recipe index 7d8c3275d2..b8c45e4c83 100644 --- a/recipes/house_news.recipe +++ b/recipes/house_news.recipe @@ -1,30 +1,31 @@ __license__ = 'GPL v3' -__copyright__ = '2012, Eddie Lau' +__copyright__ = '2012-2015, Eddie Lau' from calibre.web.feeds.news import BasicNewsRecipe class AdvancedUserRecipeHouseNews(BasicNewsRecipe): - title = u'House News \u4e3b\u5834\u65b0\u805e' + title = u'The House News Bloggers 主場博客' __author__ = 'Eddie Lau' - publisher = 'House News' + publisher = 'The House News Bloggers' oldest_article = 1 max_articles_per_feed = 100 auto_cleanup = False + no_stylesheets = True language = 'zh' encoding = 'utf-8' - description = 'http://thehousenews.com' + description = 'http://thehousenewsbloggers.net' category = 'Chinese, Blogs, Opinion, News, Hong Kong' - masthead_url = 'http://thehousenews.com/static/images/housebeta.jpg' + masthead_url = 'http://thehousenewsbloggers.files.wordpress.com/2014/09/screen-shot-2014-09-11-at-8-55-13.png' extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} p[class=date] {font-size:50%;} div[class=author] {font-size:75%;} p[class=caption] {font-size:50%;}' - feeds = [(u'Latest', u'http://thehousenews.com/rss/')] - keep_only_tags = [dict(name='h1'), - dict(name='div', attrs={'class':['photo']}), - dict(name='p', attrs={'class':'caption'}), - dict(name='div', attrs={'class':'articleTextWrap'}), - dict(name='div', attrs={'class':['author']}), - dict(name='p', attrs={'class':'date'})] + feeds = [(u'Latest', u'http://thehousenewsbloggers.net/feed/')] + keep_only_tags = [dict(name='h1', attrs={'class':['title']}), + dict(name='span', attrs={'class':['author vcard']}), + dict(name='time', attrs={'class':['entry-date']}), + dict(name='section', attrs={'class':['entry']})] + remove_tags = [dict(name='div', attrs={'id':['jp-post-flair']})] def populate_article_metadata(self, article, soup, first): if first and hasattr(self, 'add_toc_thumbnail'): picdiv = soup.find('img') if picdiv is not None: self.add_toc_thumbnail(article,picdiv['src']) + diff --git a/recipes/liberty_times.recipe b/recipes/liberty_times.recipe index c3a9d106da..f688b9c351 100644 --- a/recipes/liberty_times.recipe +++ b/recipes/liberty_times.recipe @@ -10,25 +10,27 @@ class AdvancedUserRecipe1277443634(BasicNewsRecipe): oldest_article = 1 max_articles_per_feed = 100 - feeds = [(u'焦點新聞', u'http://www.libertytimes.com.tw/rss/fo.xml'), - (u'政治新聞', u'http://www.libertytimes.com.tw/rss/p.xml'), - (u'生活新聞', u'http://www.libertytimes.com.tw/rss/life.xml'), - (u'國際新聞', u'http://www.libertytimes.com.tw/rss/int.xml'), - (u'自由廣場', u'http://www.libertytimes.com.tw/rss/o.xml'), - (u'社會新聞', u'http://www.libertytimes.com.tw/rss/so.xml'), - (u'體育新聞', u'http://www.libertytimes.com.tw/rss/sp.xml'), - (u'財經焦點', u'http://www.libertytimes.com.tw/rss/e.xml'), - (u'證券理財', u'http://www.libertytimes.com.tw/rss/stock.xml'), - (u'影視焦點', u'http://www.libertytimes.com.tw/rss/show.xml'), - (u'北部新聞', u'http://www.libertytimes.com.tw/rss/north.xml'), - (u'中部新聞', u'http://www.libertytimes.com.tw/rss/center.xml'), - (u'南部新聞', u'http://www.libertytimes.com.tw/rss/south.xml'), - (u'大台北新聞', u'http://www.libertytimes.com.tw/rss/taipei.xml'), - (u'藝術文化', u'http://www.libertytimes.com.tw/rss/art.xml'), + feeds = [(u'頭版', u'http://news.ltn.com.tw/rss/focus.xml'), + (u'政治', u'http://news.ltn.com.tw/rss/politics.xml'), + (u'社會', u'http://news.ltn.com.tw/rss/society.xml'), + (u'生活', u'http://news.ltn.com.tw/rss/life.xml'), + (u'言論', u'http://news.ltn.com.tw/rss/opinion.xml'), + (u'國際', u'http://news.ltn.com.tw/rss/world.xml'), + (u'財經', u'http://news.ltn.com.tw/rss/business.xml'), + (u'體育', u'http://news.ltn.com.tw/rss/sports.xml'), + (u'影視', u'http://news.ltn.com.tw/rss/entertainment.xml'), + (u'消費', u'http://news.ltn.com.tw/rss/consumer.xml'), + (u'副刊', u'http://news.ltn.com.tw/rss/supplement.xml'), + (u'地方', u'http://news.ltn.com.tw/rss/local.xml'), + (u'台北都會', u'http://news.ltn.com.tw/rss/taipei.xml'), + (u'北部新聞', u'http://news.ltn.com.tw/rss/northern.xml'), + (u'中部新聞', u'http://news.ltn.com.tw/rss/central.xml'), + (u'南部新聞', u'http://news.ltn.com.tw/rss/southern.xml') + ] - extra_css = '''span[class='insubject1'][id='newtitle'] {font-size:200%; font-weight:bold;}''' + #extra_css = '''span[class='insubject1'][id='newtitle'] {font-size:200%; font-weight:bold;}''' __author__ = 'einstuerzende, updated by Eddie Lau' - __version__ = '1.1' + __version__ = '1.2' language = 'zh' publisher = 'Liberty Times Group' description = 'Liberty Times (Taiwan)' @@ -36,9 +38,11 @@ class AdvancedUserRecipe1277443634(BasicNewsRecipe): remove_javascript = True use_embedded_content = False no_stylesheets = True - encoding = 'big5' + encoding = 'utf-8' conversion_options = {'linearize_tables':True} masthead_url = 'http://www.libertytimes.com.tw/2008/images/img_auto/005/logo_new.gif' cover_url = 'http://www.libertytimes.com.tw/2008/images/img_auto/005/logo_new.gif' - keep_only_tags = [dict(name='td', attrs={'id':['newsContent']})] + auto_cleanup = True + #keep_only_tags = [dict(name='td', attrs={'id':['newsContent']})] +