From 9df2d5dbe561eed06a44e9ae8919656aa4a878f3 Mon Sep 17 00:00:00 2001 From: Kennyl Date: Sat, 21 Jan 2017 12:09:44 +0800 Subject: [PATCH 1/4] Fix NoneType Bug Skip Null node of ul --- recipes/apple_daily.recipe | 2 ++ 1 file changed, 2 insertions(+) diff --git a/recipes/apple_daily.recipe b/recipes/apple_daily.recipe index f34b30f8b6..061baa7937 100644 --- a/recipes/apple_daily.recipe +++ b/recipes/apple_daily.recipe @@ -116,6 +116,8 @@ class AppleDaily(BasicNewsRecipe): soup = self.index_to_soup(url) ul = soup.find(attrs={'class': 'list'}) current_articles = [] + if ul is None : + return current_articles for li in ul.findAll('li'): a = li.find('a', href=True) title = li.find('p', text=True).strip() From 699a6a22daee8cb268cf119eb15675049cb6efcc Mon Sep 17 00:00:00 2001 From: Kennyl Date: Sat, 21 Jan 2017 14:55:01 +0800 Subject: [PATCH 2/4] Fix broken masthead_url change masthead_url --- recipes/apple_daily.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/apple_daily.recipe b/recipes/apple_daily.recipe index 061baa7937..1f804feb6d 100644 --- a/recipes/apple_daily.recipe +++ b/recipes/apple_daily.recipe @@ -33,7 +33,7 @@ class AppleDaily(BasicNewsRecipe): no_stylesheets = True description = 'http://hkm.appledaily.com/' category = 'Chinese, News, Hong Kong' - masthead_url = 'http://upload.wikimedia.org/wikipedia/zh/c/cf/AppleDailyLogo1.png' + masthead_url = 'https://upload.wikimedia.org/wikipedia/zh/thumb/5/52/Apple_Daily_logo.svg/569px-Apple_Daily_logo.svg.png' extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} h1 {font-size:200%; text-align:left; font-weight:bold;} p[class=video-caption] {font-size:50%; margin-left:auto; margin-right:auto;}' # noqa keep_only_tags = [dict(name='div', attrs={'id': 'content-article'})] From b671517232d9a805bfbcedd2203d729e1d9b3b77 Mon Sep 17 00:00:00 2001 From: Kennyl Date: Sat, 21 Jan 2017 14:59:13 +0800 Subject: [PATCH 3/4] 
reduce the download size Don't download sections from outside Hong Kong when the download takes place in Hong Kong. Determined by the system time zone (HKT). --- recipes/apple_daily.recipe | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/recipes/apple_daily.recipe b/recipes/apple_daily.recipe index 1f804feb6d..b96b6e4b05 100644 --- a/recipes/apple_daily.recipe +++ b/recipes/apple_daily.recipe @@ -9,6 +9,7 @@ from calibre.utils.date import now as nowf import os import datetime import re +import time from calibre.web.feeds.recipes import BasicNewsRecipe from contextlib import nested from calibre.ebooks.BeautifulSoup import BeautifulSoup @@ -104,6 +105,15 @@ class AppleDaily(BasicNewsRecipe): relativea = li.find('a', href=True).get('href', False) a = 'http://hkm.appledaily.com/' + relativea title = li.find('a', text=True).strip() + if (time.tzname != 'HKT'): + if (title == u'三藩市'): + continue + if (title == u'洛杉磯'): + continue + if (title == u'紐  約'): + continue + if (title == u'美  國'): + continue if (not title == u'動新聞') and (relativea.startswith('list.php')): sectionList.append((title, a)) for title, url in sectionList: From 0c1f3ce5a3c80eb8dc06af53ea5619272dc6440f Mon Sep 17 00:00:00 2001 From: Kennyl Date: Sat, 21 Jan 2017 15:00:30 +0800 Subject: [PATCH 4/4] Clean nbsp; remove nbsp in title name --- recipes/apple_daily.recipe | 1 + 1 file changed, 1 insertion(+) diff --git a/recipes/apple_daily.recipe b/recipes/apple_daily.recipe index b96b6e4b05..728609d776 100644 --- a/recipes/apple_daily.recipe +++ b/recipes/apple_daily.recipe @@ -117,6 +117,7 @@ class AppleDaily(BasicNewsRecipe): if (not title == u'動新聞') and (relativea.startswith('list.php')): sectionList.append((title, a)) for title, url in sectionList: + title = title.replace(" ", "") articles = self.parse_section(url) if articles: feeds.append((title, articles))