From 56e38290df6125f34ae8f6a18ae75b4bf7e7724e Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Sun, 28 Nov 2010 23:41:46 +0900 Subject: [PATCH] recipes: fix minor bugs, add yomiuri news - remove wrong #!(hash-bang) - add yomiuri online news --- resources/recipes/endgadget_ja.recipe | 2 - resources/recipes/jijinews.recipe | 2 - resources/recipes/mainichi.recipe | 2 - resources/recipes/nikkei_free.recipe | 6 +- resources/recipes/nikkei_sub.recipe | 6 +- resources/recipes/nikkei_sub_economy.recipe | 2 - resources/recipes/nikkei_sub_industry.recipe | 1 - resources/recipes/nikkei_sub_life.recipe | 2 - resources/recipes/nikkei_sub_main.recipe | 2 - resources/recipes/nikkei_sub_sports.recipe | 1 - resources/recipes/yomiuri.recipe | 66 ++++++++++++++++++++ 11 files changed, 71 insertions(+), 21 deletions(-) create mode 100644 resources/recipes/yomiuri.recipe diff --git a/resources/recipes/endgadget_ja.recipe b/resources/recipes/endgadget_ja.recipe index 443a85905d..891e6720a5 100644 --- a/resources/recipes/endgadget_ja.recipe +++ b/resources/recipes/endgadget_ja.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/jijinews.recipe b/resources/recipes/jijinews.recipe index fe52e76aaf..4f768ce7ee 100644 --- a/resources/recipes/jijinews.recipe +++ b/resources/recipes/jijinews.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/mainichi.recipe b/resources/recipes/mainichi.recipe index 47dc7d0ebc..2a44fa0980 100644 --- a/resources/recipes/mainichi.recipe +++ b/resources/recipes/mainichi.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/nikkei_free.recipe b/resources/recipes/nikkei_free.recipe index d84aaa279b..adc596104b 100644 --- a/resources/recipes/nikkei_free.recipe +++ b/resources/recipes/nikkei_free.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' @@ -9,9 +7,9 @@ www.nikkei.com from calibre.web.feeds.news import BasicNewsRecipe class NikkeiNet(BasicNewsRecipe): - title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Free)' + title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Free, MAX)' __author__ = 'Hiroshi Miura' - description = 'News and current market affairs from Japan' + description = 'News and current market affairs from Japan, no subscription and getting max feed.' cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' oldest_article = 2 diff --git a/resources/recipes/nikkei_sub.recipe b/resources/recipes/nikkei_sub.recipe index 95b0017339..18f324009a 100644 --- a/resources/recipes/nikkei_sub.recipe +++ b/resources/recipes/nikkei_sub.recipe @@ -5,12 +5,12 @@ from calibre.ptempfile import PersistentTemporaryFile class NikkeiNet_subscription(BasicNewsRecipe): - title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248' + title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(MAX)' __author__ = 'Hiroshi Miura' - description = 'News and current market affairs from Japan' + description = 'News and current market affairs from Japan, gather MAX articles' needs_subscription = True oldest_article = 2 - max_articles_per_feed = 20 + max_articles_per_feed = 10 language = 'ja' remove_javascript = False temp_files = [] diff --git a/resources/recipes/nikkei_sub_economy.recipe b/resources/recipes/nikkei_sub_economy.recipe index d762f505d1..2dd8f1add8 100644 --- a/resources/recipes/nikkei_sub_economy.recipe +++ b/resources/recipes/nikkei_sub_economy.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/nikkei_sub_industry.recipe b/resources/recipes/nikkei_sub_industry.recipe index da04bbb5f3..81e86767d0 100644 --- a/resources/recipes/nikkei_sub_industry.recipe +++ b/resources/recipes/nikkei_sub_industry.recipe @@ -1,4 +1,3 @@ -#!/usr/bin/env python __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' diff --git a/resources/recipes/nikkei_sub_life.recipe b/resources/recipes/nikkei_sub_life.recipe index 2da5b13834..1bfa08a55f 100644 --- a/resources/recipes/nikkei_sub_life.recipe +++ b/resources/recipes/nikkei_sub_life.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/nikkei_sub_main.recipe b/resources/recipes/nikkei_sub_main.recipe index 37fc8964c4..485d2f32c0 100644 --- a/resources/recipes/nikkei_sub_main.recipe +++ b/resources/recipes/nikkei_sub_main.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/nikkei_sub_sports.recipe b/resources/recipes/nikkei_sub_sports.recipe index 6e5a1c6bb2..644b0aa252 100644 --- a/resources/recipes/nikkei_sub_sports.recipe +++ b/resources/recipes/nikkei_sub_sports.recipe @@ -1,4 +1,3 @@ -#!/usr/bin/env python __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' diff --git a/resources/recipes/yomiuri.recipe b/resources/recipes/yomiuri.recipe new file mode 100644 index 0000000000..6335b99e32 --- /dev/null +++ b/resources/recipes/yomiuri.recipe @@ -0,0 +1,66 @@ +__license__ = 'GPL v3' +__copyright__ = '2010, Hiroshi Miura ' +''' +www.yomiuri.co.jp +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class YOLNews(BasicNewsRecipe): + title = u'YOMIURI ONLINE' + __author__ = 'Hiroshi Miura' + oldest_article = 1 + max_articles_per_feed = 50 + description = 'Japanese traditional newspaper Yomiuri Online News' + publisher = 'Yomiuri Online News' + category = 'news, japan' + language = 'ja' + encoding = 'Shift_JIS' + index = 'http://www.yomiuri.co.jp/latestnews/' + remove_javascript = True + + remove_tags_before = {'class':"article-def"} + remove_tags = [{'class':"RelatedArticle"}, + {'class:"sbtns"} + ] + remove_tags_after = {'class':"date-def"} + + def parse_feeds(self): + + feeds = BasicNewsRecipe.parse_feeds(self) + + for curfeed in feeds: + delList = [] + for a,curarticle in enumerate(curfeed.articles): + if re.search(r'rssad.jp', curarticle.url): + delList.append(curarticle) + if len(delList)>0: + for d in delList: + index = curfeed.articles.index(d) + curfeed.articles[index:index+1] = [] + + return feeds + + def parse_index(self): + feeds = [] + soup = self.index_to_soup(self.index) + topstories = soup.find('ul',attrs={'class':'list-def'}) + if topstories: + newsarticles = [] + for itt in topstories.findAll('li'): + itema = itt.find('a',href=True) + if itema: + itd1 = itema.findNextSibling(text = True) + itd2 = itd1.findNextSibling(text = True) + itd3 = itd2.findNextSibling(text = True) + newsarticles.append({ + 'title' :itema.string + ,'date' :''.join([itd1, itd2, itd3]) + ,'url' :'http://www.yomiuri.co.jp' + itema['href'] + ,'description':'' + }) + feeds.append(('News', newsarticles)) + + return feeds +