diff --git a/resources/images/news/cnetjapan_digital.png b/resources/images/news/cnetjapan_digital.png new file mode 100644 index 0000000000..9a0dcc8f7f Binary files /dev/null and b/resources/images/news/cnetjapan_digital.png differ diff --git a/resources/images/news/cnetjapan_release.png b/resources/images/news/cnetjapan_release.png new file mode 100644 index 0000000000..9a0dcc8f7f Binary files /dev/null and b/resources/images/news/cnetjapan_release.png differ diff --git a/resources/images/news/mainichi.png b/resources/images/news/mainichi.png new file mode 100644 index 0000000000..9f8e8f319f Binary files /dev/null and b/resources/images/news/mainichi.png differ diff --git a/resources/images/news/mainichi_it_news.png b/resources/images/news/mainichi_it_news.png new file mode 100644 index 0000000000..9f8e8f319f Binary files /dev/null and b/resources/images/news/mainichi_it_news.png differ diff --git a/resources/images/news/nikkei_sub_industory.png b/resources/images/news/nikkei_sub.png similarity index 100% rename from resources/images/news/nikkei_sub_industory.png rename to resources/images/news/nikkei_sub.png diff --git a/resources/images/news/nikkei_sub_industry.png b/resources/images/news/nikkei_sub_industry.png new file mode 100644 index 0000000000..308f4b3085 Binary files /dev/null and b/resources/images/news/nikkei_sub_industry.png differ diff --git a/resources/images/news/yomiuri.png b/resources/images/news/yomiuri.png new file mode 100644 index 0000000000..4a197f888f Binary files /dev/null and b/resources/images/news/yomiuri.png differ diff --git a/resources/recipes/cnetjapan.recipe b/resources/recipes/cnetjapan.recipe index e0178c1ff2..1058b90401 100644 --- a/resources/recipes/cnetjapan.recipe +++ b/resources/recipes/cnetjapan.recipe @@ -7,7 +7,9 @@ class CNetJapan(BasicNewsRecipe): max_articles_per_feed = 30 __author__ = 'Hiroshi Miura' - feeds = [(u'cnet rss', u'http://feeds.japan.cnet.com/cnet/rss')] + feeds = [(u'CNet News', u'http://feed.japan.cnet.com/rss/index.rdf'), + (u'CNet Blog', u'http://feed.japan.cnet.com/rss/blog/index.rdf') + ] language = 'ja' encoding = 'Shift_JIS' remove_javascript = True @@ -21,12 +23,29 @@ class CNetJapan(BasicNewsRecipe): lambda match: ''), ] - remove_tags_before = dict(name="h2") + remove_tags_before = dict(id="contents_l") remove_tags = [ {'class':"social_bkm_share"}, {'class':"social_bkm_print"}, {'class':"block20 clearfix"}, dict(name="div",attrs={'id':'bookreview'}), + {'class':"tag_left_ttl"}, + {'class':"tag_right"} ] remove_tags_after = {'class':"block20"} + def parse_feeds(self): + + feeds = BasicNewsRecipe.parse_feeds(self) + + for curfeed in feeds: + delList = [] + for a,curarticle in enumerate(curfeed.articles): + if re.search(r'pheedo.jp', curarticle.url): + delList.append(curarticle) + if len(delList)>0: + for d in delList: + index = curfeed.articles.index(d) + curfeed.articles[index:index+1] = [] + + return feeds diff --git a/resources/recipes/cnetjapan_digital.recipe b/resources/recipes/cnetjapan_digital.recipe new file mode 100644 index 0000000000..9028126af2 --- /dev/null +++ b/resources/recipes/cnetjapan_digital.recipe @@ -0,0 +1,49 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class CNetJapanDigital(BasicNewsRecipe): + title = u'CNET Japan Digital' + oldest_article = 3 + max_articles_per_feed = 30 + __author__ = 'Hiroshi Miura' + + feeds = [(u'CNet digital',u'http://feed.japan.cnet.com/rss/digital/index.rdf') ] + language = 'ja' + encoding = 'Shift_JIS' + remove_javascript = True + + preprocess_regexps = [ + (re.compile(ur'.*', re.DOTALL|re.IGNORECASE|re.UNICODE), + lambda match: ''), + (re.compile(r'.*', re.DOTALL|re.IGNORECASE), + lambda match: ''), + (re.compile(ur'.*', re.UNICODE), + lambda match: ''), + ] + + remove_tags_before = dict(id="contents_l") + remove_tags = [ + {'class':"social_bkm_share"}, + {'class':"social_bkm_print"}, + {'class':"block20 clearfix"}, + dict(name="div",attrs={'id':'bookreview'}), + {'class':"tag_left_ttl"}, + {'class':"tag_right"} + ] + remove_tags_after = {'class':"block20"} + + def parse_feeds(self): + + feeds = BasicNewsRecipe.parse_feeds(self) + + for curfeed in feeds: + delList = [] + for a,curarticle in enumerate(curfeed.articles): + if re.search(r'pheedo.jp', curarticle.url): + delList.append(curarticle) + if len(delList)>0: + for d in delList: + index = curfeed.articles.index(d) + curfeed.articles[index:index+1] = [] + + return feeds diff --git a/resources/recipes/cnetjapan_release.recipe b/resources/recipes/cnetjapan_release.recipe new file mode 100644 index 0000000000..e8d13ec99f --- /dev/null +++ b/resources/recipes/cnetjapan_release.recipe @@ -0,0 +1,48 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class CNetJapanRelease(BasicNewsRecipe): + title = u'CNET Japan release' + oldest_article = 3 + max_articles_per_feed = 30 + __author__ = 'Hiroshi Miura' + + feeds = [(u'CNet Release', u'http://feed.japan.cnet.com/rss/release/index.rdf') ] + language = 'ja' + encoding = 'Shift_JIS' + remove_javascript = True + + preprocess_regexps = [ + (re.compile(ur'.*', re.DOTALL|re.IGNORECASE|re.UNICODE), + lambda match: ''), + (re.compile(r'.*', re.DOTALL|re.IGNORECASE), + lambda match: ''), + (re.compile(ur'.*', re.UNICODE), + lambda match: ''), + ] + + remove_tags_before = dict(id="contents_l") + remove_tags = [ + {'class':"social_bkm_share"}, + {'class':"social_bkm_print"}, + {'class':"block20 clearfix"}, + dict(name="div",attrs={'id':'bookreview'}), + {'class':"tag_left_ttl"} + ] + remove_tags_after = {'class':"block20"} + + def parse_feeds(self): + + feeds = BasicNewsRecipe.parse_feeds(self) + + for curfeed in feeds: + delList = [] + for a,curarticle in enumerate(curfeed.articles): + if re.search(r'pheedo.jp', curarticle.url): + delList.append(curarticle) + if len(delList)>0: + for d in delList: + index = curfeed.articles.index(d) + curfeed.articles[index:index+1] = [] + + return feeds diff --git a/resources/recipes/endgadget_ja.recipe b/resources/recipes/endgadget_ja.recipe index 443a85905d..891e6720a5 100644 --- a/resources/recipes/endgadget_ja.recipe +++ b/resources/recipes/endgadget_ja.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/jijinews.recipe b/resources/recipes/jijinews.recipe index f74864365d..4f768ce7ee 100644 --- a/resources/recipes/jijinews.recipe +++ b/resources/recipes/jijinews.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' @@ -14,13 +12,20 @@ class JijiDotCom(BasicNewsRecipe): description = 'World News from Jiji Press' publisher = 'Jiji Press Ltd.' category = 'news' - encoding = 'utf-8' oldest_article = 6 max_articles_per_feed = 100 + encoding = 'euc_jisx0213' language = 'ja' - cover_url = 'http://www.jiji.com/img/top_header_logo2.gif' - masthead_url = 'http://jen.jiji.com/images/logo_jijipress.gif' + masthead_url = 'http://jen.jiji.com/images/logo_jijipress.gif' + top_url = 'http://www.jiji.com/' feeds = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')] remove_tags_after = dict(id="ad_google") + def get_cover_url(self): + cover_url = 'http://www.jiji.com/img/top_header_logo2.gif' + soup = self.index_to_soup(self.top_url) + cover_item = soup.find('div', attrs={'class':'top-pad-photos'}) + if cover_item: + cover_url = self.top_url + cover_item.img['src'] + return cover_url diff --git a/resources/recipes/mainichi.recipe b/resources/recipes/mainichi.recipe index 47dc7d0ebc..2a44fa0980 100644 --- a/resources/recipes/mainichi.recipe +++ b/resources/recipes/mainichi.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/msnsankei.recipe b/resources/recipes/msnsankei.recipe index 4c79771945..ae195559d5 100644 --- a/resources/recipes/msnsankei.recipe +++ b/resources/recipes/msnsankei.recipe @@ -1,4 +1,3 @@ -#!/usr/bin/env python __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' @@ -16,9 +15,13 @@ class MSNSankeiNewsProduct(BasicNewsRecipe): max_articles_per_feed = 100 encoding = 'Shift_JIS' language = 'ja' + cover_url = 'http://sankei.jp.msn.com/images/common/sankeShinbunLogo.jpg' + masthead_url = 'http://sankei.jp.msn.com/images/common/sankeiNewsLogo.gif' feeds = [(u'\u65b0\u5546\u54c1', u'http://sankei.jp.msn.com/rss/news/release.xml')] remove_tags_before = dict(id="__r_article_title__") remove_tags_after = dict(id="ajax_release_news") - remove_tags = [{'class':"parent chromeCustom6G"}] + remove_tags = [{'class':"parent chromeCustom6G"}, + dict(id="RelatedImg") + ] diff --git a/resources/recipes/nikkei_free.recipe b/resources/recipes/nikkei_free.recipe index d84aaa279b..adc596104b 100644 --- a/resources/recipes/nikkei_free.recipe +++ b/resources/recipes/nikkei_free.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' @@ -9,9 +7,9 @@ www.nikkei.com from calibre.web.feeds.news import BasicNewsRecipe class NikkeiNet(BasicNewsRecipe): - title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Free)' + title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Free, MAX)' __author__ = 'Hiroshi Miura' - description = 'News and current market affairs from Japan' + description = 'News and current market affairs from Japan, no subscription and getting max feed.' cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' oldest_article = 2 diff --git a/resources/recipes/nikkei_sub.recipe b/resources/recipes/nikkei_sub.recipe index 95b0017339..18f324009a 100644 --- a/resources/recipes/nikkei_sub.recipe +++ b/resources/recipes/nikkei_sub.recipe @@ -5,12 +5,12 @@ from calibre.ptempfile import PersistentTemporaryFile class NikkeiNet_subscription(BasicNewsRecipe): - title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248' + title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(MAX)' __author__ = 'Hiroshi Miura' - description = 'News and current market affairs from Japan' + description = 'News and current market affairs from Japan, gather MAX articles' needs_subscription = True oldest_article = 2 - max_articles_per_feed = 20 + max_articles_per_feed = 10 language = 'ja' remove_javascript = False temp_files = [] diff --git a/resources/recipes/nikkei_sub_economy.recipe b/resources/recipes/nikkei_sub_economy.recipe index d762f505d1..2dd8f1add8 100644 --- a/resources/recipes/nikkei_sub_economy.recipe +++ b/resources/recipes/nikkei_sub_economy.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/nikkei_sub_industry.recipe b/resources/recipes/nikkei_sub_industry.recipe index da04bbb5f3..81e86767d0 100644 --- a/resources/recipes/nikkei_sub_industry.recipe +++ b/resources/recipes/nikkei_sub_industry.recipe @@ -1,4 +1,3 @@ -#!/usr/bin/env python __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' diff --git a/resources/recipes/nikkei_sub_life.recipe b/resources/recipes/nikkei_sub_life.recipe index 2da5b13834..1bfa08a55f 100644 --- a/resources/recipes/nikkei_sub_life.recipe +++ b/resources/recipes/nikkei_sub_life.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/nikkei_sub_main.recipe b/resources/recipes/nikkei_sub_main.recipe index 142edf624d..485d2f32c0 100644 --- a/resources/recipes/nikkei_sub_main.recipe +++ b/resources/recipes/nikkei_sub_main.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' @@ -30,6 +28,9 @@ class NikkeiNet_sub_main(BasicNewsRecipe): {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"}, {'class':"cmn-article_keyword cmn-clearfix"}, {'class':"cmn-print_headline cmn-clearfix"}, + {'class':"cmn-article_list"}, + {'class':"cmn-dashedline"}, + {'class':"cmn-hide"}, ] remove_tags_after = {'class':"cmn-pr_list"} diff --git a/resources/recipes/nikkei_sub_sports.recipe b/resources/recipes/nikkei_sub_sports.recipe index 6e5a1c6bb2..644b0aa252 100644 --- a/resources/recipes/nikkei_sub_sports.recipe +++ b/resources/recipes/nikkei_sub_sports.recipe @@ -1,4 +1,3 @@ -#!/usr/bin/env python __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' diff --git a/resources/recipes/yomiuri.recipe b/resources/recipes/yomiuri.recipe new file mode 100644 index 0000000000..d30aa9066f --- /dev/null +++ b/resources/recipes/yomiuri.recipe @@ -0,0 +1,63 @@ +__license__ = 'GPL v3' +__copyright__ = '2010, Hiroshi Miura ' +''' +www.yomiuri.co.jp +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class YOLNews(BasicNewsRecipe): + title = u'Yomiuri Online (Latest)' + __author__ = 'Hiroshi Miura' + oldest_article = 1 + max_articles_per_feed = 50 + description = 'Japanese traditional newspaper Yomiuri Online News' + publisher = 'Yomiuri Online News' + category = 'news, japan' + language = 'ja' + encoding = 'Shift_JIS' + index = 'http://www.yomiuri.co.jp/latestnews/' + remove_javascript = True + masthead_title = u'YOMIURI ONLINE' + + remove_tags_before = {'class':"article-def"} + remove_tags = [{'class':"RelatedArticle"}, + {'class':"sbtns"} + ] + remove_tags_after = {'class':"date-def"} + + def parse_feeds(self): + feeds = BasicNewsRecipe.parse_feeds(self) + for curfeed in feeds: + delList = [] + for a,curarticle in enumerate(curfeed.articles): + if re.search(r'rssad.jp', curarticle.url): + delList.append(curarticle) + if len(delList)>0: + for d in delList: + index = curfeed.articles.index(d) + curfeed.articles[index:index+1] = [] + return feeds + + def parse_index(self): + feeds = [] + soup = self.index_to_soup(self.index) + topstories = soup.find('ul',attrs={'class':'list-def'}) + if topstories: + newsarticles = [] + for itt in topstories.findAll('li'): + itema = itt.find('a',href=True) + if itema: + itd1 = itema.findNextSibling(text = True) + itd2 = itd1.findNextSibling(text = True) + itd3 = itd2.findNextSibling(text = True) + newsarticles.append({ + 'title' :itema.string + ,'date' :''.join([itd1, itd2, itd3]) + ,'url' :'http://www.yomiuri.co.jp' + itema['href'] + ,'description':'' + }) + feeds.append(('latest', newsarticles)) + return feeds + diff --git a/resources/recipes/yomiuri_world.recipe b/resources/recipes/yomiuri_world.recipe new file mode 100644 index 0000000000..f5f21c4aab --- /dev/null +++ b/resources/recipes/yomiuri_world.recipe @@ -0,0 +1,61 @@ +__license__ = 'GPL v3' +__copyright__ = '2010, Hiroshi Miura ' +''' +www.yomiuri.co.jp +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class YOLNews(BasicNewsRecipe): + title = u'Yomiuri Online (World)' + __author__ = 'Hiroshi Miura' + oldest_article = 2 + max_articles_per_feed = 50 + description = 'Japanese traditional newspaper Yomiuri Online News/world news' + publisher = 'Yomiuri Online News' + category = 'news, japan' + language = 'ja' + encoding = 'Shift_JIS' + index = 'http://www.yomiuri.co.jp/world/' + remove_javascript = True + masthead_title = u"YOMIURI ONLINE" + + remove_tags_before = {'class':"article-def"} + remove_tags = [{'class':"RelatedArticle"}, + {'class':"sbtns"} + ] + remove_tags_after = {'class':"date-def"} + + def parse_feeds(self): + feeds = BasicNewsRecipe.parse_feeds(self) + for curfeed in feeds: + delList = [] + for a,curarticle in enumerate(curfeed.articles): + if re.search(r'rssad.jp', curarticle.url): + delList.append(curarticle) + if len(delList)>0: + for d in delList: + index = curfeed.articles.index(d) + curfeed.articles[index:index+1] = [] + return feeds + + def parse_index(self): + feeds = [] + soup = self.index_to_soup(self.index) + topstories = soup.find('ul',attrs={'class':'list-def'}) + if topstories: + newsarticles = [] + for itt in topstories.findAll('li'): + itema = itt.find('a',href=True) + if itema: + itd1 = itema.findNextSibling(text = True) + newsarticles.append({ + 'title' :itema.string + ,'date' :''.join([itd1]) + ,'url' :'http://www.yomiuri.co.jp' + itema['href'] + ,'description':'' + }) + feeds.append(('World', newsarticles)) + return feeds +