From 38d1375974023ace56817bd8e53e245ecf3621a5 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Sat, 27 Nov 2010 01:08:09 +0900 Subject: [PATCH 1/9] fix charcode and minor bugs --- resources/recipes/jijinews.recipe | 11 ++++++++++- resources/recipes/msnsankei.recipe | 7 +++++-- resources/recipes/nikkei_sub_main.recipe | 3 +++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/resources/recipes/jijinews.recipe b/resources/recipes/jijinews.recipe index f74864365d..4af242063e 100644 --- a/resources/recipes/jijinews.recipe +++ b/resources/recipes/jijinews.recipe @@ -17,10 +17,19 @@ class JijiDotCom(BasicNewsRecipe): encoding = 'utf-8' oldest_article = 6 max_articles_per_feed = 100 + encoding = 'EUC_JP' language = 'ja' - cover_url = 'http://www.jiji.com/img/top_header_logo2.gif' masthead_url = 'http://jen.jiji.com/images/logo_jijipress.gif' + top_url = 'http://www.jiji.com/' feeds = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')] remove_tags_after = dict(id="ad_google") + def get_cover_url(self): + cover_url = 'http://www.jiji.com/img/top_header_logo2.gif' + soup = self.index_to_soup(self.top_url) + cover_item = soup.find('div', attrs={'class':'top-pad-photos'}) + if cover_item: + cover_url = self.top_url + cover_item.img['src'] + return cover_url + diff --git a/resources/recipes/msnsankei.recipe b/resources/recipes/msnsankei.recipe index 4c79771945..8c78ccd9e9 100644 --- a/resources/recipes/msnsankei.recipe +++ b/resources/recipes/msnsankei.recipe @@ -1,4 +1,3 @@ -#!/usr/bin/env python __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' @@ -16,9 +15,13 @@ class MSNSankeiNewsProduct(BasicNewsRecipe): max_articles_per_feed = 100 encoding = 'Shift_JIS' language = 'ja' + cover_url = 'http://sankei.jp.msn.com/images/common/sankeShinbunLogo.jpg' + masthead_url = 'http://sankei.jp.msn.com/images/common/sankeiNewsLogo.gif' feeds = [(u'\u65b0\u5546\u54c1', u'http://sankei.jp.msn.com/rss/news/release.xml')] remove_tags_before = dict(id="__r_article_title__") remove_tags_after = dict(id="ajax_release_news") - remove_tags = [{'class':"parent chromeCustom6G"}] + remove_tags = [{'class':"parent chromeCustom6G"}, + {'class':"RelatedImg"} + ] diff --git a/resources/recipes/nikkei_sub_main.recipe b/resources/recipes/nikkei_sub_main.recipe index 142edf624d..37fc8964c4 100644 --- a/resources/recipes/nikkei_sub_main.recipe +++ b/resources/recipes/nikkei_sub_main.recipe @@ -30,6 +30,9 @@ class NikkeiNet_sub_main(BasicNewsRecipe): {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"}, {'class':"cmn-article_keyword cmn-clearfix"}, {'class':"cmn-print_headline cmn-clearfix"}, + {'class':"cmn-article_list"}, + {'class':"cmn-dashedline"}, + {'class':"cmn-hide"}, ] remove_tags_after = {'class':"cmn-pr_list"} From 176010dc03fa5bb6c4ca30e179c3a1a4d49e73a5 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Sat, 27 Nov 2010 09:53:35 +0900 Subject: [PATCH 2/9] recipe: jijinews: fix indent error --- resources/recipes/jijinews.recipe | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/resources/recipes/jijinews.recipe b/resources/recipes/jijinews.recipe index 4af242063e..a62b4db739 100644 --- a/resources/recipes/jijinews.recipe +++ b/resources/recipes/jijinews.recipe @@ -19,17 +19,16 @@ class JijiDotCom(BasicNewsRecipe): max_articles_per_feed = 100 encoding = 'EUC_JP' language = 'ja' - masthead_url = 'http://jen.jiji.com/images/logo_jijipress.gif' - top_url = 'http://www.jiji.com/' + masthead_url = 'http://jen.jiji.com/images/logo_jijipress.gif' + top_url = 'http://www.jiji.com/' feeds = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')] remove_tags_after = dict(id="ad_google") - def get_cover_url(self): - cover_url = 'http://www.jiji.com/img/top_header_logo2.gif' - soup = self.index_to_soup(self.top_url) - cover_item = soup.find('div', attrs={'class':'top-pad-photos'}) - if cover_item: - cover_url = self.top_url + cover_item.img['src'] - return cover_url - + def get_cover_url(self): + cover_url = 'http://www.jiji.com/img/top_header_logo2.gif' + soup = self.index_to_soup(self.top_url) + cover_item = soup.find('div', attrs={'class':'top-pad-photos'}) + if cover_item: + cover_url = self.top_url + cover_item.img['src'] + return cover_url From 3a8eae0fba90b4c3ee040e47c7f64e5f2ed8c6b2 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Sat, 27 Nov 2010 10:18:43 +0900 Subject: [PATCH 3/9] recipe: some fix in japanese recipe jijinews: charset fix msnsankei: removal tag fix --- resources/recipes/jijinews.recipe | 2 +- resources/recipes/msnsankei.recipe | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/recipes/jijinews.recipe b/resources/recipes/jijinews.recipe index a62b4db739..98e7a180d1 100644 --- a/resources/recipes/jijinews.recipe +++ b/resources/recipes/jijinews.recipe @@ -17,7 +17,7 @@ class JijiDotCom(BasicNewsRecipe): encoding = 'utf-8' oldest_article = 6 max_articles_per_feed = 100 - encoding = 'EUC_JP' + encoding = 'euc_jisx0213' language = 'ja' masthead_url = 'http://jen.jiji.com/images/logo_jijipress.gif' top_url = 'http://www.jiji.com/' diff --git a/resources/recipes/msnsankei.recipe b/resources/recipes/msnsankei.recipe index 8c78ccd9e9..ae195559d5 100644 --- a/resources/recipes/msnsankei.recipe +++ b/resources/recipes/msnsankei.recipe @@ -23,5 +23,5 @@ class MSNSankeiNewsProduct(BasicNewsRecipe): remove_tags_before = dict(id="__r_article_title__") remove_tags_after = dict(id="ajax_release_news") remove_tags = [{'class':"parent chromeCustom6G"}, - {'class':"RelatedImg"} + dict(id="RelatedImg") ] From 064bfaa7f9eb61f298895c6a30c6cd90d5b0c87f Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Sat, 27 Nov 2010 12:04:00 +0900 Subject: [PATCH 4/9] recipe: japanese: fix minor bugs - jijinews: fix encoding - mainichi, nikkei: add icons --- resources/images/news/mainichi.png | Bin 0 -> 953 bytes resources/images/news/mainichi_it_news.png | Bin 0 -> 953 bytes .../{nikkei_sub_industory.png => nikkei_sub.png} | Bin resources/images/news/nikkei_sub_industry.png | Bin 0 -> 948 bytes resources/recipes/jijinews.recipe | 1 - 5 files changed, 1 deletion(-) create mode 100644 resources/images/news/mainichi.png create mode 100644 resources/images/news/mainichi_it_news.png rename resources/images/news/{nikkei_sub_industory.png => nikkei_sub.png} (100%) create mode 100644 resources/images/news/nikkei_sub_industry.png diff --git a/resources/images/news/mainichi.png b/resources/images/news/mainichi.png new file mode 100644 index 0000000000000000000000000000000000000000..9f8e8f319fb20ec4540c942504efedf215fc2479 GIT binary patch literal 953 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?>V;x;TbdoUWY|-4PlpalAhJ?bi33pU=5? zMKX6+rLw7;#>EqIT}7*=%Ps4Cv2;QEyc`3g7e|{`EKuoU(mdkMyn3QR*DKFU>~Gz| zL@Zn973K8JGk(1D?dI=x*=yzc%vvw}|M~mh{qOrbe5?O$;GXF;qh}tYd(*ZD9RK7m zEte5lk>!1PBctmwNgmaB8{0$e_qSF4`)d`a@_(jy!^VIVHv1_R`;^Y_);Qm2a{6Rc ztXiJ_#zG0c*-Z~8DfWr+XE9GyuXu5d(eB5xv-w+l4n{0ss5y*-n(MwL@#qxy{OjBoWfRlbSu4cwJ} zbHl0B_vzCset)pZx7j&oWwn=2{nbb7dIg;)ru=g0mYuEqZ_W1`^=mx+qo)*G_83g4 z`IUU?9dqc0g3letH(riao42;0X4&b9@2!1of**dKAEdWbOzM~Yr#Tt7uW)<5`1mMg z=I`xC-rNun`Z-BD|AVA_w?_MQen#)NqZ8k5eLn)HcO@QFzEwAH#x|M?W&x0)rXrQzlN8+g!aq`St1N z7C%d8|G5yQc7NrkizRa}zhC?Caj{QWm2yBPlh*pcDwxU zV+rS+cVk|`?XPmz`840fNN;$*bE-{&_mSHCt2;IXeCaM>ZEjq0?b;S2X0f=RDU9bE zHwMl>AR6+@c1vTkdi0rc9Ozamx% P)WG2B>gTe~DWM4fFNU-h literal 0 HcmV?d00001 diff --git a/resources/images/news/mainichi_it_news.png b/resources/images/news/mainichi_it_news.png new file mode 100644 index 0000000000000000000000000000000000000000..9f8e8f319fb20ec4540c942504efedf215fc2479 GIT binary patch literal 953 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?>V;x;TbdoUWY|-4PlpalAhJ?bi33pU=5? zMKX6+rLw7;#>EqIT}7*=%Ps4Cv2;QEyc`3g7e|{`EKuoU(mdkMyn3QR*DKFU>~Gz| zL@Zn973K8JGk(1D?dI=x*=yzc%vvw}|M~mh{qOrbe5?O$;GXF;qh}tYd(*ZD9RK7m zEte5lk>!1PBctmwNgmaB8{0$e_qSF4`)d`a@_(jy!^VIVHv1_R`;^Y_);Qm2a{6Rc ztXiJ_#zG0c*-Z~8DfWr+XE9GyuXu5d(eB5xv-w+l4n{0ss5y*-n(MwL@#qxy{OjBoWfRlbSu4cwJ} zbHl0B_vzCset)pZx7j&oWwn=2{nbb7dIg;)ru=g0mYuEqZ_W1`^=mx+qo)*G_83g4 z`IUU?9dqc0g3letH(riao42;0X4&b9@2!1of**dKAEdWbOzM~Yr#Tt7uW)<5`1mMg z=I`xC-rNun`Z-BD|AVA_w?_MQen#)NqZ8k5eLn)HcO@QFzEwAH#x|M?W&x0)rXrQzlN8+g!aq`St1N z7C%d8|G5yQc7NrkizRa}zhC?Caj{QWm2yBPlh*pcDwxU zV+rS+cVk|`?XPmz`840fNN;$*bE-{&_mSHCt2;IXeCaM>ZEjq0?b;S2X0f=RDU9bE zHwMl>AR6+@c1vTkdi0rc9Ozamx% P)WG2B>gTe~DWM4fFNU-h literal 0 HcmV?d00001 diff --git a/resources/images/news/nikkei_sub_industory.png b/resources/images/news/nikkei_sub.png similarity index 100% rename from resources/images/news/nikkei_sub_industory.png rename to resources/images/news/nikkei_sub.png diff --git a/resources/images/news/nikkei_sub_industry.png b/resources/images/news/nikkei_sub_industry.png new file mode 100644 index 0000000000000000000000000000000000000000..308f4b3085bafd5c8aabf5fcc5196da3ed2d39bb GIT binary patch literal 948 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?|#Qx;TbdoX(x>oe`5NbF}_`?Nhax&jQyz zdn%Hiv2^LytJWG*r5riioPsX7?iRe#Yor~$O2b5N%S*R{hgty_YhA8zG?@gV43}NidsXG`|@pjZ5JyqUG&hi zCO)ju^BLFU@3)THKF|obGjns>=E}(`jY~8=g)6>S$OjvTraiZvrhRg|v|~JThV+LS zx!o@h8LvIeT*-WXa?OWJ40*nfuB#}YFZ*=vv~5Z5<(pqSB+PhCdxNEZ+cR|io*UK^ z))+n6=2~L$4~Mpm%Xe2A-EUjR&G#lxnM2ixSHu53qsBURi=~}&EwfK#HM_+MT@h{n zww?LIz1Mr6{EB)Pw^RTAsipn0a#aq?cPE!}EH_#B(kgwEWpmr|jH)h^kR>mz5@t$q zcl*Ao^ZHrlA)(18x9wt!+p?uRD~#RJm-|iPn$@Vn>A1|m`u)d@{YPIUh~JMb-MMYU zG5-B#8EZTl#Qt}A2=$9??s{SWYxAqqQgh^9K0I>a+wRx%{x51u<^x6y-$1*|5L#qCE)St_M6`c4cGhLCcJXuJuM((H{;>diR_W9j_mmxIMaaV z<6X8u<3k)wCm#q)Zo9Pb`0dKCe`mird-G0Ir^UBx^Id%9AJrSSc5m5r;gG|_^-H7K zbpFadUhi4r8&Dd&NP{WTb%m&juIf^8U(IuK|E9Fgntg|T{hDKQ{uhY`q*UwtDxdMX zU{}XEqe~TB0?$o=F&O6@cSe40df7U^v)wayKDzY#QD}hS3bk#zH@2U4QsK^cWmV;E zZg|cWsPx#e1D@Lz=gHfCwGDhBbd+fdn}(gOwa1JWiLfI#COYn$8L1}Z&Y)&K{UuAl zmb0gdcSR|5+APT_di_Z{W0#A*IV=)1N=*K52a Date: Sat, 27 Nov 2010 14:32:01 +0900 Subject: [PATCH 5/9] recipes: add new cnet japan feed - some removal of tags - skip ad pages --- resources/recipes/cnetjapan.recipe | 23 +++++++++- resources/recipes/cnetjapan_digital.recipe | 49 ++++++++++++++++++++++ resources/recipes/cnetjapan_release.recipe | 48 +++++++++++++++++++++ 3 files changed, 118 insertions(+), 2 deletions(-) create mode 100644 resources/recipes/cnetjapan_digital.recipe create mode 100644 resources/recipes/cnetjapan_release.recipe diff --git a/resources/recipes/cnetjapan.recipe b/resources/recipes/cnetjapan.recipe index e0178c1ff2..1058b90401 100644 --- a/resources/recipes/cnetjapan.recipe +++ b/resources/recipes/cnetjapan.recipe @@ -7,7 +7,9 @@ class CNetJapan(BasicNewsRecipe): max_articles_per_feed = 30 __author__ = 'Hiroshi Miura' - feeds = [(u'cnet rss', u'http://feeds.japan.cnet.com/cnet/rss')] + feeds = [(u'CNet News', u'http://feed.japan.cnet.com/rss/index.rdf'), + (u'CNet Blog', u'http://feed.japan.cnet.com/rss/blog/index.rdf') + ] language = 'ja' encoding = 'Shift_JIS' remove_javascript = True @@ -21,12 +23,29 @@ class CNetJapan(BasicNewsRecipe): lambda match: ''), ] - remove_tags_before = dict(name="h2") + remove_tags_before = dict(id="contents_l") remove_tags = [ {'class':"social_bkm_share"}, {'class':"social_bkm_print"}, {'class':"block20 clearfix"}, dict(name="div",attrs={'id':'bookreview'}), + {'class':"tag_left_ttl"}, + {'class':"tag_right"} ] remove_tags_after = {'class':"block20"} + def parse_feeds(self): + + feeds = BasicNewsRecipe.parse_feeds(self) + + for curfeed in feeds: + delList = [] + for a,curarticle in enumerate(curfeed.articles): + if re.search(r'pheedo.jp', curarticle.url): + delList.append(curarticle) + if len(delList)>0: + for d in delList: + index = curfeed.articles.index(d) + curfeed.articles[index:index+1] = [] + + return feeds diff --git a/resources/recipes/cnetjapan_digital.recipe b/resources/recipes/cnetjapan_digital.recipe new file mode 100644 index 0000000000..9028126af2 --- /dev/null +++ b/resources/recipes/cnetjapan_digital.recipe @@ -0,0 +1,49 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class CNetJapanDigital(BasicNewsRecipe): + title = u'CNET Japan Digital' + oldest_article = 3 + max_articles_per_feed = 30 + __author__ = 'Hiroshi Miura' + + feeds = [(u'CNet digital',u'http://feed.japan.cnet.com/rss/digital/index.rdf') ] + language = 'ja' + encoding = 'Shift_JIS' + remove_javascript = True + + preprocess_regexps = [ + (re.compile(ur'.*', re.DOTALL|re.IGNORECASE|re.UNICODE), + lambda match: ''), + (re.compile(r'.*', re.DOTALL|re.IGNORECASE), + lambda match: ''), + (re.compile(ur'.*', re.UNICODE), + lambda match: ''), + ] + + remove_tags_before = dict(id="contents_l") + remove_tags = [ + {'class':"social_bkm_share"}, + {'class':"social_bkm_print"}, + {'class':"block20 clearfix"}, + dict(name="div",attrs={'id':'bookreview'}), + {'class':"tag_left_ttl"}, + {'class':"tag_right"} + ] + remove_tags_after = {'class':"block20"} + + def parse_feeds(self): + + feeds = BasicNewsRecipe.parse_feeds(self) + + for curfeed in feeds: + delList = [] + for a,curarticle in enumerate(curfeed.articles): + if re.search(r'pheedo.jp', curarticle.url): + delList.append(curarticle) + if len(delList)>0: + for d in delList: + index = curfeed.articles.index(d) + curfeed.articles[index:index+1] = [] + + return feeds diff --git a/resources/recipes/cnetjapan_release.recipe b/resources/recipes/cnetjapan_release.recipe new file mode 100644 index 0000000000..e8d13ec99f --- /dev/null +++ b/resources/recipes/cnetjapan_release.recipe @@ -0,0 +1,48 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class CNetJapanRelease(BasicNewsRecipe): + title = u'CNET Japan release' + oldest_article = 3 + max_articles_per_feed = 30 + __author__ = 'Hiroshi Miura' + + feeds = [(u'CNet Release', u'http://feed.japan.cnet.com/rss/release/index.rdf') ] + language = 'ja' + encoding = 'Shift_JIS' + remove_javascript = True + + preprocess_regexps = [ + (re.compile(ur'.*', re.DOTALL|re.IGNORECASE|re.UNICODE), + lambda match: ''), + (re.compile(r'.*', re.DOTALL|re.IGNORECASE), + lambda match: ''), + (re.compile(ur'.*', re.UNICODE), + lambda match: ''), + ] + + remove_tags_before = dict(id="contents_l") + remove_tags = [ + {'class':"social_bkm_share"}, + {'class':"social_bkm_print"}, + {'class':"block20 clearfix"}, + dict(name="div",attrs={'id':'bookreview'}), + {'class':"tag_left_ttl"} + ] + remove_tags_after = {'class':"block20"} + + def parse_feeds(self): + + feeds = BasicNewsRecipe.parse_feeds(self) + + for curfeed in feeds: + delList = [] + for a,curarticle in enumerate(curfeed.articles): + if re.search(r'pheedo.jp', curarticle.url): + delList.append(curarticle) + if len(delList)>0: + for d in delList: + index = curfeed.articles.index(d) + curfeed.articles[index:index+1] = [] + + return feeds From 56e38290df6125f34ae8f6a18ae75b4bf7e7724e Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Sun, 28 Nov 2010 23:41:46 +0900 Subject: [PATCH 6/9] recipes: fix minor bugs, add yomiuri news - remove wrong #!(hash-bang) - add yomiuri online news --- resources/recipes/endgadget_ja.recipe | 2 - resources/recipes/jijinews.recipe | 2 - resources/recipes/mainichi.recipe | 2 - resources/recipes/nikkei_free.recipe | 6 +- resources/recipes/nikkei_sub.recipe | 6 +- resources/recipes/nikkei_sub_economy.recipe | 2 - resources/recipes/nikkei_sub_industry.recipe | 1 - resources/recipes/nikkei_sub_life.recipe | 2 - resources/recipes/nikkei_sub_main.recipe | 2 - resources/recipes/nikkei_sub_sports.recipe | 1 - resources/recipes/yomiuri.recipe | 66 ++++++++++++++++++++ 11 files changed, 71 insertions(+), 21 deletions(-) create mode 100644 resources/recipes/yomiuri.recipe diff --git a/resources/recipes/endgadget_ja.recipe b/resources/recipes/endgadget_ja.recipe index 443a85905d..891e6720a5 100644 --- a/resources/recipes/endgadget_ja.recipe +++ b/resources/recipes/endgadget_ja.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/jijinews.recipe b/resources/recipes/jijinews.recipe index fe52e76aaf..4f768ce7ee 100644 --- a/resources/recipes/jijinews.recipe +++ b/resources/recipes/jijinews.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/mainichi.recipe b/resources/recipes/mainichi.recipe index 47dc7d0ebc..2a44fa0980 100644 --- a/resources/recipes/mainichi.recipe +++ b/resources/recipes/mainichi.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/nikkei_free.recipe b/resources/recipes/nikkei_free.recipe index d84aaa279b..adc596104b 100644 --- a/resources/recipes/nikkei_free.recipe +++ b/resources/recipes/nikkei_free.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' @@ -9,9 +7,9 @@ www.nikkei.com from calibre.web.feeds.news import BasicNewsRecipe class NikkeiNet(BasicNewsRecipe): - title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Free)' + title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Free, MAX)' __author__ = 'Hiroshi Miura' - description = 'News and current market affairs from Japan' + description = 'News and current market affairs from Japan, no subscription and getting max feed.' cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' oldest_article = 2 diff --git a/resources/recipes/nikkei_sub.recipe b/resources/recipes/nikkei_sub.recipe index 95b0017339..18f324009a 100644 --- a/resources/recipes/nikkei_sub.recipe +++ b/resources/recipes/nikkei_sub.recipe @@ -5,12 +5,12 @@ from calibre.ptempfile import PersistentTemporaryFile class NikkeiNet_subscription(BasicNewsRecipe): - title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248' + title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(MAX)' __author__ = 'Hiroshi Miura' - description = 'News and current market affairs from Japan' + description = 'News and current market affairs from Japan, gather MAX articles' needs_subscription = True oldest_article = 2 - max_articles_per_feed = 20 + max_articles_per_feed = 10 language = 'ja' remove_javascript = False temp_files = [] diff --git a/resources/recipes/nikkei_sub_economy.recipe b/resources/recipes/nikkei_sub_economy.recipe index d762f505d1..2dd8f1add8 100644 --- a/resources/recipes/nikkei_sub_economy.recipe +++ b/resources/recipes/nikkei_sub_economy.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/nikkei_sub_industry.recipe b/resources/recipes/nikkei_sub_industry.recipe index da04bbb5f3..81e86767d0 100644 --- a/resources/recipes/nikkei_sub_industry.recipe +++ b/resources/recipes/nikkei_sub_industry.recipe @@ -1,4 +1,3 @@ -#!/usr/bin/env python __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' diff --git a/resources/recipes/nikkei_sub_life.recipe b/resources/recipes/nikkei_sub_life.recipe index 2da5b13834..1bfa08a55f 100644 --- a/resources/recipes/nikkei_sub_life.recipe +++ b/resources/recipes/nikkei_sub_life.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/nikkei_sub_main.recipe b/resources/recipes/nikkei_sub_main.recipe index 37fc8964c4..485d2f32c0 100644 --- a/resources/recipes/nikkei_sub_main.recipe +++ b/resources/recipes/nikkei_sub_main.recipe @@ -1,5 +1,3 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' ''' diff --git a/resources/recipes/nikkei_sub_sports.recipe b/resources/recipes/nikkei_sub_sports.recipe index 6e5a1c6bb2..644b0aa252 100644 --- a/resources/recipes/nikkei_sub_sports.recipe +++ b/resources/recipes/nikkei_sub_sports.recipe @@ -1,4 +1,3 @@ -#!/usr/bin/env python __license__ = 'GPL v3' __copyright__ = '2010, Hiroshi Miura ' diff --git a/resources/recipes/yomiuri.recipe b/resources/recipes/yomiuri.recipe new file mode 100644 index 0000000000..6335b99e32 --- /dev/null +++ b/resources/recipes/yomiuri.recipe @@ -0,0 +1,66 @@ +__license__ = 'GPL v3' +__copyright__ = '2010, Hiroshi Miura ' +''' +www.yomiuri.co.jp +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class YOLNews(BasicNewsRecipe): + title = u'YOMIURI ONLINE' + __author__ = 'Hiroshi Miura' + oldest_article = 1 + max_articles_per_feed = 50 + description = 'Japanese traditional newspaper Yomiuri Online News' + publisher = 'Yomiuri Online News' + category = 'news, japan' + language = 'ja' + encoding = 'Shift_JIS' + index = 'http://www.yomiuri.co.jp/latestnews/' + remove_javascript = True + + remove_tags_before = {'class':"article-def"} + remove_tags = [{'class':"RelatedArticle"}, + {'class:"sbtns"} + ] + remove_tags_after = {'class':"date-def"} + + def parse_feeds(self): + + feeds = BasicNewsRecipe.parse_feeds(self) + + for curfeed in feeds: + delList = [] + for a,curarticle in enumerate(curfeed.articles): + if re.search(r'rssad.jp', curarticle.url): + delList.append(curarticle) + if len(delList)>0: + for d in delList: + index = curfeed.articles.index(d) + curfeed.articles[index:index+1] = [] + + return feeds + + def parse_index(self): + feeds = [] + soup = self.index_to_soup(self.index) + topstories = soup.find('ul',attrs={'class':'list-def'}) + if topstories: + newsarticles = [] + for itt in topstories.findAll('li'): + itema = itt.find('a',href=True) + if itema: + itd1 = itema.findNextSibling(text = True) + itd2 = itd1.findNextSibling(text = True) + itd3 = itd2.findNextSibling(text = True) + newsarticles.append({ + 'title' :itema.string + ,'date' :''.join([itd1, itd2, itd3]) + ,'url' :'http://www.yomiuri.co.jp' + itema['href'] + ,'description':'' + }) + feeds.append(('News', newsarticles)) + + return feeds + From 2b2a8a1edcc789010e928ae29b3d079d79de3ec6 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Sun, 28 Nov 2010 23:43:35 +0900 Subject: [PATCH 7/9] recipes: fix typo --- resources/recipes/yomiuri.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/recipes/yomiuri.recipe b/resources/recipes/yomiuri.recipe index 6335b99e32..39a085bf83 100644 --- a/resources/recipes/yomiuri.recipe +++ b/resources/recipes/yomiuri.recipe @@ -22,7 +22,7 @@ class YOLNews(BasicNewsRecipe): remove_tags_before = {'class':"article-def"} remove_tags = [{'class':"RelatedArticle"}, - {'class:"sbtns"} + {'class':"sbtns"} ] remove_tags_after = {'class':"date-def"} From 5f5c41e495a50a30bb7cec0055decb0eb1156305 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Sun, 28 Nov 2010 23:53:46 +0900 Subject: [PATCH 8/9] recipes: add icons and yomiuri online variant --- resources/images/news/cnetjapan_digital.png | Bin 0 -> 892 bytes resources/images/news/cnetjapan_release.png | Bin 0 -> 892 bytes resources/images/news/yomiuri.png | Bin 0 -> 660 bytes resources/recipes/yomiuri.recipe | 9 +-- resources/recipes/yomiuri_world.recipe | 63 ++++++++++++++++++++ 5 files changed, 66 insertions(+), 6 deletions(-) create mode 100644 resources/images/news/cnetjapan_digital.png create mode 100644 resources/images/news/cnetjapan_release.png create mode 100644 resources/images/news/yomiuri.png create mode 100644 resources/recipes/yomiuri_world.recipe diff --git a/resources/images/news/cnetjapan_digital.png b/resources/images/news/cnetjapan_digital.png new file mode 100644 index 0000000000000000000000000000000000000000..9a0dcc8f7fa7645db5f97ef0b5e2978133e05732 GIT binary patch literal 892 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?_rNx;TbdoKBs5HhW6A$g%qG_pTQj9XoPS zgUd~N@st@t=De* zacN=G@pZC`V(gs?9`1fEy;#%tp}*Ix>$`Wk&56!=&G>S*>#pqU6K6%ir>>}4qV!Kd6U%X$;Qn!IYp}f_bzlv?~~pAUA{rt zJu!*>^=(atyg!Ri&9Y#8@Nc6qb6I?^7PF}Y?}fTwQok29zZPMbvi?zH+gkJgi}qH2 z=kl)+EuDBoKbpJ#(w^-OYu{YzZCk}#v*OXdy$mMe(Li*^OP;yFZHd*A^ux3DuN0N9 z+n3H?wZgID6OW>?VH@oX5I){wfOzj_lxQS|M!^K8!q~{E;65oOVqVR zpkU&$wn7D=tq3fJd+cw3`+^8Y@;2bVf#Zap-8>51olH?)FK#IZ0z`#=1&`8(FG{nf* z$^?ikbPdd{3=Fup)uf?l$jwj5OshoGU~FM!XaLc0!9Hs(Py>UftDnm{r-UW|`oMrT literal 0 HcmV?d00001 diff --git a/resources/images/news/cnetjapan_release.png b/resources/images/news/cnetjapan_release.png new file mode 100644 index 0000000000000000000000000000000000000000..9a0dcc8f7fa7645db5f97ef0b5e2978133e05732 GIT binary patch literal 892 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87?_rNx;TbdoKBs5HhW6A$g%qG_pTQj9XoPS zgUd~N@st@t=De* zacN=G@pZC`V(gs?9`1fEy;#%tp}*Ix>$`Wk&56!=&G>S*>#pqU6K6%ir>>}4qV!Kd6U%X$;Qn!IYp}f_bzlv?~~pAUA{rt zJu!*>^=(atyg!Ri&9Y#8@Nc6qb6I?^7PF}Y?}fTwQok29zZPMbvi?zH+gkJgi}qH2 z=kl)+EuDBoKbpJ#(w^-OYu{YzZCk}#v*OXdy$mMe(Li*^OP;yFZHd*A^ux3DuN0N9 z+n3H?wZgID6OW>?VH@oX5I){wfOzj_lxQS|M!^K8!q~{E;65oOVqVR zpkU&$wn7D=tq3fJd+cw3`+^8Y@;2bVf#Zap-8>51olH?)FK#IZ0z`#=1&`8(FG{nf* z$^?ikbPdd{3=Fup)uf?l$jwj5OshoGU~FM!XaLc0!9Hs(Py>UftDnm{r-UW|`oMrT literal 0 HcmV?d00001 diff --git a/resources/images/news/yomiuri.png b/resources/images/news/yomiuri.png new file mode 100644 index 0000000000000000000000000000000000000000..4a197f888f076f9801d3fd8cfd3fb2479af6520a GIT binary patch literal 660 zcmeAS@N?(olHy`uVBq!ia0vp^3LwnE1|*BCs=fdzwj^(N7l!{JxM1({$v_d#0*}aI zAngIhZYQ(tK!Rljj_E*J0gT&!&6&%QHS-aIkQ~omtsC%VrC@l%9XX)LZ0NMiU= z<9D8k!J*7a)N#w3*qy?^XY`4OC-F`H&(Xx$apdftAoXw6lRvN8zmI=bvrpX7Wnnkg zJU_$`aeL9N+$Qb+nu}!=-g|v`Cu_oVua9emR+O4qXm05N7p3t0(@k&g{PFeaUi&JF zaa-7o6IatW={O7cWOerHzF@Mvd*Gvy=UL|49t`|$X`;rF%ly+c@T)33b~kkg$5!NxSz>aV>HZdQ3h~YCE1T-o;^?+rqdp z^k$Cr%9IECTKibVCDIm`vR2p3xfgvv|Nr)PSK9A})dOQxwZt`|BqgyV)hf9t6-Y4{ z85kPq8XD>vS%erFTbY;uk+y-Qm4QLK$Jd1@8glbfGSe!tXfT9mSfZP>3#fs?)78&q Iol`;+00UkKJpcdz literal 0 HcmV?d00001 diff --git a/resources/recipes/yomiuri.recipe b/resources/recipes/yomiuri.recipe index 39a085bf83..b3df1b58aa 100644 --- a/resources/recipes/yomiuri.recipe +++ b/resources/recipes/yomiuri.recipe @@ -8,7 +8,7 @@ from calibre.web.feeds.news import BasicNewsRecipe import re class YOLNews(BasicNewsRecipe): - title = u'YOMIURI ONLINE' + title = u'YOMIURI ONLINE(Latest)' __author__ = 'Hiroshi Miura' oldest_article = 1 max_articles_per_feed = 50 @@ -19,6 +19,7 @@ class YOLNews(BasicNewsRecipe): encoding = 'Shift_JIS' index = 'http://www.yomiuri.co.jp/latestnews/' remove_javascript = True + masthead_title = u'YOMIURI ONLINE' remove_tags_before = {'class':"article-def"} remove_tags = [{'class':"RelatedArticle"}, @@ -27,9 +28,7 @@ class YOLNews(BasicNewsRecipe): remove_tags_after = {'class':"date-def"} def parse_feeds(self): - feeds = BasicNewsRecipe.parse_feeds(self) - for curfeed in feeds: delList = [] for a,curarticle in enumerate(curfeed.articles): @@ -39,7 +38,6 @@ class YOLNews(BasicNewsRecipe): for d in delList: index = curfeed.articles.index(d) curfeed.articles[index:index+1] = [] - return feeds def parse_index(self): @@ -60,7 +58,6 @@ class YOLNews(BasicNewsRecipe): ,'url' :'http://www.yomiuri.co.jp' + itema['href'] ,'description':'' }) - feeds.append(('News', newsarticles)) - + feeds.append(('latest', newsarticles)) return feeds diff --git a/resources/recipes/yomiuri_world.recipe b/resources/recipes/yomiuri_world.recipe new file mode 100644 index 0000000000..0146ffa330 --- /dev/null +++ b/resources/recipes/yomiuri_world.recipe @@ -0,0 +1,63 @@ +__license__ = 'GPL v3' +__copyright__ = '2010, Hiroshi Miura ' +''' +www.yomiuri.co.jp +''' + +from calibre.web.feeds.news import BasicNewsRecipe +import re + +class YOLNews(BasicNewsRecipe): + title = u'YOMIURI ONLINE(World)' + __author__ = 'Hiroshi Miura' + oldest_article = 2 + max_articles_per_feed = 50 + description = 'Japanese traditional newspaper Yomiuri Online News/world news' + publisher = 'Yomiuri Online News' + category = 'news, japan' + language = 'ja' + encoding = 'Shift_JIS' + index = 'http://www.yomiuri.co.jp/world/' + remove_javascript = True + masthead_title = u"YOMIURI ONLINE" + + remove_tags_before = {'class':"article-def"} + remove_tags = [{'class':"RelatedArticle"}, + {'class':"sbtns"} + ] + remove_tags_after = {'class':"date-def"} + + def parse_feeds(self): + feeds = BasicNewsRecipe.parse_feeds(self) + for curfeed in feeds: + delList = [] + for a,curarticle in enumerate(curfeed.articles): + if re.search(r'rssad.jp', curarticle.url): + delList.append(curarticle) + if len(delList)>0: + for d in delList: + index = curfeed.articles.index(d) + curfeed.articles[index:index+1] = [] + return feeds + + def parse_index(self): + feeds = [] + soup = self.index_to_soup(self.index) + topstories = soup.find('ul',attrs={'class':'list-def'}) + if topstories: + newsarticles = [] + for itt in topstories.findAll('li'): + itema = itt.find('a',href=True) + if itema: + itd1 = itema.findNextSibling(text = True) + itd2 = itd1.findNextSibling(text = True) + itd3 = itd2.findNextSibling(text = True) + newsarticles.append({ + 'title' :itema.string + ,'date' :''.join([itd1, itd2, itd3]) + ,'url' :'http://www.yomiuri.co.jp' + itema['href'] + ,'description':'' + }) + feeds.append(('World', newsarticles)) + return feeds + From 21969e157cf02059bcab8a35849321d720007602 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Sun, 28 Nov 2010 23:58:20 +0900 Subject: [PATCH 9/9] recipe: yomiuri_world: fix for fetching date field --- resources/recipes/yomiuri_world.recipe | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/resources/recipes/yomiuri_world.recipe b/resources/recipes/yomiuri_world.recipe index 0146ffa330..eae5a2a40a 100644 --- a/resources/recipes/yomiuri_world.recipe +++ b/resources/recipes/yomiuri_world.recipe @@ -50,11 +50,9 @@ class YOLNews(BasicNewsRecipe): itema = itt.find('a',href=True) if itema: itd1 = itema.findNextSibling(text = True) - itd2 = itd1.findNextSibling(text = True) - itd3 = itd2.findNextSibling(text = True) newsarticles.append({ 'title' :itema.string - ,'date' :''.join([itd1, itd2, itd3]) + ,'date' :''.join([itd1]) ,'url' :'http://www.yomiuri.co.jp' + itema['href'] ,'description':'' })