From ff6c024d2bf2947906b82241415ead9e9caeced8 Mon Sep 17 00:00:00 2001
From: Hiroshi Miura
Date: Thu, 9 Dec 2010 23:48:57 +0900
Subject: [PATCH 1/7] add Kahoku Shinpo News and pet cat blog

---
 resources/recipes/kahokushinpo.recipe | 32 ++++++++++++++++++++++++
 resources/recipes/uninohimitu.recipe  | 36 +++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)
 create mode 100644 resources/recipes/kahokushinpo.recipe
 create mode 100644 resources/recipes/uninohimitu.recipe

diff --git a/resources/recipes/kahokushinpo.recipe b/resources/recipes/kahokushinpo.recipe
new file mode 100644
index 0000000000..6e084d83cc
--- /dev/null
+++ b/resources/recipes/kahokushinpo.recipe
@@ -0,0 +1,32 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura '
+'''
+www.kahoku.co.jp
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class KahokuShinpoNews(BasicNewsRecipe):
+    title = u'Kahoku Shinpo News'
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 2
+    max_articles_per_feed = 20
+    description = 'Tohoku regional news paper in Japan'
+    publisher = 'Kahoku Shinpo Sha'
+    category = 'news, japan'
+    language = 'ja'
+    encoding = 'Shift_JIS'
+
+
+    feeds = [(u'news', u'http://www.kahoku.co.jp/rss/index_thk.xml')]
+
+    keep_only_tags = [ dict(id="page_title"),
+                       dict(id="news_detail"),
+                       dict(id="bt_title"),
+                       {'class':"photoLeft"},
+                       dict(id="bt_body")
+                     ]
+    remove_tags = [ {'class':"button"}]
+
diff --git a/resources/recipes/uninohimitu.recipe b/resources/recipes/uninohimitu.recipe
new file mode 100644
index 0000000000..aac412744c
--- /dev/null
+++ b/resources/recipes/uninohimitu.recipe
@@ -0,0 +1,36 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura '
+'''
+http://ameblo.jp/sauta19/
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class UniNoHimituKichiBlog(BasicNewsRecipe):
+    title = u'Uni secret base'
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 2
+    publication_type = 'blog'
+    max_articles_per_feed = 20
+    description = 'Japanese famous Cat blog'
+    publisher = ''
+    category = 'cat, pet, japan'
+    language = 'ja'
+    encoding = 'utf-8'
+
+    feeds = [(u'blog', u'http://feedblog.ameba.jp/rss/ameblo/sauta19/rss20.xml')]
+
+    def parse_feeds(self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'rssad.jp', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+        return feeds
+

From 34df6efff9256813718a12174ada30e04311867b Mon Sep 17 00:00:00 2001
From: Hiroshi Miura
Date: Fri, 10 Dec 2010 09:50:09 +0900
Subject: [PATCH 2/7] recipe: add popular blog about internet technologies.

---
 resources/recipes/ajiajin.recipe | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 resources/recipes/ajiajin.recipe

diff --git a/resources/recipes/ajiajin.recipe b/resources/recipes/ajiajin.recipe
new file mode 100644
index 0000000000..c5f052982b
--- /dev/null
+++ b/resources/recipes/ajiajin.recipe
@@ -0,0 +1,24 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura '
+'''
+ajiajin.com/blog
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AjiajinBlog(BasicNewsRecipe):
+    title = u'Ajiajin blog'
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 5
+    publication_type = 'blog'
+    max_articles_per_feed = 100
+    description = 'The next generation internet trends in Japan and Asia'
+    publisher = ''
+    category = 'internet, asia, japan'
+    language = 'en'
+    encoding = 'utf-8'
+
+    feeds = [(u'blog', u'http://feeds.feedburner.com/Asiajin')]
+
+

From ee5e7abe0b77b6566cf1f215fcac4fe5b49ed697 Mon Sep 17 00:00:00 2001
From: Hiroshi Miura
Date: Sat, 11 Dec 2010 11:30:22 +0900
Subject: [PATCH 3/7] recipe: Nikkei social - fix typo in title and function name

---
 resources/recipes/nikkei_sub_shakai.recipe | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/recipes/nikkei_sub_shakai.recipe b/resources/recipes/nikkei_sub_shakai.recipe
index ed86493265..9a53e910e6 100644
--- a/resources/recipes/nikkei_sub_shakai.recipe
+++ b/resources/recipes/nikkei_sub_shakai.recipe
@@ -10,8 +10,8 @@ import mechanize
 from calibre.ptempfile import PersistentTemporaryFile
 
 
-class NikkeiNet_sub_life(BasicNewsRecipe):
-    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u751f\u6d3b)'
+class NikkeiNet_sub_shakai(BasicNewsRecipe):
+    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Social)'
     __author__ = 'Hiroshi Miura'
     description = 'News and current market affairs from Japan'
     cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'

From a43274e55a4060bf864ecf1c8f54c64b0c3cee5f Mon Sep 17 00:00:00 2001
From: Hiroshi Miura
Date: Sun, 12 Dec 2010 12:56:52 +0900
Subject: [PATCH 4/7] recipe: add paper.li recipes

---
 resources/recipes/paperli.recipe       | 58 +++++++++++++++++++++++++
 resources/recipes/paperli_topic.recipe | 59 ++++++++++++++++++++++++++
 2 files changed, 117 insertions(+)
 create mode 100644 resources/recipes/paperli.recipe
 create mode 100644 resources/recipes/paperli_topic.recipe

diff --git a/resources/recipes/paperli.recipe b/resources/recipes/paperli.recipe
new file mode 100644
index 0000000000..2c99e5dc81
--- /dev/null
+++ b/resources/recipes/paperli.recipe
@@ -0,0 +1,58 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura '
+'''
+paperli
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre import strftime
+import re, sys
+
+class paperli(BasicNewsRecipe):
+#-------------------please change here ----------------
+    paperli_tag = 'osm'
+    title = u'The # osm Daily - paperli'
+#-------------------------------------------------------------
+    base_url = 'http://paper.li'
+    index = '/tag/'+paperli_tag+'/~list'
+
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    description = 'paper.li page'
+    publisher = 'paper.li'
+    category = 'paper.li'
+    language = 'en'
+    encoding = 'utf-8'
+    remove_javascript = True
+    timefmt = '[%y/%m/%d]'
+
+    def parse_index(self):
+        feeds = []
+        newsarticles = []
+        topic = 'HEADLINE'
+
+        #for pages
+        page = self.index
+        while True:
+            soup = self.index_to_soup(''.join([self.base_url,page]))
+            for itt in soup.findAll('div',attrs={'class':'yui-u'}):
+                itema = itt.find('a',href=True,attrs={'class':'ts'})
+                if itema is not None:
+                    itemd = itt.find('div',text=True, attrs={'class':'text'})
+                    newsarticles.append({
+                        'title'      :itema.string
+                       ,'date'       :strftime(self.timefmt)
+                       ,'url'        :itema['href']
+                       ,'description':itemd.string
+                    })
+
+            nextpage = soup.find('div',attrs={'class':'pagination_top'}).find('li', attrs={'class':'next'})
+            if nextpage is not None:
+                page = nextpage.find('a', href=True)['href']
+            else:
+                break
+
+        feeds.append((topic, newsarticles))
+        return feeds
+
diff --git a/resources/recipes/paperli_topic.recipe b/resources/recipes/paperli_topic.recipe
new file mode 100644
index 0000000000..3906af362f
--- /dev/null
+++ b/resources/recipes/paperli_topic.recipe
@@ -0,0 +1,59 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura '
+'''
+paperli
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre import strftime
+import re
+
+class paperli_topics(BasicNewsRecipe):
+#-------------------please change here ----------------
+    paperli_tag = 'wikileaks'
+    title = u'The # wikileaks Daily - paperli'
+#-------------------------------------------------------------
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    description = 'paper.li page about '+ paperli_tag
+    publisher = 'paper.li'
+    category = 'paper.li'
+    language = 'en'
+    encoding = 'utf-8'
+    remove_javascript = True
+    masthead_title = u'The '+ paperli_tag +' Daily'
+    timefmt = '[%y/%m/%d]'
+    base_url = 'http://paper.li'
+    index = base_url+'/tag/'+paperli_tag
+
+
+    def parse_index(self):
+
+        # get topics
+        topics = []
+        soup = self.index_to_soup(self.index)
+        topics_lists = soup.find('div',attrs={'class':'paper-nav-bottom'})
+        for item in topics_lists.findAll('li', attrs={'class':""}):
+            itema = item.find('a',href=True)
+            topics.append({'title': itema.string, 'url': itema['href']})
+
+        #get feeds
+        feeds = []
+        for topic in topics:
+            newsarticles = []
+            soup = self.index_to_soup(''.join([self.base_url, topic['url'] ]))
+            topstories = soup.findAll('div',attrs={'class':'yui-u'})
+            for itt in topstories:
+                itema = itt.find('a',href=True,attrs={'class':'ts'})
+                if itema is not None:
+                    itemd = itt.find('div',text=True, attrs={'class':'text'})
+                    newsarticles.append({
+                        'title'      :itema.string
+                       ,'date'       :strftime(self.timefmt)
+                       ,'url'        :itema['href']
+                       ,'description':itemd.string
+                    })
+            feeds.append((topic['title'], newsarticles))
+        return feeds
+

From 1efd975625c1f32e52722f7ab18e3f099496c274 Mon Sep 17 00:00:00 2001
From: Hiroshi Miura
Date: Sun, 12 Dec 2010 12:58:32 +0900
Subject: [PATCH 5/7] recipe: fix kahoku shinpo

---
 resources/recipes/kahokushinpo.recipe | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/recipes/kahokushinpo.recipe b/resources/recipes/kahokushinpo.recipe
index 6e084d83cc..172014d3a0 100644
--- a/resources/recipes/kahokushinpo.recipe
+++ b/resources/recipes/kahokushinpo.recipe
@@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 
 
 class KahokuShinpoNews(BasicNewsRecipe):
-    title = u'Kahoku Shinpo News'
+    title = u'\u6cb3\u5317\u65b0\u5831'
     __author__ = 'Hiroshi Miura'
     oldest_article = 2
     max_articles_per_feed = 20
@@ -18,7 +18,7 @@ class KahokuShinpoNews(BasicNewsRecipe):
     category = 'news, japan'
     language = 'ja'
     encoding = 'Shift_JIS'
-
+    no_stylesheets = True
 
     feeds = [(u'news', u'http://www.kahoku.co.jp/rss/index_thk.xml')]
 

From d18bef33e11c20be510339e9ffe7bca665ff6dde Mon Sep 17 00:00:00 2001
From: Hiroshi Miura
Date: Sun, 12 Dec 2010 22:28:55 +0900
Subject: [PATCH 6/7] recipe: add national geographic news

- national geographic Japan
- national geographic News
---
 resources/recipes/nationalgeographic.recipe   | 38 +++++++++++++++++++
 resources/recipes/nationalgeographicjp.recipe | 20 ++++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 resources/recipes/nationalgeographic.recipe
 create mode 100644 resources/recipes/nationalgeographicjp.recipe

diff --git a/resources/recipes/nationalgeographic.recipe b/resources/recipes/nationalgeographic.recipe
new file mode 100644
index 0000000000..b540f9b044
--- /dev/null
+++ b/resources/recipes/nationalgeographic.recipe
@@ -0,0 +1,38 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura '
+'''
+nationalgeographic.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class NationalGeographicNews(BasicNewsRecipe):
+    title = u'National Geographic News'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_javascript = True
+    no_stylesheets = True
+    use_embedded_content = False
+
+    feeds = [(u'news', u'http://feeds.nationalgeographic.com/ng/News/News_Main')]
+
+    remove_tags_before = dict(id='page_head')
+    remove_tags_after = [dict(id='social_buttons'),{'class':'aside'}]
+    remove_tags = [
+        {'class':'hidden'}
+
+    ]
+
+    def parse_feeds(self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'ads\.pheedo\.com', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+        return feeds
diff --git a/resources/recipes/nationalgeographicjp.recipe b/resources/recipes/nationalgeographicjp.recipe
new file mode 100644
index 0000000000..5798acb102
--- /dev/null
+++ b/resources/recipes/nationalgeographicjp.recipe
@@ -0,0 +1,20 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura '
+'''
+nationalgeographic.co.jp
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class NationalGeoJp(BasicNewsRecipe):
+    title = u'\u30ca\u30b7\u30e7\u30ca\u30eb\u30fb\u30b8\u30aa\u30b0\u30e9\u30d5\u30a3\u30c3\u30af\u30cb\u30e5\u30fc\u30b9'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+
+    feeds = [(u'news', u'http://www.nationalgeographic.co.jp/news/rss.php')]
+
+    def print_version(self, url):
+        return re.sub(r'news_article.php','news_printer_friendly.php', url)
+

From c3bbe2cc8659db1c13bf4f001c09bb3e3f658145 Mon Sep 17 00:00:00 2001
From: Hiroshi Miura
Date: Sun, 12 Dec 2010 22:46:55 +0900
Subject: [PATCH 7/7] recipe: add dog blog in Japanese

---
 resources/recipes/chouchoublog.recipe | 37 +++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 resources/recipes/chouchoublog.recipe

diff --git a/resources/recipes/chouchoublog.recipe b/resources/recipes/chouchoublog.recipe
new file mode 100644
index 0000000000..8c953deef0
--- /dev/null
+++ b/resources/recipes/chouchoublog.recipe
@@ -0,0 +1,37 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura '
+'''
+http://ameblo.jp/
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class SakuraBlog(BasicNewsRecipe):
+    title = u'chou chou blog'
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 4
+    publication_type = 'blog'
+    max_articles_per_feed = 20
+    description = 'Japanese popular dog blog'
+    publisher = ''
+    category = 'dog, pet, japan'
+    language = 'ja'
+    encoding = 'utf-8'
+    use_embedded_content = True
+
+    feeds = [(u'blog', u'http://feedblog.ameba.jp/rss/ameblo/chouchou1218/rss20.xml')]
+
+    def parse_feeds(self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'rssad.jp', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+        return feeds
+