From ff6c024d2bf2947906b82241415ead9e9caeced8 Mon Sep 17 00:00:00 2001
From: Hiroshi Miura
Date: Thu, 9 Dec 2010 23:48:57 +0900
Subject: [PATCH 1/7] add Kahoku Shinpo News and pet cat blog

---
 resources/recipes/kahokushinpo.recipe | 32 ++++++++++++++++++++++++
 resources/recipes/uninohimitu.recipe  | 36 +++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)
 create mode 100644 resources/recipes/kahokushinpo.recipe
 create mode 100644 resources/recipes/uninohimitu.recipe

diff --git a/resources/recipes/kahokushinpo.recipe b/resources/recipes/kahokushinpo.recipe
new file mode 100644
index 0000000000..6e084d83cc
--- /dev/null
+++ b/resources/recipes/kahokushinpo.recipe
@@ -0,0 +1,32 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura '
+'''
+www.kahoku.co.jp
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class KahokuShinpoNews(BasicNewsRecipe):
+    title = u'Kahoku Shinpo News'
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 2
+    max_articles_per_feed = 20
+    description = 'Tohoku regional news paper in Japan'
+    publisher = 'Kahoku Shinpo Sha'
+    category = 'news, japan'
+    language = 'ja'
+    encoding = 'Shift_JIS'
+
+
+    feeds = [(u'news', u'http://www.kahoku.co.jp/rss/index_thk.xml')]
+
+    keep_only_tags = [ dict(id="page_title"),
+                       dict(id="news_detail"),
+                       dict(id="bt_title"),
+                       {'class':"photoLeft"},
+                       dict(id="bt_body")
+                     ]
+    remove_tags = [ {'class':"button"}]
+
diff --git a/resources/recipes/uninohimitu.recipe b/resources/recipes/uninohimitu.recipe
new file mode 100644
index 0000000000..aac412744c
--- /dev/null
+++ b/resources/recipes/uninohimitu.recipe
@@ -0,0 +1,36 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura '
+'''
+http://ameblo.jp/sauta19/
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class UniNoHimituKichiBlog(BasicNewsRecipe):
+    title = u'Uni secret base'
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 2
+    publication_type = 'blog'
+    max_articles_per_feed = 20
+    description = 'Japanese famous Cat blog'
+    publisher = ''
+    category = 'cat, pet, japan'
+    language = 'ja'
+    encoding = 'utf-8'
+
+    feeds = [(u'blog', u'http://feedblog.ameba.jp/rss/ameblo/sauta19/rss20.xml')]
+
+    def parse_feeds(self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'rssad.jp', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+        return feeds
+

From 34df6efff9256813718a12174ada30e04311867b Mon Sep 17 00:00:00 2001
From: Hiroshi Miura
Date: Fri, 10 Dec 2010 09:50:09 +0900
Subject: [PATCH 2/7] recipe: add popular blog about internet technologies.

---
 resources/recipes/ajiajin.recipe | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 resources/recipes/ajiajin.recipe

diff --git a/resources/recipes/ajiajin.recipe b/resources/recipes/ajiajin.recipe
new file mode 100644
index 0000000000..c5f052982b
--- /dev/null
+++ b/resources/recipes/ajiajin.recipe
@@ -0,0 +1,24 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura '
+'''
+ajiajin.com/blog
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AjiajinBlog(BasicNewsRecipe):
+    title = u'Ajiajin blog'
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 5
+    publication_type = 'blog'
+    max_articles_per_feed = 100
+    description = 'The next generation internet trends in Japan and Asia'
+    publisher = ''
+    category = 'internet, asia, japan'
+    language = 'en'
+    encoding = 'utf-8'
+
+    feeds = [(u'blog', u'http://feeds.feedburner.com/Asiajin')]
+
+

From ee5e7abe0b77b6566cf1f215fcac4fe5b49ed697 Mon Sep 17 00:00:00 2001
From: Hiroshi Miura
Date: Sat, 11 Dec 2010 11:30:22 +0900
Subject: [PATCH 3/7] recipe: Nikkei social - fix typo in title and function name

---
 resources/recipes/nikkei_sub_shakai.recipe | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/recipes/nikkei_sub_shakai.recipe b/resources/recipes/nikkei_sub_shakai.recipe
index ed86493265..9a53e910e6 100644
--- a/resources/recipes/nikkei_sub_shakai.recipe
+++ b/resources/recipes/nikkei_sub_shakai.recipe
@@ -10,8 +10,8 @@ import mechanize
 from calibre.ptempfile import PersistentTemporaryFile
 
 
-class NikkeiNet_sub_life(BasicNewsRecipe):
-    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u751f\u6d3b)'
+class NikkeiNet_sub_shakai(BasicNewsRecipe):
+    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Social)'
     __author__ = 'Hiroshi Miura'
     description = 'News and current market affairs from Japan'
     cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'

From a43274e55a4060bf864ecf1c8f54c64b0c3cee5f Mon Sep 17 00:00:00 2001
From: Hiroshi Miura
Date: Sun, 12 Dec 2010 12:56:52 +0900
Subject: [PATCH 4/7] recipe: add paper.li recipes

---
 resources/recipes/paperli.recipe       | 58 +++++++++++++++++++++++++
 resources/recipes/paperli_topic.recipe | 59 ++++++++++++++++++++++++++
 2 files changed, 117 insertions(+)
 create mode 100644 resources/recipes/paperli.recipe
 create mode 100644 resources/recipes/paperli_topic.recipe

diff --git a/resources/recipes/paperli.recipe b/resources/recipes/paperli.recipe
new file mode 100644
index 0000000000..2c99e5dc81
--- /dev/null
+++ b/resources/recipes/paperli.recipe
@@ -0,0 +1,58 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura '
+'''
+paperli
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre import strftime
+import re, sys
+
+class paperli(BasicNewsRecipe):
+#-------------------please change here ----------------
+    paperli_tag = 'osm'
+    title = u'The # osm Daily - paperli'
+#-------------------------------------------------------------
+    base_url = 'http://paper.li'
+    index = '/tag/'+paperli_tag+'/~list'
+
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    description = 'paper.li page'
+    publisher = 'paper.li'
+    category = 'paper.li'
+    language = 'en'
+    encoding = 'utf-8'
+    remove_javascript = True
+    timefmt = '[%y/%m/%d]'
+
+    def parse_index(self):
+        feeds = []
+        newsarticles = []
+        topic = 'HEADLINE'
+
+        #for pages
+        page = self.index
+        while True:
+            soup = self.index_to_soup(''.join([self.base_url,page]))
+            for itt in soup.findAll('div',attrs={'class':'yui-u'}):
+                itema = itt.find('a',href=True,attrs={'class':'ts'})
+                if itema is not None:
+                    itemd = itt.find('div',text=True, attrs={'class':'text'})
+                    newsarticles.append({
+                        'title'      :itema.string
+                       ,'date'       :strftime(self.timefmt)
+                       ,'url'        :itema['href']
+                       ,'description':itemd.string
+                    })
+
+            nextpage = soup.find('div',attrs={'class':'pagination_top'}).find('li', attrs={'class':'next'})
+            if nextpage is not None:
+                page = nextpage.find('a', href=True)['href']
+            else:
+                break
+
+        feeds.append((topic, newsarticles))
+        return feeds
+
diff --git a/resources/recipes/paperli_topic.recipe b/resources/recipes/paperli_topic.recipe
new file mode 100644
index 0000000000..3906af362f
--- /dev/null
+++ b/resources/recipes/paperli_topic.recipe
@@ -0,0 +1,59 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura '
+'''
+paperli
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre import strftime
+import re
+
+class paperli_topics(BasicNewsRecipe):
+#-------------------please change here ----------------
+    paperli_tag = 'wikileaks'
+    title = u'The # wikileaks Daily - paperli'
+#-------------------------------------------------------------
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    description = 'paper.li page about '+ paperli_tag
+    publisher = 'paper.li'
+    category = 'paper.li'
+    language = 'en'
+    encoding = 'utf-8'
+    remove_javascript = True
+    masthead_title = u'The '+ paperli_tag +' Daily'
+    timefmt = '[%y/%m/%d]'
+    base_url = 'http://paper.li'
+    index = base_url+'/tag/'+paperli_tag
+
+
+    def parse_index(self):
+
+        # get topics
+        topics = []
+        soup = self.index_to_soup(self.index)
+        topics_lists = soup.find('div',attrs={'class':'paper-nav-bottom'})
+        for item in topics_lists.findAll('li', attrs={'class':""}):
+            itema = item.find('a',href=True)
+            topics.append({'title': itema.string, 'url': itema['href']})
+
+        #get feeds
+        feeds = []
+        for topic in topics:
+            newsarticles = []
+            soup = self.index_to_soup(''.join([self.base_url, topic['url'] ]))
+            topstories = soup.findAll('div',attrs={'class':'yui-u'})
+            for itt in topstories:
+                itema = itt.find('a',href=True,attrs={'class':'ts'})
+                if itema is not None:
+                    itemd = itt.find('div',text=True, attrs={'class':'text'})
+                    newsarticles.append({
+                        'title'      :itema.string
+                       ,'date'       :strftime(self.timefmt)
+                       ,'url'        :itema['href']
+                       ,'description':itemd.string
+                    })
+            feeds.append((topic['title'], newsarticles))
+        return feeds
+

From 1efd975625c1f32e52722f7ab18e3f099496c274 Mon Sep 17 00:00:00 2001
From: Hiroshi Miura
Date: Sun, 12 Dec 2010 12:58:32 +0900
Subject: [PATCH 5/7] recipe: fix kahoku shinpo

---
 resources/recipes/kahokushinpo.recipe | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/recipes/kahokushinpo.recipe b/resources/recipes/kahokushinpo.recipe
index 6e084d83cc..172014d3a0 100644
--- a/resources/recipes/kahokushinpo.recipe
+++ b/resources/recipes/kahokushinpo.recipe
@@ -9,7 +9,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 
 
 class KahokuShinpoNews(BasicNewsRecipe):
-    title = u'Kahoku Shinpo News'
+    title = u'\u6cb3\u5317\u65b0\u5831'
     __author__ = 'Hiroshi Miura'
     oldest_article = 2
     max_articles_per_feed = 20
@@ -18,7 +18,7 @@ class KahokuShinpoNews(BasicNewsRecipe):
     category = 'news, japan'
     language = 'ja'
     encoding = 'Shift_JIS'
-
+    no_stylesheets = True
 
     feeds = [(u'news', u'http://www.kahoku.co.jp/rss/index_thk.xml')]
 

From d18bef33e11c20be510339e9ffe7bca665ff6dde Mon Sep 17 00:00:00 2001
From: Hiroshi Miura
Date: Sun, 12 Dec 2010 22:28:55 +0900
Subject: [PATCH 6/7] recipe: add national geographic news

- national geographic Japan
- national geographic News
---
 resources/recipes/nationalgeographic.recipe   | 38 +++++++++++++++++++
 resources/recipes/nationalgeographicjp.recipe | 20 ++++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 resources/recipes/nationalgeographic.recipe
 create mode 100644 resources/recipes/nationalgeographicjp.recipe

diff --git a/resources/recipes/nationalgeographic.recipe b/resources/recipes/nationalgeographic.recipe
new file mode 100644
index 0000000000..b540f9b044
--- /dev/null
+++ b/resources/recipes/nationalgeographic.recipe
@@ -0,0 +1,38 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura '
+'''
+nationalgeographic.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class NationalGeographicNews(BasicNewsRecipe):
+    title = u'National Geographic News'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_javascript = True
+    no_stylesheets = True
+    use_embedded_content = False
+
+    feeds = [(u'news', u'http://feeds.nationalgeographic.com/ng/News/News_Main')]
+
+    remove_tags_before = dict(id='page_head')
+    remove_tags_after = [dict(id='social_buttons'),{'class':'aside'}]
+    remove_tags = [
+        {'class':'hidden'}
+
+    ]
+
+    def parse_feeds(self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'ads\.pheedo\.com', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+        return feeds
diff --git a/resources/recipes/nationalgeographicjp.recipe b/resources/recipes/nationalgeographicjp.recipe
new file mode 100644
index 0000000000..5798acb102
--- /dev/null
+++ b/resources/recipes/nationalgeographicjp.recipe
@@ -0,0 +1,20 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura '
+'''
+nationalgeographic.co.jp
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class NationalGeoJp(BasicNewsRecipe):
+    title = u'\u30ca\u30b7\u30e7\u30ca\u30eb\u30fb\u30b8\u30aa\u30b0\u30e9\u30d5\u30a3\u30c3\u30af\u30cb\u30e5\u30fc\u30b9'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+
+    feeds = [(u'news', u'http://www.nationalgeographic.co.jp/news/rss.php')]
+
+    def print_version(self, url):
+        return re.sub(r'news_article.php','news_printer_friendly.php', url)
+

From c3bbe2cc8659db1c13bf4f001c09bb3e3f658145 Mon Sep 17 00:00:00 2001
From: Hiroshi Miura
Date: Sun, 12 Dec 2010 22:46:55 +0900
Subject: [PATCH 7/7] recipe: add dog blog in Japanese

---
 resources/recipes/chouchoublog.recipe | 37 +++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 resources/recipes/chouchoublog.recipe

diff --git a/resources/recipes/chouchoublog.recipe b/resources/recipes/chouchoublog.recipe
new file mode 100644
index 0000000000..8c953deef0
--- /dev/null
+++ b/resources/recipes/chouchoublog.recipe
@@ -0,0 +1,37 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura '
+'''
+http://ameblo.jp/
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class SakuraBlog(BasicNewsRecipe):
+    title = u'chou chou blog'
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 4
+    publication_type = 'blog'
+    max_articles_per_feed = 20
+    description = 'Japanese popular dog blog'
+    publisher = ''
+    category = 'dog, pet, japan'
+    language = 'ja'
+    encoding = 'utf-8'
+    use_embedded_content = True
+
+    feeds = [(u'blog', u'http://feedblog.ameba.jp/rss/ameblo/chouchou1218/rss20.xml')]
+
+    def parse_feeds(self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'rssad.jp', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+        return feeds
+