Update various Japanese news sources

Merge branch 'update_japanese_recipes_201405' of https://github.com/miurahr/calibre
2025-07-09 03:04:10 -04:00 · 2014-05-07 06:30:18 +05:30 · 2014-05-07 06:30:18 +05:30 · 7be851a6a3
commit 7be851a6a3
parent c436362416 4bb290657d
13 changed files with 66 additions and 734 deletions
--- a/recipes/chouchoublog.recipe
+++ b/recipes/chouchoublog.recipe
@@ -1,37 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
-'''
-http://ameblo.jp/
-'''
-
-import re
-from calibre.web.feeds.news import BasicNewsRecipe
-
-class SakuraBlog(BasicNewsRecipe):
-    title          = u'chou chou blog'
-    __author__     = 'Hiroshi Miura'
-    oldest_article = 4
-    publication_type = 'blog'
-    max_articles_per_feed = 20
-    description    = 'Japanese popular dog blog'
-    publisher      = ''
-    category       = 'dog, pet, japan'
-    language       = 'ja'
-    encoding      = 'utf-8'
-    use_embedded_content = True
-
-    feeds          = [(u'blog', u'http://feedblog.ameba.jp/rss/ameblo/chouchou1218/rss20.xml')]
-
-    def parse_feeds(self):
-        feeds = BasicNewsRecipe.parse_feeds(self)
-        for curfeed in feeds:
-            delList = []
-            for a,curarticle in enumerate(curfeed.articles):
-                if re.search(r'rssad.jp', curarticle.url):
-                    delList.append(curarticle)
-            if len(delList)>0:
-                for d in delList:
-                    index = curfeed.articles.index(d)
-                    curfeed.articles[index:index+1] = []
-        return feeds
-
--- a/recipes/endgadget_ja.recipe
+++ b/recipes/endgadget_ja.recipe
@@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+__copyright__ = '2010,2014, Hiroshi Miura <miurahr@linux.com>'
 '''
 japan.engadget.com
 '''
@@ -20,19 +20,20 @@ class EndgadgetJapan(BasicNewsRecipe):
    index = 'http://japanese.engadget.com/'
    remove_javascript = True

-    remove_tags_before = dict(name="h1", attrs={'class':"post_title"})
-    remove_tags_after = dict(name='div', attrs={'class':'post_body'})
+    remove_tags_before = dict(name="header", attrs={'class':"header"})
+    remove_tags_after = dict(name='div', attrs={'class':'post-meta'})

    def parse_index(self):
        feeds = []
        newsarticles = []
        soup   = self.index_to_soup(self.index)
-        for topstories in soup.findAll('div',attrs={'class':'post_content'}):
-           itt = topstories.find('h4')
+        for topstories in soup.findAll('header',attrs={'class':'post-header'}):
+           itt = topstories.find('h2')
           itema = itt.find('a',href=True)
+           itemtime = topstories.find('span',attrs={'class':'time'})
           newsarticles.append({
                                      'title'      :itema.string
-                                     ,'date'       :''
+                                     ,'date'       :itemtime.string
                                     ,'url'        :itema['href']
                                     ,'description':''
                                    })
--- a/recipes/kahokushinpo.recipe
+++ b/recipes/kahokushinpo.recipe
@@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+__copyright__ = '2010,2014, Hiroshi Miura <miurahr@linux.com>'
 '''
 www.kahoku.co.jp
 '''
@@ -21,11 +21,5 @@ class KahokuShinpoNews(BasicNewsRecipe):

    feeds          = [(u'news', u'http://www.kahoku.co.jp/rss/index_thk.xml')]

-    keep_only_tags = [ dict(id="page_title"),
-                                   dict(id="news_detail"),
-                                   dict(id="bt_title"),
-                                   {'class':"photoLeft"},
-                                   dict(id="bt_body")
-                                 ]
-    remove_tags = [ {'class':"button"}]
+    keep_only_tags = [ {'class':"category"},{'class':"ttl"},{'class':'photoimg'},{'class':"txt"},{'class':"data"}]

--- a/recipes/msnsankei.recipe
+++ b/recipes/msnsankei.recipe
@@ -1,6 +1,6 @@

 __license__   = 'GPL v3'
-__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+__copyright__ = '2010,2014, Hiroshi Miura <miurahr@linux.com>'
 '''
 sankei.jp.msn.com
 '''
@@ -20,5 +20,4 @@ class MSNSankeiNewsProduct(BasicNewsRecipe):

    feeds          = [(u'\u65b0\u5546\u54c1', u'http://sankei.jp.msn.com/rss/news/release.xml')]

-    remove_tags_before = dict(id="NewsTitle")
-    remove_tags_after  = dict(id="RelatedTitle")
+    keep_only_tags = [dict(id=['MainContent'])]
--- a/recipes/nikkei_sub_economy.recipe
+++ b/recipes/nikkei_sub_economy.recipe
@@ -1,110 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
-'''
-www.nikkei.com
-'''
-
-import re
-from calibre.web.feeds.recipes import BasicNewsRecipe
-import mechanize
-from calibre.ptempfile import PersistentTemporaryFile
-
-class NikkeiNet_sub_economy(BasicNewsRecipe):
-    title           = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7d4c\u6e08)'
-    __author__      = 'Hiroshi Miura'
-    description     = 'News and current market affairs from Japan'
-    cover_url       = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
-    masthead_url    = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
-    needs_subscription = True
-    oldest_article  = 2
-    max_articles_per_feed = 20
-    language        = 'ja'
-    remove_javascript = False
-    temp_files = []
-
-    remove_tags_before = {'class':"cmn-section cmn-indent"}
-    remove_tags = [
-                       {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
-                       {'class':"cmn-article_keyword cmn-clearfix"},
-                       {'class':"cmn-print_headline cmn-clearfix"},
-                       {'class':"cmn-article_list"},
-                       dict(id="ABOUT-NIKKEI"),
-                       {'class':"cmn-sub_market"},
-                         ]
-    remove_tags_after = {'class':"cmn-pr_list"}
-
-    feeds = [  (u'\u653f\u6cbb', 		u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'),
-		 (u'\u8ca1\u52d9', 		u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
-		 (u'\u7d4c\u6e08', 		u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'),
-		 (u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'),
-		 (u'\u96c7\u7528', 		u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
-		 (u'\u6559\u80b2', 		u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'),
-		 (u'\u304a\u304f\u3084\u307f', 	u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'),
-		 (u'\u4eba\u4e8b', 		u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'),
-        ]
-
-    def get_browser(self):
-        br = BasicNewsRecipe.get_browser(self)
-
-        cj = mechanize.LWPCookieJar()
-        br.set_cookiejar(cj)
-
-        #br.set_debug_http(True)
-        #br.set_debug_redirects(True)
-        #br.set_debug_responses(True)
-
-        if self.username is not None and self.password is not None:
-            #print "----------------------------get login form--------------------------------------------"
-            # open login form
-            br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
-            response = br.response()
-            #print "----------------------------get login form---------------------------------------------"
-            #print "----------------------------set login form---------------------------------------------"
-            # remove disabled input which brings error on mechanize
-            response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
-            response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
-            br.set_response(response)
-            br.select_form(name='LA0010Form01')
-            br['LA0010Form01:LA0010Email']   = self.username
-            br['LA0010Form01:LA0010Password'] = self.password
-            br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
-            br.submit()
-            br.response()
-            #print "----------------------------send login form---------------------------------------------"
-            #print "----------------------------open news main page-----------------------------------------"
-            # open news site
-            br.open('http://www.nikkei.com/')
-            br.response()
-            #print "----------------------------www.nikkei.com BODY   --------------------------------------"
-            #print response2.get_data()
-            #print "-------------------------^^-got auto redirect form----^^--------------------------------"
-            # forced redirect in default
-            br.select_form(nr=0)
-            br.submit()
-            response3 = br.response()
-            # return some cookie which should be set by Javascript
-            #print response3.geturl()
-            raw = response3.get_data()
-            #print "---------------------------response to form --------------------------------------------"
-            # grab cookie from JS and set it
-            redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
-            br.select_form(nr=0)
-
-            self.temp_files.append(PersistentTemporaryFile('_fa.html'))
-            self.temp_files[-1].write("#LWP-Cookies-2.0\n")
-
-            self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
-            self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
-            self.temp_files[-1].close()
-            cj.load(self.temp_files[-1].name)
-
-            br.submit()
-
-            #br.set_debug_http(False)
-            #br.set_debug_redirects(False)
-            #br.set_debug_responses(False)
-        return br
-
-
-
-
--- a/recipes/nikkei_sub_industry.recipe
+++ b/recipes/nikkei_sub_industry.recipe
@@ -1,107 +0,0 @@
-
-__license__   = 'GPL v3'
-__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
-'''
-www.nikkei.com
-'''
-
-import re
-from calibre.web.feeds.recipes import BasicNewsRecipe
-import mechanize
-from calibre.ptempfile import PersistentTemporaryFile
-
-
-class NikkeiNet_sub_industory(BasicNewsRecipe):
-    title           = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7523\u696d)'
-    __author__      = 'Hiroshi Miura'
-    description     = 'News and current market affairs from Japan'
-    cover_url       = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
-    masthead_url    = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
-    needs_subscription = True
-    oldest_article  = 2
-    max_articles_per_feed = 20
-    language        = 'ja'
-    remove_javascript = False
-    temp_files = []
-
-    remove_tags_before = {'class':"cmn-section cmn-indent"}
-    remove_tags = [
-                       {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
-                       {'class':"cmn-article_keyword cmn-clearfix"},
-                       {'class':"cmn-print_headline cmn-clearfix"},
-                         ]
-    remove_tags_after = {'class':"cmn-pr_list"}
-
-    feeds = [ (u'\u65e5\u7d4c\u4f01\u696d', 	u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sangyo'),
-		 (u'\u65e5\u7d4c\u88fd\u54c1',	u'http://www.zou3.net/php/rss/nikkei2rss.php?head=newpro'),
-		 (u'internet',		u'http://www.zou3.net/php/rss/nikkei2rss.php?head=internet'),
-		 (u'\u56fd\u969b', 		u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kaigai'),
-		 (u'\u79d1\u5b66', 		u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kagaku'),
-
-        ]
-
-    def get_browser(self):
-        br = BasicNewsRecipe.get_browser(self)
-
-        cj = mechanize.LWPCookieJar()
-        br.set_cookiejar(cj)
-
-        #br.set_debug_http(True)
-        #br.set_debug_redirects(True)
-        #br.set_debug_responses(True)
-
-        if self.username is not None and self.password is not None:
-            #print "----------------------------get login form--------------------------------------------"
-            # open login form
-            br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
-            response = br.response()
-            #print "----------------------------get login form---------------------------------------------"
-            #print "----------------------------set login form---------------------------------------------"
-            # remove disabled input which brings error on mechanize
-            response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
-            response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
-            br.set_response(response)
-            br.select_form(name='LA0010Form01')
-            br['LA0010Form01:LA0010Email']   = self.username
-            br['LA0010Form01:LA0010Password'] = self.password
-            br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
-            br.submit()
-            br.response()
-            #print "----------------------------send login form---------------------------------------------"
-            #print "----------------------------open news main page-----------------------------------------"
-            # open news site
-            br.open('http://www.nikkei.com/')
-            br.response()
-            #print "----------------------------www.nikkei.com BODY   --------------------------------------"
-            #print response2.get_data()
-            #print "-------------------------^^-got auto redirect form----^^--------------------------------"
-            # forced redirect in default
-            br.select_form(nr=0)
-            br.submit()
-            response3 = br.response()
-            # return some cookie which should be set by Javascript
-            #print response3.geturl()
-            raw = response3.get_data()
-            #print "---------------------------response to form --------------------------------------------"
-            # grab cookie from JS and set it
-            redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
-            br.select_form(nr=0)
-
-            self.temp_files.append(PersistentTemporaryFile('_fa.html'))
-            self.temp_files[-1].write("#LWP-Cookies-2.0\n")
-
-            self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
-            self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
-            self.temp_files[-1].close()
-            cj.load(self.temp_files[-1].name)
-
-            br.submit()
-
-            #br.set_debug_http(False)
-            #br.set_debug_redirects(False)
-            #br.set_debug_responses(False)
-        return br
-
-
-
-
--- a/recipes/nikkei_sub_life.recipe
+++ b/recipes/nikkei_sub_life.recipe
@@ -1,104 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
-'''
-www.nikkei.com
-'''
-
-import re
-from calibre.web.feeds.recipes import BasicNewsRecipe
-import mechanize
-from calibre.ptempfile import PersistentTemporaryFile
-
-
-class NikkeiNet_sub_life(BasicNewsRecipe):
-    title           = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u751f\u6d3b)'
-    __author__      = 'Hiroshi Miura'
-    description     = 'News and current market affairs from Japan'
-    cover_url       = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
-    masthead_url    = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
-    needs_subscription = True
-    oldest_article  = 2
-    max_articles_per_feed = 20
-    language        = 'ja'
-    remove_javascript = False
-    temp_files = []
-
-    remove_tags_before = {'class':"cmn-section cmn-indent"}
-    remove_tags = [
-                       {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
-                       {'class':"cmn-article_keyword cmn-clearfix"},
-                       {'class':"cmn-print_headline cmn-clearfix"},
-                         ]
-    remove_tags_after = {'class':"cmn-pr_list"}
-
-    feeds = [  (u'\u304f\u3089\u3057', 	u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
-		 (u'\u30a8\u30b3', 		u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
-		 (u'\u5065\u5eb7', 		u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
-		 (u'\u7279\u96c6', 		u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special')
-        ]
-
-    def get_browser(self):
-        br = BasicNewsRecipe.get_browser(self)
-
-        cj = mechanize.LWPCookieJar()
-        br.set_cookiejar(cj)
-
-        #br.set_debug_http(True)
-        #br.set_debug_redirects(True)
-        #br.set_debug_responses(True)
-
-        if self.username is not None and self.password is not None:
-            #print "----------------------------get login form--------------------------------------------"
-            # open login form
-            br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
-            response = br.response()
-            #print "----------------------------get login form---------------------------------------------"
-            #print "----------------------------set login form---------------------------------------------"
-            # remove disabled input which brings error on mechanize
-            response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
-            response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
-            br.set_response(response)
-            br.select_form(name='LA0010Form01')
-            br['LA0010Form01:LA0010Email']   = self.username
-            br['LA0010Form01:LA0010Password'] = self.password
-            br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
-            br.submit()
-            br.response()
-            #print "----------------------------send login form---------------------------------------------"
-            #print "----------------------------open news main page-----------------------------------------"
-            # open news site
-            br.open('http://www.nikkei.com/')
-            br.response()
-            #print "----------------------------www.nikkei.com BODY   --------------------------------------"
-            #print response2.get_data()
-            #print "-------------------------^^-got auto redirect form----^^--------------------------------"
-            # forced redirect in default
-            br.select_form(nr=0)
-            br.submit()
-            response3 = br.response()
-            # return some cookie which should be set by Javascript
-            #print response3.geturl()
-            raw = response3.get_data()
-            #print "---------------------------response to form --------------------------------------------"
-            # grab cookie from JS and set it
-            redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
-            br.select_form(nr=0)
-
-            self.temp_files.append(PersistentTemporaryFile('_fa.html'))
-            self.temp_files[-1].write("#LWP-Cookies-2.0\n")
-
-            self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
-            self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
-            self.temp_files[-1].close()
-            cj.load(self.temp_files[-1].name)
-
-            br.submit()
-
-            #br.set_debug_http(False)
-            #br.set_debug_redirects(False)
-            #br.set_debug_responses(False)
-        return br
-
-
-
-
--- a/recipes/nikkei_sub_main.recipe
+++ b/recipes/nikkei_sub_main.recipe
@@ -1,103 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
-'''
-www.nikkei.com
-'''
-
-import re
-from calibre.web.feeds.recipes import BasicNewsRecipe
-import mechanize
-from calibre.ptempfile import PersistentTemporaryFile
-
-
-class NikkeiNet_sub_main(BasicNewsRecipe):
-    title           = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7dcf\u5408)'
-    __author__      = 'Hiroshi Miura'
-    description     = 'News and current market affairs from Japan'
-    cover_url       = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
-    masthead_url    = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
-    needs_subscription = True
-    oldest_article  = 2
-    max_articles_per_feed = 20
-    language        = 'ja'
-    remove_javascript = False
-    temp_files = []
-
-    remove_tags_before = {'class':"cmn-section cmn-indent"}
-    remove_tags = [
-                       {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
-                       {'class':"cmn-article_keyword cmn-clearfix"},
-                       {'class':"cmn-print_headline cmn-clearfix"},
-                       {'class':"cmn-article_list"},
-                       {'class':"cmn-dashedline"},
-                       {'class':"cmn-hide"},
-                         ]
-    remove_tags_after = {'class':"cmn-pr_list"}
-
-    feeds = [ (u'NIKKEI', 	u'http://www.zou3.net/php/rss/nikkei2rss.php?head=main')]
-
-    def get_browser(self):
-        br = BasicNewsRecipe.get_browser(self)
-
-        cj = mechanize.LWPCookieJar()
-        br.set_cookiejar(cj)
-
-        #br.set_debug_http(True)
-        #br.set_debug_redirects(True)
-        #br.set_debug_responses(True)
-
-        if self.username is not None and self.password is not None:
-            #print "----------------------------get login form--------------------------------------------"
-            # open login form
-            br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
-            response = br.response()
-            #print "----------------------------get login form---------------------------------------------"
-            #print "----------------------------set login form---------------------------------------------"
-            # remove disabled input which brings error on mechanize
-            response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
-            response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
-            br.set_response(response)
-            br.select_form(name='LA0010Form01')
-            br['LA0010Form01:LA0010Email']   = self.username
-            br['LA0010Form01:LA0010Password'] = self.password
-            br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
-            br.submit()
-            br.response()
-            #print "----------------------------send login form---------------------------------------------"
-            #print "----------------------------open news main page-----------------------------------------"
-            # open news site
-            br.open('http://www.nikkei.com/')
-            br.response()
-            #print "----------------------------www.nikkei.com BODY   --------------------------------------"
-            #print response2.get_data()
-            #print "-------------------------^^-got auto redirect form----^^--------------------------------"
-            # forced redirect in default
-            br.select_form(nr=0)
-            br.submit()
-            response3 = br.response()
-            # return some cookie which should be set by Javascript
-            #print response3.geturl()
-            raw = response3.get_data()
-            #print "---------------------------response to form --------------------------------------------"
-            # grab cookie from JS and set it
-            redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
-            br.select_form(nr=0)
-
-            self.temp_files.append(PersistentTemporaryFile('_fa.html'))
-            self.temp_files[-1].write("#LWP-Cookies-2.0\n")
-
-            self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
-            self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
-            self.temp_files[-1].close()
-            cj.load(self.temp_files[-1].name)
-
-            br.submit()
-
-            #br.set_debug_http(False)
-            #br.set_debug_redirects(False)
-            #br.set_debug_responses(False)
-        return br
-
-
-
-
--- a/recipes/nikkei_sub_shakai.recipe
+++ b/recipes/nikkei_sub_shakai.recipe
@@ -1,102 +0,0 @@
-__license__   = 'GPL v3'
-__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
-'''
-www.nikkei.com
-'''
-
-import re
-from calibre.web.feeds.recipes import BasicNewsRecipe
-import mechanize
-from calibre.ptempfile import PersistentTemporaryFile
-
-
-class NikkeiNet_sub_shakai(BasicNewsRecipe):
-    title           = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Social)'
-    __author__      = 'Hiroshi Miura'
-    description     = 'News and current market affairs from Japan'
-    cover_url       = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
-    masthead_url    = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
-    needs_subscription = True
-    oldest_article  = 2
-    max_articles_per_feed = 20
-    language        = 'ja'
-    remove_javascript = False
-    temp_files = []
-
-    remove_tags_before = {'class':"cmn-section cmn-indent"}
-    remove_tags = [
-                       {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
-                       {'class':"cmn-article_keyword cmn-clearfix"},
-                       {'class':"cmn-print_headline cmn-clearfix"},
-                         ]
-    remove_tags_after = {'class':"cmn-pr_list"}
-
-    feeds = [ 
-		 (u'\u793e\u4f1a', 		u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai')
-        ]
-
-    def get_browser(self):
-        br = BasicNewsRecipe.get_browser(self)
-
-        cj = mechanize.LWPCookieJar()
-        br.set_cookiejar(cj)
-
-        #br.set_debug_http(True)
-        #br.set_debug_redirects(True)
-        #br.set_debug_responses(True)
-
-        if self.username is not None and self.password is not None:
-            #print "----------------------------get login form--------------------------------------------"
-            # open login form
-            br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
-            response = br.response()
-            #print "----------------------------get login form---------------------------------------------"
-            #print "----------------------------set login form---------------------------------------------"
-            # remove disabled input which brings error on mechanize
-            response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
-            response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
-            br.set_response(response)
-            br.select_form(name='LA0010Form01')
-            br['LA0010Form01:LA0010Email']   = self.username
-            br['LA0010Form01:LA0010Password'] = self.password
-            br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
-            br.submit()
-            br.response()
-            #print "----------------------------send login form---------------------------------------------"
-            #print "----------------------------open news main page-----------------------------------------"
-            # open news site
-            br.open('http://www.nikkei.com/')
-            br.response()
-            #print "----------------------------www.nikkei.com BODY   --------------------------------------"
-            #print response2.get_data()
-            #print "-------------------------^^-got auto redirect form----^^--------------------------------"
-            # forced redirect in default
-            br.select_form(nr=0)
-            br.submit()
-            response3 = br.response()
-            # return some cookie which should be set by Javascript
-            #print response3.geturl()
-            raw = response3.get_data()
-            #print "---------------------------response to form --------------------------------------------"
-            # grab cookie from JS and set it
-            redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
-            br.select_form(nr=0)
-
-            self.temp_files.append(PersistentTemporaryFile('_fa.html'))
-            self.temp_files[-1].write("#LWP-Cookies-2.0\n")
-
-            self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
-            self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
-            self.temp_files[-1].close()
-            cj.load(self.temp_files[-1].name)
-
-            br.submit()
-
-            #br.set_debug_http(False)
-            #br.set_debug_redirects(False)
-            #br.set_debug_responses(False)
-        return br
-
-
-
-
--- a/recipes/nikkei_sub_sports.recipe
+++ b/recipes/nikkei_sub_sports.recipe
@ -1,108 +0,0 @@
-
-__license__   = 'GPL v3'
-__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
-'''
-www.nikkei.com
-'''
-
-import re
-from calibre.web.feeds.recipes import BasicNewsRecipe
-import mechanize
-from calibre.ptempfile import PersistentTemporaryFile
-
-
-class NikkeiNet_sub_sports(BasicNewsRecipe):
-    title           = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u30b9\u30dd\u30fc\u30c4)'
-    __author__      = 'Hiroshi Miura'
-    description     = 'News and current market affairs from Japan'
-    cover_url       = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
-    masthead_url    = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
-    needs_subscription = True
-    oldest_article  = 2
-    max_articles_per_feed = 20
-    language        = 'ja'
-    remove_javascript = False
-    temp_files = []
-
-    remove_tags_before = {'class':"cmn-section cmn-indent"}
-    remove_tags = [
-                       {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
-                       {'class':"cmn-article_keyword cmn-clearfix"},
-                       {'class':"cmn-print_headline cmn-clearfix"},
-                         ]
-    remove_tags_after = {'class':"cmn-pr_list"}
-
-    feeds = [
-		 (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30d7\u30ed\u91ce\u7403', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=baseball'),
-		 (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u5927\u30ea\u30fc\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=mlb'),
-		 (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
-		 (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b4\u30eb\u30d5', 	u'http://www.zou3.net/php/rss/nikkei2rss.php?head=golf'),
-		 (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u76f8\u64b2', 	u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sumou'),
-		 (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u7af6\u99ac', 	u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keiba')
-        ]
-
-    def get_browser(self):
-        br = BasicNewsRecipe.get_browser(self)
-
-        cj = mechanize.LWPCookieJar()
-        br.set_cookiejar(cj)
-
-        #br.set_debug_http(True)
-        #br.set_debug_redirects(True)
-        #br.set_debug_responses(True)
-
-        if self.username is not None and self.password is not None:
-            #print "----------------------------get login form--------------------------------------------"
-            # open login form
-            br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
-            response = br.response()
-            #print "----------------------------get login form---------------------------------------------"
-            #print "----------------------------set login form---------------------------------------------"
-            # remove disabled input which brings error on mechanize
-            response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
-            response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
-            br.set_response(response)
-            br.select_form(name='LA0010Form01')
-            br['LA0010Form01:LA0010Email']   = self.username
-            br['LA0010Form01:LA0010Password'] = self.password
-            br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
-            br.submit()
-            br.response()
-            #print "----------------------------send login form---------------------------------------------"
-            #print "----------------------------open news main page-----------------------------------------"
-            # open news site
-            br.open('http://www.nikkei.com/')
-            br.response()
-            #print "----------------------------www.nikkei.com BODY   --------------------------------------"
-            #print response2.get_data()
-            #print "-------------------------^^-got auto redirect form----^^--------------------------------"
-            # forced redirect in default
-            br.select_form(nr=0)
-            br.submit()
-            response3 = br.response()
-            # return some cookie which should be set by Javascript
-            #print response3.geturl()
-            raw = response3.get_data()
-            #print "---------------------------response to form --------------------------------------------"
-            # grab cookie from JS and set it
-            redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
-            br.select_form(nr=0)
-
-            self.temp_files.append(PersistentTemporaryFile('_fa.html'))
-            self.temp_files[-1].write("#LWP-Cookies-2.0\n")
-
-            self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
-            self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
-            self.temp_files[-1].close()
-            cj.load(self.temp_files[-1].name)
-
-            br.submit()
-
-            #br.set_debug_http(False)
-            #br.set_debug_redirects(False)
-            #br.set_debug_responses(False)
-        return br
-
-
-
-
--- a/recipes/uninohimitu.recipe
+++ b/recipes/uninohimitu.recipe
@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+__copyright__ = '2010,2014, Hiroshi Miura <miurahr@linux.com>'
 '''
 http://ameblo.jp/sauta19/
 '''
@ -18,7 +18,7 @@ class UniNoHimituKichiBlog(BasicNewsRecipe):
    category       = 'cat, pet, japan'
    language       = 'ja'
    encoding      = 'utf-8'
-
+    keep_only_tags = [{'class':'entry_head'},{'class':'subContentsInner'}]
    feeds          = [(u'blog', u'http://feedblog.ameba.jp/rss/ameblo/sauta19/rss20.xml')]

    def parse_feeds(self):
--- a/recipes/yomiuri.recipe
+++ b/recipes/yomiuri.recipe
@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+__copyright__ = '2010,2014, Hiroshi Miura <miurahr@linux.com>'
 '''
 www.yomiuri.co.jp
 '''
@ -16,16 +16,13 @@ class YOLNews(BasicNewsRecipe):
    publisher      = 'Yomiuri Online News'
    category       = 'news, japan'
    language       = 'ja'
-    encoding       = 'Shift_JIS'
+    encoding       = 'UTF-8'
    index          = 'http://www.yomiuri.co.jp/latestnews/'
    remove_javascript = True
    masthead_title = u'YOMIURI ONLINE'

-    keep_only_tags = [{'class':"article-def"}]
-    remove_tags = [{'class':"RelatedArticle"},
-                   {'class':"sbtns"}
-                    ]
-    remove_tags_after = {'class':"date-def"}
+
+    keep_only_tags = [{'class':"article text-resizeable"}]

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
@ -42,22 +39,22 @@ class YOLNews(BasicNewsRecipe):

    def parse_index(self):
        feeds = []
+        newsarticles = []
        soup   = self.index_to_soup(self.index)
-        topstories = soup.find('ul',attrs={'class':'list-def'})
-        if topstories:
-           newsarticles = []
-           for itt in topstories.findAll('li'):
-                itema = itt.find('a',href=True)
-                if itema:
-                    itd1 = itema.findNextSibling(text = True)
-                    itd2 = itd1.findNextSibling(text = True)
-                    itd3 = itd2.findNextSibling(text = True)
-                    newsarticles.append({
-                                      'title'      :itema.string
-                                     ,'date'       :''.join([itd1, itd2, itd3])
-                                     ,'url'        :'http://www.yomiuri.co.jp' + itema['href']
-                                     ,'description':''
-                                    })
-           feeds.append(('latest', newsarticles))
+        listlatest = soup.find('ul', attrs={'class':'list-common list-common-latest'})
+        if listlatest:
+                for itt in listlatest.findAll('li'):
+                    itema = itt.find('a',href=True)
+                    if itema:
+                        item_headline = itema.find('span',attrs={'class':'headline'})
+                        item_date     = item_headline.find('span',attrs={'class':'update'})
+                        newsarticles.append({
+                               'title'      :item_headline.contents[0]
+                              ,'date'       :item_date
+                              ,'url'        :itema['href']
+                              ,'description':''
+                        })
+        feeds.append(('latest', newsarticles))
        return feeds

+
--- a/recipes/yomiuri_world.recipe
+++ b/recipes/yomiuri_world.recipe
@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+__copyright__ = '2010,2014, Hiroshi Miura <miurahr@linux.com>'
 '''
 www.yomiuri.co.jp
 '''
@ -16,16 +16,12 @@ class YOLNews(BasicNewsRecipe):
    publisher      = 'Yomiuri Online News'
    category       = 'news, japan'
    language       = 'ja'
-    encoding       = 'Shift_JIS'
+    encoding       = 'UTF-8'
    index          = 'http://www.yomiuri.co.jp/world/'
    remove_javascript = True
    masthead_title = u"YOMIURI ONLINE"

-    keep_only_tags = [{'class':"article-def"}]
-    remove_tags = [{'class':"RelatedArticle"},
-                   {'class':"sbtns"}
-                    ]
-    remove_tags_after = {'class':"date-def"}
+    keep_only_tags = [{'class':"article text-resizeable"}]

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
@ -42,20 +38,36 @@ class YOLNews(BasicNewsRecipe):

    def parse_index(self):
        feeds = []
+        newsarticles = []
        soup   = self.index_to_soup(self.index)
-        topstories = soup.find('ul',attrs={'class':'list-def'})
-        if topstories:
-           newsarticles = []
-           for itt in topstories.findAll('li'):
-                itema = itt.find('a',href=True)
-                if itema:
-                    itd1 = itema.findNextSibling(text = True)
-                    newsarticles.append({
-                                      'title'      :itema.string
-                                     ,'date'       :''.join([itd1])
-                                     ,'url'        :'http://www.yomiuri.co.jp' + itema['href']
-                                     ,'description':''
-                                    })
-           feeds.append(('World', newsarticles))
+        mainspan = soup.find('div', attrs={'class':'pbNested span-main-inr'})
+        if mainspan:
+            topstories = mainspan.find('ul',attrs={'class':'list-top'})
+            if topstories:
+                for itt in topstories.findAll('li'):
+                    itema = itt.find('a',href=True)
+                    if itema:
+                        item_headline = itema.find('span',attrs={'class':'headline'})
+                        item_date     = item_headline.find('span',attrs={'class':'update'})
+                        newsarticles.append({
+                               'title'      :item_headline.contents[0]
+                              ,'date'       :item_date
+                              ,'url'        :itema['href']
+                              ,'description':''
+                        })
+            secondstories = mainspan.find('ul', attrs={'class':'list-common'})
+            if secondstories:
+                for itt in secondstories.findAll('li'):
+                    itema = itt.find('a',href=True)
+                    if itema:
+                        item_headline = itema.find('span',attrs={'class':'headline'})
+                        item_date     = item_headline.find('span',attrs={'class':'update'})
+                        newsarticles.append({
+                               'title'      :item_headline.contents[0]
+                              ,'date'       :item_date
+                              ,'url'        :itema['href']
+                              ,'description':''
+                        })
+        feeds.append(('World', newsarticles))
        return feeds