Various Japanese recipes by Hiroshi Miura

2025-08-11 09:13:57 -04:00 · 2010-11-23 09:04:13 -07:00 · 2010-11-23 09:04:13 -07:00 · 41ba44a793
commit 41ba44a793
parent eaeee277f0 9af8b9c322
28 changed files with 817 additions and 0 deletions
--- a/resources/images/news/cnetjapan.png
+++ b/resources/images/news/cnetjapan.png
--- a/resources/images/news/endgadget_ja.png
+++ b/resources/images/news/endgadget_ja.png
--- a/resources/images/news/jijinews.png
+++ b/resources/images/news/jijinews.png
--- a/resources/images/news/msnsankei.png
+++ b/resources/images/news/msnsankei.png
--- a/resources/images/news/nikkei_free.png
+++ b/resources/images/news/nikkei_free.png
--- a/resources/images/news/nikkei_sub_economy.png
+++ b/resources/images/news/nikkei_sub_economy.png
--- a/resources/images/news/nikkei_sub_industory.png
+++ b/resources/images/news/nikkei_sub_industory.png
--- a/resources/images/news/nikkei_sub_life.png
+++ b/resources/images/news/nikkei_sub_life.png
--- a/resources/images/news/nikkei_sub_main.png
+++ b/resources/images/news/nikkei_sub_main.png
--- a/resources/images/news/nikkei_sub_sports.png
+++ b/resources/images/news/nikkei_sub_sports.png
--- a/resources/images/news/reuters.png
+++ b/resources/images/news/reuters.png
--- a/resources/images/news/reuters_ja.png
+++ b/resources/images/news/reuters_ja.png
--- a/resources/recipes/avto-magazin.recipe
+++ b/resources/recipes/avto-magazin.recipe
@ -13,6 +13,7 @@ class Dnevnik(BasicNewsRecipe):
  labguage = 'sl'
  no_stylesheets = True
  use_embedded_content = False
  language = 'sl'
  conversion_options = {'linearize_tables' : True}
--- a/resources/recipes/cnetjapan.recipe
+++ b/resources/recipes/cnetjapan.recipe
@ -0,0 +1,32 @@
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class CNetJapan(BasicNewsRecipe):
    # Recipe for CNET Japan, driven by the site's RSS feed.
    title          = u'CNET Japan'
    oldest_article = 3
    max_articles_per_feed = 30
    __author__  = 'Hiroshi Miura'
    # (feed title, feed URL) pairs
    feeds          = [(u'cnet rss', u'http://feeds.japan.cnet.com/cnet/rss')]
    language       = 'ja'
    encoding       = 'Shift_JIS'
    remove_javascript = True
    # (compiled pattern, replacement callable) pairs applied to the page text.
    # The patterns are plain u'' literals instead of ur'' ones: they contain no
    # backslash other than the \u escapes, so the compiled pattern is the same
    # on Python 2, and the ur prefix is a syntax error on Python 3.
    preprocess_regexps = [
       # drop everything from the "contents_left END" marker up to </body>
       (re.compile(u'<!--\u25B2contents_left END\u25B2-->.*</body>',
                   re.DOTALL | re.IGNORECASE | re.UNICODE),
        lambda match: '</body>'),
       # drop everything from the AD_ELU_HEADER marker up to </body>
       (re.compile(r'<!--AD_ELU_HEADER-->.*</body>', re.DOTALL | re.IGNORECASE),
        lambda match: '</body>'),
       # replace the span between the two marker comments with a placeholder.
       # NOTE(review): this pattern has no re.DOTALL, so '.*' cannot cross a
       # newline and it only matches when both markers share a line -- confirm
       # that this is intended.
       (re.compile(u'<!-- \u25B2\u95A2\u9023\u30BF\u30B0\u25B2 -->.*<!-- \u25B2ZDNet\u25B2 -->',
                   re.UNICODE),
        lambda match: '<!-- removed -->'),
        ]
    # Tag-matching rules used to trim each article page.
    remove_tags_before = dict(name="h2")
    remove_tags = [
                   {'class':"social_bkm_share"},
                   {'class':"social_bkm_print"},
                   {'class':"block20 clearfix"},
                   dict(name="div",attrs={'id':'bookreview'}),
                    ]
    remove_tags_after = {'class':"block20"}
--- a/resources/recipes/endgadget_ja.recipe
+++ b/resources/recipes/endgadget_ja.recipe
@ -0,0 +1,22 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 '''
 japanese.engadget.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class EndgadgetJapan(BasicNewsRecipe):
    # Recipe for the Japanese edition of Engadget, driven by its RSS feed.
    title          = u'Endgadget\u65e5\u672c\u7248'
    __author__ = 'Hiroshi Miura'
    cover_url      = 'http://skins18.wincustomize.com/1/49/149320/29/7578/preview-29-7578.jpg'
    masthead_url   = 'http://www.blogsmithmedia.com/japanese.engadget.com/media/eng-jp-logo-t.png'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    # 'language' used to be assigned twice with the same value; once is enough.
    language = 'ja'
    encoding = 'utf-8'
    # (feed title, feed URL) pairs
    feeds          = [(u'engadget', u'http://japanese.engadget.com/rss.xml')]
--- a/resources/recipes/jijinews.recipe
+++ b/resources/recipes/jijinews.recipe
@ -0,0 +1,26 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 '''
 www.jiji.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class JijiDotCom(BasicNewsRecipe):
    # Recipe for Jiji Press (www.jiji.com), driven by its ranking RSS feed.
    # -- identity ---------------------------------------------------------
    title = u'\u6642\u4e8b\u901a\u4fe1'
    description = "World News from Jiji Press"
    publisher = "Jiji Press Ltd."
    category = "news"
    language = "ja"
    __author__ = "Hiroshi Miura"
    # -- artwork ----------------------------------------------------------
    masthead_url = "http://jen.jiji.com/images/logo_jijipress.gif"
    cover_url = "http://www.jiji.com/img/top_header_logo2.gif"
    # -- download settings ------------------------------------------------
    encoding = "utf-8"
    max_articles_per_feed = 100
    oldest_article = 6
    # (feed title, feed URL) pairs
    feeds = [
        (u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf'),
    ]
    # -- page trimming ----------------------------------------------------
    remove_tags_after = {'id': "ad_google"}
--- a/resources/recipes/mainichi.recipe
+++ b/resources/recipes/mainichi.recipe
@ -0,0 +1,26 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 '''
 www.mainichi.jp
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class MainichiDailyNews(BasicNewsRecipe):
    # Recipe for the Mainichi newspaper, driven by its "flash" RSS feed.
    # -- identity ---------------------------------------------------------
    title = u'\u6bce\u65e5\u65b0\u805e'
    description = "Japanese traditional newspaper Mainichi Daily News"
    publisher = "Mainichi Daily News"
    category = "news, japan"
    language = "ja"
    __author__ = "Hiroshi Miura"
    # -- download settings ------------------------------------------------
    max_articles_per_feed = 20
    oldest_article = 2
    # (feed title, feed URL) pairs
    feeds = [
        (u'daily news', u'http://mainichi.jp/rss/etc/flash.rss'),
    ]
    # -- page trimming (tag-matching rules keyed on the CSS class) --------
    remove_tags_before = dict([('class', "NewsTitle")])
    remove_tags_after = dict([('class', "Credit")])
    remove_tags = [dict([('class', "RelatedArticle")])]
--- a/resources/recipes/mainichi_it_news.recipe
+++ b/resources/recipes/mainichi_it_news.recipe
@ -0,0 +1,18 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class MainichiDailyITNews(BasicNewsRecipe):
    # Recipe for the Mainichi newspaper's IT and electronics feed.
    # -- identity ---------------------------------------------------------
    title = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
    description = "Japanese traditional newspaper Mainichi Daily News - IT and electronics"
    publisher = "Mainichi Daily News"
    category = "news, Japan, IT, Electronics"
    language = "ja"
    __author__ = "Hiroshi Miura"
    # -- download settings ------------------------------------------------
    max_articles_per_feed = 100
    oldest_article = 2
    # (feed title, feed URL) pairs
    feeds = [
        (u'IT News', u'http://mainichi.pheedo.jp/f/mainichijp_electronics'),
    ]
    # -- page trimming (tag-matching rules keyed on the CSS class) --------
    remove_tags_before = dict([('class', "NewsTitle")])
    remove_tags_after = dict([('class', "Credit")])
    remove_tags = [dict([('class', "RelatedArticle")])]
--- a/resources/recipes/msnsankei.recipe
+++ b/resources/recipes/msnsankei.recipe
@ -0,0 +1,24 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 '''
 sankei.jp.msn.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class MSNSankeiNewsProduct(BasicNewsRecipe):
    # Recipe for the MSN Sankei News product-release feed.
    # -- identity ---------------------------------------------------------
    title = u'MSN\u7523\u7d4c\u30cb\u30e5\u30fc\u30b9(\u65b0\u5546\u54c1)'
    description = "Products release from Japan"
    language = "ja"
    __author__ = "Hiroshi Miura"
    # -- download settings ------------------------------------------------
    encoding = "Shift_JIS"
    max_articles_per_feed = 100
    oldest_article = 7
    # (feed title, feed URL) pairs
    feeds = [
        (u'\u65b0\u5546\u54c1', u'http://sankei.jp.msn.com/rss/news/release.xml'),
    ]
    # -- page trimming ----------------------------------------------------
    remove_tags_before = {'id': "__r_article_title__"}
    remove_tags_after = {'id': "ajax_release_news"}
    remove_tags = [dict([('class', "parent chromeCustom6G")])]
--- a/resources/recipes/nikkei_free.recipe
+++ b/resources/recipes/nikkei_free.recipe
@ -0,0 +1,60 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 '''
 www.nikkei.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class NikkeiNet(BasicNewsRecipe):
    # Recipe for the free edition of Nikkei; every feed is served by the
    # nikkei2rss script on www.zou3.net.
    # -- identity ---------------------------------------------------------
    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Free)'
    description = "News and current market affairs from Japan"
    language = "ja"
    __author__ = "Hiroshi Miura"
    # -- artwork ----------------------------------------------------------
    masthead_url = "http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg"
    cover_url = "http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg"
    # -- download settings ------------------------------------------------
    max_articles_per_feed = 20
    oldest_article = 2
    # (feed title, feed URL) pairs
    feeds = [
        (u'\u65e5\u7d4c\u4f01\u696d', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sangyo'),
        (u'\u65e5\u7d4c\u88fd\u54c1', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=newpro'),
        (u'internet', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=internet'),
        (u'\u653f\u6cbb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'),
        (u'\u8ca1\u52d9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
        (u'\u7d4c\u6e08', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'),
        (u'\u56fd\u969b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kaigai'),
        (u'\u79d1\u5b66', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kagaku'),
        (u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'),
        (u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
        (u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
        (u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
        (u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
        (u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
        (u'\u96c7\u7528', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
        (u'\u6559\u80b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'),
        (u'\u304a\u304f\u3084\u307f', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'),
        (u'\u4eba\u4e8b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'),
        (u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
        (u'\u5730\u57df\u30cb\u30e5\u30fc\u30b9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=local'),
        (u'\u7d71\u8a08\u30fb\u767d\u66f8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=report'),
        (u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking'),
        (u'\u4f1a\u898b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=interview'),
        (u'\u793e\u8aac\u30fb\u6625\u79cb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shasetsu'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30d7\u30ed\u91ce\u7403', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=baseball'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u5927\u30ea\u30fc\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=mlb'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b4\u30eb\u30d5', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=golf'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u76f8\u64b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sumou'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u7af6\u99ac', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keiba'),
        (u'\u8abf\u67fb\u30fb\u30a2\u30f3\u30b1\u30fc\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=research'),
    ]
    # -- page trimming ----------------------------------------------------
    remove_tags_before = {'id': "CONTENTS"}
    remove_tags_after = dict([('class', "cmn-pr_list")])
    remove_tags = [
        {'name': "form"},
        dict([('class', "cmn-hide")]),
    ]
--- a/resources/recipes/nikkei_sub_economy.recipe
+++ b/resources/recipes/nikkei_sub_economy.recipe
@ -0,0 +1,109 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 '''
 www.nikkei.com
 '''
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 import mechanize
 from calibre.ptempfile import PersistentTemporaryFile
 class NikkeiNet_sub_economy(BasicNewsRecipe):
    # Recipe for the subscriber edition of Nikkei, economy sections.
    title           = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7d4c\u6e08)'
    __author__      = 'Hiroshi Miura'
    description     = 'News and current market affairs from Japan'
    cover_url       = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    masthead_url    = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    needs_subscription = True
    oldest_article  = 2
    max_articles_per_feed = 20
    language        = 'ja'
    remove_javascript = False
    # Cookie files written by get_browser(); as a class attribute this list is
    # shared by every instance of the recipe.
    temp_files = []
    remove_tags_before = {'class':"cmn-section cmn-indent"}
    remove_tags = [
                       {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
                       {'class':"cmn-article_keyword cmn-clearfix"},
                       {'class':"cmn-print_headline cmn-clearfix"},
                         ]
    remove_tags_after = {'class':"cmn-pr_list"}
    # (feed title, feed URL) pairs
    feeds = [
        (u'\u653f\u6cbb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'),
        (u'\u8ca1\u52d9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
        (u'\u7d4c\u6e08', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'),
        (u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'),
        (u'\u96c7\u7528', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
        (u'\u6559\u80b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'),
        (u'\u304a\u304f\u3084\u307f', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'),
        (u'\u4eba\u4e8b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'),
        ]
    def get_browser(self):
        # Return the browser used for downloads.  When both a username and a
        # password are set, the browser is first walked through the nikkei.com
        # login form and the redirect page that follows it.
        # Raises ValueError when the redirect page carries no checkValue script.
        br = BasicNewsRecipe.get_browser()
        cj = mechanize.LWPCookieJar()
        br.set_cookiejar(cj)
        if self.username is None or self.password is None:
            return br
        # open login form
        br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
        response = br.response()
        # remove disabled input which brings error on mechanize: the markup
        # from the input tag to the end of the image tag is commented out
        page = response.get_data()
        page = page.replace("<input id=\"j_id48\"", "<!-- ")
        page = page.replace("gm_home_on.gif\" />", " -->")
        response.set_data(page)
        br.set_response(response)
        br.select_form(name='LA0010Form01')
        br['LA0010Form01:LA0010Email']   = self.username
        br['LA0010Form01:LA0010Password'] = self.password
        br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
        br.submit()
        # open news site; it answers with a form that is submitted as-is
        # (forced redirect in default)
        br.open('http://www.nikkei.com/')
        br.select_form(nr=0)
        br.submit()
        # the answer holds a value that the site expects Javascript to store
        # as a cookie; grab it from the script source
        raw = br.response().get_data()
        found = re.search(r"var checkValue = '(\d+)';", raw, re.M)
        if found is None:
            # previously an AttributeError on None.group
            raise ValueError('nikkei.com login: checkValue not found in the redirect page')
        redirectflag = found.group(1)
        br.select_form(nr=0)
        # write the cookies in LWP format to a temp file and load them into
        # the jar before submitting the selected form
        cookie_attrs = '; domain=".nikkei.com"; path="/"; path_spec; secure; expires="2029-12-21 05:07:59Z"; version=0\n'
        cookie_file = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(cookie_file)
        cookie_file.write("#LWP-Cookies-2.0\n")
        cookie_file.write("Set-Cookie3: Cookie-dummy=Cookie-value" + cookie_attrs)
        cookie_file.write("Set-Cookie3: redirectFlag=" + redirectflag + cookie_attrs)
        cookie_file.close()
        cj.load(cookie_file.name)
        br.submit()
        return br
--- a/resources/recipes/nikkei_sub_industry.recipe
+++ b/resources/recipes/nikkei_sub_industry.recipe
@ -0,0 +1,108 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 '''
 www.nikkei.com
 '''
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 import mechanize
 from calibre.ptempfile import PersistentTemporaryFile
 class NikkeiNet_sub_industory(BasicNewsRecipe):
    # Recipe for the subscriber edition of Nikkei, industry sections.
    title           = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7523\u696d)'
    __author__      = 'Hiroshi Miura'
    description     = 'News and current market affairs from Japan'
    cover_url       = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    masthead_url    = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    needs_subscription = True
    oldest_article  = 2
    max_articles_per_feed = 20
    language        = 'ja'
    remove_javascript = False
    # Cookie files written by get_browser(); as a class attribute this list is
    # shared by every instance of the recipe.
    temp_files = []
    remove_tags_before = {'class':"cmn-section cmn-indent"}
    remove_tags = [
                       {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
                       {'class':"cmn-article_keyword cmn-clearfix"},
                       {'class':"cmn-print_headline cmn-clearfix"},
                         ]
    remove_tags_after = {'class':"cmn-pr_list"}
    # (feed title, feed URL) pairs
    feeds = [
        (u'\u65e5\u7d4c\u4f01\u696d', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sangyo'),
        (u'\u65e5\u7d4c\u88fd\u54c1', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=newpro'),
        (u'internet', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=internet'),
        (u'\u56fd\u969b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kaigai'),
        (u'\u79d1\u5b66', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kagaku'),
        ]
    def get_browser(self):
        # Return the browser used for downloads.  When both a username and a
        # password are set, the browser is first walked through the nikkei.com
        # login form and the redirect page that follows it.
        # Raises ValueError when the redirect page carries no checkValue script.
        br = BasicNewsRecipe.get_browser()
        cj = mechanize.LWPCookieJar()
        br.set_cookiejar(cj)
        if self.username is None or self.password is None:
            return br
        # open login form
        br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
        response = br.response()
        # remove disabled input which brings error on mechanize: the markup
        # from the input tag to the end of the image tag is commented out
        page = response.get_data()
        page = page.replace("<input id=\"j_id48\"", "<!-- ")
        page = page.replace("gm_home_on.gif\" />", " -->")
        response.set_data(page)
        br.set_response(response)
        br.select_form(name='LA0010Form01')
        br['LA0010Form01:LA0010Email']   = self.username
        br['LA0010Form01:LA0010Password'] = self.password
        br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
        br.submit()
        # open news site; it answers with a form that is submitted as-is
        # (forced redirect in default)
        br.open('http://www.nikkei.com/')
        br.select_form(nr=0)
        br.submit()
        # the answer holds a value that the site expects Javascript to store
        # as a cookie; grab it from the script source
        raw = br.response().get_data()
        found = re.search(r"var checkValue = '(\d+)';", raw, re.M)
        if found is None:
            # previously an AttributeError on None.group
            raise ValueError('nikkei.com login: checkValue not found in the redirect page')
        redirectflag = found.group(1)
        br.select_form(nr=0)
        # write the cookies in LWP format to a temp file and load them into
        # the jar before submitting the selected form
        cookie_attrs = '; domain=".nikkei.com"; path="/"; path_spec; secure; expires="2029-12-21 05:07:59Z"; version=0\n'
        cookie_file = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(cookie_file)
        cookie_file.write("#LWP-Cookies-2.0\n")
        cookie_file.write("Set-Cookie3: Cookie-dummy=Cookie-value" + cookie_attrs)
        cookie_file.write("Set-Cookie3: redirectFlag=" + redirectflag + cookie_attrs)
        cookie_file.close()
        cj.load(cookie_file.name)
        br.submit()
        return br
--- a/resources/recipes/nikkei_sub_life.recipe
+++ b/resources/recipes/nikkei_sub_life.recipe
@ -0,0 +1,109 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 '''
 www.nikkei.com
 '''
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 import mechanize
 from calibre.ptempfile import PersistentTemporaryFile
 class NikkeiNet_sub_life(BasicNewsRecipe):
    # Recipe for the subscriber edition of Nikkei, life sections.
    title           = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u751f\u6d3b)'
    __author__      = 'Hiroshi Miura'
    description     = 'News and current market affairs from Japan'
    cover_url       = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    masthead_url    = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    needs_subscription = True
    oldest_article  = 2
    max_articles_per_feed = 20
    language        = 'ja'
    remove_javascript = False
    # Cookie files written by get_browser(); as a class attribute this list is
    # shared by every instance of the recipe.
    temp_files = []
    remove_tags_before = {'class':"cmn-section cmn-indent"}
    remove_tags = [
                       {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
                       {'class':"cmn-article_keyword cmn-clearfix"},
                       {'class':"cmn-print_headline cmn-clearfix"},
                         ]
    remove_tags_after = {'class':"cmn-pr_list"}
    # (feed title, feed URL) pairs
    feeds = [
        (u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
        (u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
        (u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
        (u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
        (u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
        (u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
        (u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking'),
        ]
    def get_browser(self):
        # Return the browser used for downloads.  When both a username and a
        # password are set, the browser is first walked through the nikkei.com
        # login form and the redirect page that follows it.
        # Raises ValueError when the redirect page carries no checkValue script.
        br = BasicNewsRecipe.get_browser()
        cj = mechanize.LWPCookieJar()
        br.set_cookiejar(cj)
        if self.username is None or self.password is None:
            return br
        # open login form
        br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
        response = br.response()
        # remove disabled input which brings error on mechanize: the markup
        # from the input tag to the end of the image tag is commented out
        page = response.get_data()
        page = page.replace("<input id=\"j_id48\"", "<!-- ")
        page = page.replace("gm_home_on.gif\" />", " -->")
        response.set_data(page)
        br.set_response(response)
        br.select_form(name='LA0010Form01')
        br['LA0010Form01:LA0010Email']   = self.username
        br['LA0010Form01:LA0010Password'] = self.password
        br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
        br.submit()
        # open news site; it answers with a form that is submitted as-is
        # (forced redirect in default)
        br.open('http://www.nikkei.com/')
        br.select_form(nr=0)
        br.submit()
        # the answer holds a value that the site expects Javascript to store
        # as a cookie; grab it from the script source
        raw = br.response().get_data()
        found = re.search(r"var checkValue = '(\d+)';", raw, re.M)
        if found is None:
            # previously an AttributeError on None.group
            raise ValueError('nikkei.com login: checkValue not found in the redirect page')
        redirectflag = found.group(1)
        br.select_form(nr=0)
        # write the cookies in LWP format to a temp file and load them into
        # the jar before submitting the selected form
        cookie_attrs = '; domain=".nikkei.com"; path="/"; path_spec; secure; expires="2029-12-21 05:07:59Z"; version=0\n'
        cookie_file = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(cookie_file)
        cookie_file.write("#LWP-Cookies-2.0\n")
        cookie_file.write("Set-Cookie3: Cookie-dummy=Cookie-value" + cookie_attrs)
        cookie_file.write("Set-Cookie3: redirectFlag=" + redirectflag + cookie_attrs)
        cookie_file.close()
        cj.load(cookie_file.name)
        br.submit()
        return br
--- a/resources/recipes/nikkei_sub_main.recipe
+++ b/resources/recipes/nikkei_sub_main.recipe
@ -0,0 +1,102 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 '''
 www.nikkei.com
 '''
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 import mechanize
 from calibre.ptempfile import PersistentTemporaryFile
 class NikkeiNet_sub_main(BasicNewsRecipe):
    # Recipe for the subscriber edition of Nikkei, main feed.
    title           = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7dcf\u5408)'
    __author__      = 'Hiroshi Miura'
    description     = 'News and current market affairs from Japan'
    cover_url       = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    masthead_url    = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    needs_subscription = True
    oldest_article  = 2
    max_articles_per_feed = 20
    language        = 'ja'
    remove_javascript = False
    # Cookie files written by get_browser(); as a class attribute this list is
    # shared by every instance of the recipe.
    temp_files = []
    remove_tags_before = {'class':"cmn-section cmn-indent"}
    remove_tags = [
                       {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
                       {'class':"cmn-article_keyword cmn-clearfix"},
                       {'class':"cmn-print_headline cmn-clearfix"},
                         ]
    remove_tags_after = {'class':"cmn-pr_list"}
    # (feed title, feed URL) pairs
    feeds = [(u'NIKKEI', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=main')]
    def get_browser(self):
        # Return the browser used for downloads.  When both a username and a
        # password are set, the browser is first walked through the nikkei.com
        # login form and the redirect page that follows it.
        # Raises ValueError when the redirect page carries no checkValue script.
        br = BasicNewsRecipe.get_browser()
        cj = mechanize.LWPCookieJar()
        br.set_cookiejar(cj)
        if self.username is None or self.password is None:
            return br
        # open login form
        br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
        response = br.response()
        # remove disabled input which brings error on mechanize: the markup
        # from the input tag to the end of the image tag is commented out
        page = response.get_data()
        page = page.replace("<input id=\"j_id48\"", "<!-- ")
        page = page.replace("gm_home_on.gif\" />", " -->")
        response.set_data(page)
        br.set_response(response)
        br.select_form(name='LA0010Form01')
        br['LA0010Form01:LA0010Email']   = self.username
        br['LA0010Form01:LA0010Password'] = self.password
        br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
        br.submit()
        # open news site; it answers with a form that is submitted as-is
        # (forced redirect in default)
        br.open('http://www.nikkei.com/')
        br.select_form(nr=0)
        br.submit()
        # the answer holds a value that the site expects Javascript to store
        # as a cookie; grab it from the script source
        raw = br.response().get_data()
        found = re.search(r"var checkValue = '(\d+)';", raw, re.M)
        if found is None:
            # previously an AttributeError on None.group
            raise ValueError('nikkei.com login: checkValue not found in the redirect page')
        redirectflag = found.group(1)
        br.select_form(nr=0)
        # write the cookies in LWP format to a temp file and load them into
        # the jar before submitting the selected form
        cookie_attrs = '; domain=".nikkei.com"; path="/"; path_spec; secure; expires="2029-12-21 05:07:59Z"; version=0\n'
        cookie_file = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(cookie_file)
        cookie_file.write("#LWP-Cookies-2.0\n")
        cookie_file.write("Set-Cookie3: Cookie-dummy=Cookie-value" + cookie_attrs)
        cookie_file.write("Set-Cookie3: redirectFlag=" + redirectflag + cookie_attrs)
        cookie_file.close()
        cj.load(cookie_file.name)
        br.submit()
        return br
--- a/resources/recipes/nikkei_sub_sports.recipe
+++ b/resources/recipes/nikkei_sub_sports.recipe
@ -0,0 +1,109 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 '''
 www.nikkei.com
 '''
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 import mechanize
 from calibre.ptempfile import PersistentTemporaryFile
 class NikkeiNet_sub_sports(BasicNewsRecipe):
    '''
    Recipe for the sports feeds of the Nikkei electronic edition
    (www.nikkei.com). Articles are fetched through a browser that is
    logged in with the user's subscription credentials.
    '''
    title           = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u30b9\u30dd\u30fc\u30c4)'
    __author__      = 'Hiroshi Miura'
    description     = 'News and current market affairs from Japan'
    cover_url       = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    masthead_url    = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    needs_subscription = True
    oldest_article  = 2
    max_articles_per_feed = 20
    language        = 'ja'
    remove_javascript = False
    # Holds the temporary cookie files created by get_browser().
    temp_files = []

    remove_tags_before = {'class':"cmn-section cmn-indent"}
    remove_tags = [
        {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
        {'class':"cmn-article_keyword cmn-clearfix"},
        {'class':"cmn-print_headline cmn-clearfix"},
    ]
    remove_tags_after = {'class':"cmn-pr_list"}

    # (feed title, feed URL) pairs, one per sports category.
    feeds = [
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30d7\u30ed\u91ce\u7403', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=baseball'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u5927\u30ea\u30fc\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=mlb'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b4\u30eb\u30d5', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=golf'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u76f8\u64b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sumou'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u7af6\u99ac', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keiba'),
    ]

    def get_browser(self):
        '''
        Return the browser used to download articles.

        When both ``self.username`` and ``self.password`` are set, the
        browser submits the login form on id.nikkei.com, follows the
        redirect form served by www.nikkei.com and installs the
        ``redirectFlag`` cookie whose value is read from the returned
        page's JavaScript. Otherwise a browser with an empty cookie jar
        is returned without any network access.

        Raises ValueError when the page returned after the redirect does
        not contain the ``checkValue`` assignment (presumably because the
        login was rejected or the site layout changed).
        '''
        br = BasicNewsRecipe.get_browser()
        cj = mechanize.LWPCookieJar()
        br.set_cookiejar(cj)
        if self.username is None or self.password is None:
            return br

        # Open the login form.
        br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
        response = br.response()
        # Comment out the disabled input, which brings an error on mechanize.
        html = response.get_data()
        html = html.replace("<input id=\"j_id48\"", "<!-- ")
        html = html.replace("gm_home_on.gif\" />", " -->")
        response.set_data(html)
        br.set_response(response)

        # Fill in and send the login form.
        br.select_form(name='LA0010Form01')
        br['LA0010Form01:LA0010Email']   = self.username
        br['LA0010Form01:LA0010Password'] = self.password
        br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
        br.submit()

        # Open the news site; it answers with a forced redirect form.
        br.open('http://www.nikkei.com/')
        br.select_form(nr=0)
        br.submit()
        raw = br.response().get_data()

        # The site expects a cookie that is normally set by JavaScript, so
        # grab its value from the script source.
        match = re.search(r"var checkValue = '(\d+)';", raw, re.M)
        if match is None:
            raise ValueError('nikkei.com login failed: checkValue was not '
                             'found in the redirect response')
        redirectflag = match.group(1)
        br.select_form(nr=0)

        # Write the cookies in LWP format to a temporary file and load them
        # into the browser's cookie jar.
        cookie_attrs = "; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n"
        cookie_file = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(cookie_file)
        cookie_file.write("#LWP-Cookies-2.0\n")
        cookie_file.write("Set-Cookie3: Cookie-dummy=Cookie-value" + cookie_attrs)
        cookie_file.write("Set-Cookie3: redirectFlag=" + redirectflag + cookie_attrs)
        cookie_file.close()
        cj.load(cookie_file.name)
        br.submit()
        return br
--- a/resources/recipes/now_toronto.recipe
+++ b/resources/recipes/now_toronto.recipe
@ -13,6 +13,7 @@ class NowToronto(BasicNewsRecipe):
    title = u'Now Toronto'
    description = u'Now Toronto'
    __author__ = 'Starson17'
    language = 'en_CA'
    conversion_options = {
        'no_default_epub_cover' : True
    }
--- a/resources/recipes/reuters_ja.recipe
+++ b/resources/recipes/reuters_ja.recipe
@ -0,0 +1,37 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 import re
 class ReutersJa(BasicNewsRecipe):
    '''
    Recipe for the Japanese edition of Reuters, built from the
    feeds.reuters.com JP* RSS feeds.
    '''
    title = 'Reuters(Japan)'
    description = 'Global news in Japanese'
    __author__ = 'Hiroshi Miura'
    use_embedded_content   = False
    language = 'ja'
    max_articles_per_feed = 10
    remove_javascript = True

    # (feed title, feed URL) pairs.
    feeds = [
        ('Top Stories', 'http://feeds.reuters.com/reuters/JPTopNews?format=xml'),
        ('World News', 'http://feeds.reuters.com/reuters/JPWorldNews?format=xml'),
        ('Business News', 'http://feeds.reuters.com/reuters/JPBusinessNews?format=xml'),
        ('Technology News', 'http://feeds.reuters.com/reuters/JPTechnologyNews?format=xml'),
        ('Oddly Enough News', 'http://feeds.reuters.com/reuters/JPOddlyEnoughNews?format=xml'),
    ]

    remove_tags_before = {'class':"article primaryContent"}
    remove_tags = [
        dict(id="banner"),
        dict(id="autilities"),
        dict(id="textSizer"),
        dict(id="shareFooter"),
        dict(id="relatedNews"),
        dict(id="editorsChoice"),
        dict(id="ecArticles"),
        {'class':"secondaryContent"},
        {'class':"module"},
    ]
    remove_tags_after = {'class':"assetBuddy"}

    def print_version(self, url):
        '''
        Return the URL to download for the article at ``url``.

        Everything after the ``idJPJAPAN-<digits>`` article id is dropped
        and ``?sp=true`` is appended. A URL without such an id is returned
        unchanged instead of raising AttributeError on the failed match.
        '''
        m = re.search(r'(.*idJPJAPAN-[0-9]+)', url)
        if m is None:
            # Not a recognised article URL; fall back to the feed's link.
            return url
        return m.group(0)+'?sp=true'
--- a/resources/recipes/the_h.recipe
+++ b/resources/recipes/the_h.recipe
@ -0,0 +1,33 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
 '''
 www.h-online.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class TheHeiseOnline(BasicNewsRecipe):
    '''
    Recipe for The H (www.h-online.com), built from the site's Atom
    news feed.
    '''
    title          = u'The H'
    __author__     = 'Hiroshi Miura'
    language       = 'en'
    encoding       = 'utf-8'
    oldest_article = 3
    max_articles_per_feed = 100

    # Metadata that is also passed to the conversion step below.
    description    = 'In association with Heise Online'
    publisher      = 'Heise Media UK Ltd.'
    category       = 'news, technology, security'
    conversion_options = dict(
        comment=description,
        tags=category,
        publisher=publisher,
        language=language,
    )

    # (feed title, feed URL) pairs.
    feeds = [(u'The H News Feed', u'http://www.h-online.com/news/atom.xml')]

    def print_version(self, url):
        '''Return ``url`` with the ``?view=print`` query string appended.'''
        print_suffix = '?view=print'
        return url + print_suffix