import re

import mechanize
from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.recipes import BasicNewsRecipe


class NikkeiNet_subscription(BasicNewsRecipe):
    """Subscription recipe for nikkei.com (the "MAX" variant).

    ``get_browser`` signs in with the configured username/password and
    passes the site's redirect check; ``feeds`` lists one RSS URL per
    news section.
    """

    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(MAX)'
    __author__ = 'Hiroshi Miura'
    description = 'News and current market affairs from Japan, gather MAX articles'
    needs_subscription = True
    oldest_article = 2
    max_articles_per_feed = 10
    language = 'ja'
    remove_javascript = False
    # References to the temporary files created by get_browser().
    temp_files = []

    remove_tags_before = {'class': "cmn-section cmn-indent"}
    remove_tags = [
        {'class': "JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
        {'class': "cmn-article_keyword cmn-clearfix"},
        {'class': "cmn-print_headline cmn-clearfix"},
    ]
    remove_tags_after = {'class': "cmn-pr_list"}

    # Extracts the numeric value the redirect page assigns to `checkValue`
    # in its inline script. A bytes pattern, because it is matched against
    # the raw response body.
    _CHECK_VALUE_RE = re.compile(br"var checkValue = '(\d+)';", re.M)

    # Attribute tail shared by both hand-written cookie lines.
    _COOKIE_ATTRS = (
        b'; domain=".nikkei.com"; path="/"; path_spec; secure; '
        b'expires="2029-12-21 05:07:59Z"; version=0\n'
    )

    def get_browser(self):
        """Return a browser, signed in to nikkei.com when credentials are set.

        A fresh ``mechanize.LWPCookieJar`` is always attached to the browser.
        When both ``self.username`` and ``self.password`` are set, the login
        form is submitted and the site's redirect check is completed before
        the browser is returned; otherwise the browser is returned as is.

        Raises:
            ValueError: if the redirect page does not contain the expected
                ``checkValue`` script variable.
        """
        br = BasicNewsRecipe.get_browser(self)

        cj = mechanize.LWPCookieJar()
        br.set_cookiejar(cj)

        # For troubleshooting the login flow, br.set_debug_http(True),
        # br.set_debug_redirects(True) and br.set_debug_responses(True)
        # can be enabled here.
        if self.username is not None and self.password is not None:
            self._submit_login_form(br)
            self._pass_redirect_check(br, cj)

        return br

    def _submit_login_form(self, br):
        """Open the nikkei.com login page, fill in the form and submit it."""
        br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
        response = br.response()

        # Remove a disabled input which brings an error on mechanize, by
        # wrapping it in an HTML comment before the form is parsed.
        #
        # NOTE(review): this call had degenerated into
        # replace("", " -->"), which inserts the terminator between every
        # character of the page. The start/end markers below are a
        # reconstruction - confirm both against the current login markup.
        #
        # '\x3c' is the less-than sign; it is spelled as an escape so that
        # markup-stripping tools cannot swallow these literals.
        # Byte literals keep this working when the body is bytes
        # (presumably the case with mechanize on Python 3 - confirm); on
        # Python 2 they are ordinary str.
        page = response.get_data()
        page = page.replace(b'\x3cinput id="j_id48"', b'\x3c!-- ')
        page = page.replace(b'gm_home_on.gif" />', b' -->')
        response.set_data(page)
        br.set_response(response)

        br.select_form(name='LA0010Form01')
        br['LA0010Form01:LA0010Email'] = self.username
        br['LA0010Form01:LA0010Password'] = self.password
        auto_login = br.form.find_control(
            id='LA0010Form01:LA0010AutoLoginOn', type="checkbox")
        auto_login.get(nr=0).selected = True
        br.submit()

    def _pass_redirect_check(self, br, cj):
        """Complete the redirect form, setting the cookie its script would set."""
        # Open the news site; by default it answers with a forced-redirect
        # form, which is submitted as is.
        br.open('http://www.nikkei.com/')
        br.select_form(nr=0)
        br.submit()

        # The response returns a cookie value which should be set by
        # JavaScript; grab it from the script source instead.
        raw = br.response().get_data()
        match = self._CHECK_VALUE_RE.search(raw)
        if match is None:
            raise ValueError(
                "nikkei.com redirect page did not contain the expected "
                "'checkValue' script variable")
        redirectflag = match.group(1)

        br.select_form(nr=0)

        # Write a small LWP-format cookie file and load it into the jar.
        # NOTE(review): bytes are written on the assumption that
        # PersistentTemporaryFile opens in binary mode by default - confirm.
        cookie_file = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(cookie_file)
        try:
            cookie_file.write(b"#LWP-Cookies-2.0\n")
            cookie_file.write(
                b"Set-Cookie3: Cookie-dummy=Cookie-value" + self._COOKIE_ATTRS)
            cookie_file.write(
                b"Set-Cookie3: redirectFlag=" + redirectflag + self._COOKIE_ATTRS)
        finally:
            # Close even if a write fails so the handle is not leaked.
            cookie_file.close()

        cj.load(cookie_file.name)
        br.submit()

    # (title, url) pairs; the `head` query parameter selects the section.
    feeds = [
        (u'\u65e5\u7d4c\u4f01\u696d', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sangyo'),
        (u'\u65e5\u7d4c\u88fd\u54c1', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=newpro'),
        (u'internet', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=internet'),
        (u'\u653f\u6cbb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'),
        (u'\u8ca1\u52d9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
        (u'\u7d4c\u6e08', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'),
        (u'\u56fd\u969b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kaigai'),
        (u'\u79d1\u5b66', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kagaku'),
        (u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'),
        (u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
        (u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
        (u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
        (u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
        (u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
        (u'\u96c7\u7528', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
        (u'\u6559\u80b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'),
        (u'\u304a\u304f\u3084\u307f', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'),
        (u'\u4eba\u4e8b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'),
        (u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
        (u'\u5730\u57df\u30cb\u30e5\u30fc\u30b9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=local'),
        (u'\u7d71\u8a08\u30fb\u767d\u66f8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=report'),
        (u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking'),
        (u'\u4f1a\u898b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=interview'),
        (u'\u793e\u8aac\u30fb\u6625\u79cb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shasetsu'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30d7\u30ed\u91ce\u7403', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=baseball'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u5927\u30ea\u30fc\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=mlb'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b4\u30eb\u30d5', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=golf'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u76f8\u64b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sumou'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u7af6\u99ac', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keiba'),
        (u'\u8abf\u67fb\u30fb\u30a2\u30f3\u30b1\u30fc\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=research'),
    ]