mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
104 lines
4.6 KiB
Plaintext
104 lines
4.6 KiB
Plaintext
__license__ = 'GPL v3'
|
|
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
|
'''
|
|
www.nikkei.com
|
|
'''
|
|
|
|
import re
|
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
import mechanize
|
|
from calibre.ptempfile import PersistentTemporaryFile
|
|
|
|
|
|
class NikkeiNet_sub_main(BasicNewsRecipe):
    '''
    Recipe for the general-news feed of the Nikkei electronic edition
    (www.nikkei.com).

    The recipe is marked as needing a subscription; get_browser() logs in
    with the configured username and password before returning the browser.
    '''

    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7dcf\u5408)'
    __author__ = 'Hiroshi Miura'
    description = 'News and current market affairs from Japan'
    cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    needs_subscription = True
    oldest_article = 2
    max_articles_per_feed = 20
    language = 'ja'
    remove_javascript = False
    # Keeps references to the temporary cookie files written by get_browser().
    temp_files = []

    remove_tags_before = {'class': "cmn-section cmn-indent"}
    remove_tags = [
        {'class': "JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
        {'class': "cmn-article_keyword cmn-clearfix"},
        {'class': "cmn-print_headline cmn-clearfix"},
        {'class': "cmn-article_list"},
        {'class': "cmn-dashedline"},
        {'class': "cmn-hide"},
    ]
    remove_tags_after = {'class': "cmn-pr_list"}

    feeds = [(u'NIKKEI', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=main')]

    def get_browser(self):
        '''
        Return a browser that uses a fresh LWP cookie jar and, when both
        username and password are set, has gone through the nikkei.com
        login sequence.

        Raises ValueError if the post-login page does not contain the
        ``checkValue`` script variable needed for the redirect cookie.
        '''
        br = BasicNewsRecipe.get_browser(self)

        cj = mechanize.LWPCookieJar()
        br.set_cookiejar(cj)

        if self.username is not None and self.password is not None:
            # Open the login form.
            br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
            response = br.response()

            # Comment out the disabled input, which makes mechanize fail.
            # The response body is bytes, so the search and replacement
            # values must be bytes as well (str arguments raise TypeError
            # on Python 3).
            page = response.get_data()
            page = page.replace(b'<input id="j_id48"', b'<!-- ')
            page = page.replace(b'gm_home_on.gif" />', b' -->')
            response.set_data(page)
            br.set_response(response)

            # Fill in and send the login form.
            br.select_form(name='LA0010Form01')
            br['LA0010Form01:LA0010Email'] = self.username
            br['LA0010Form01:LA0010Password'] = self.password
            br.form.find_control(
                id='LA0010Form01:LA0010AutoLoginOn', type="checkbox"
            ).get(nr=0).selected = True
            br.submit()

            # Open the news site; by default it answers with a forced
            # redirect form, which is submitted as-is.
            br.open('http://www.nikkei.com/')
            br.select_form(nr=0)
            br.submit()

            # The reply carries a value that the site's Javascript would
            # normally store in a cookie; extract it from the script.
            raw = br.response().get_data()
            match = re.search(br"var checkValue = '(\d+)';", raw, re.M)
            if match is None:
                raise ValueError(
                    'nikkei.com login failed: checkValue not found in the redirect page')
            redirectflag = match.group(1)
            br.select_form(nr=0)

            # Write the cookies to a file in LWP format and load them into
            # the jar. The lines are written as bytes to match the bytes
            # value captured above.
            cookie_file = PersistentTemporaryFile('_fa.html')
            self.temp_files.append(cookie_file)
            cookie_file.write(b"#LWP-Cookies-2.0\n")
            cookie_file.write(
                b'Set-Cookie3: Cookie-dummy=Cookie-value; domain=".nikkei.com"; path="/"; '
                b'path_spec; secure; expires="2029-12-21 05:07:59Z"; version=0\n')
            cookie_file.write(
                b'Set-Cookie3: redirectFlag=' + redirectflag + b'; domain=".nikkei.com"; path="/"; '
                b'path_spec; secure; expires="2029-12-21 05:07:59Z"; version=0\n')
            cookie_file.close()
            cj.load(cookie_file.name)

            br.submit()

        return br
|
|
|
|
|
|
|
|
|