mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
143 lines
7.6 KiB
Plaintext
143 lines
7.6 KiB
Plaintext
import re
|
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
import mechanize
|
|
from calibre.ptempfile import PersistentTemporaryFile
|
|
|
|
|
|
class NikkeiNet_subscription(BasicNewsRecipe):
    """Recipe for the Nikkei electronic edition (subscription required).

    Articles are taken from the RSS feeds listed in ``feeds``.
    ``get_browser`` signs in to nikkei.com when both a username and a
    password have been configured.
    """

    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(MAX)'
    __author__ = 'Hiroshi Miura'
    description = 'News and current market affairs from Japan, gather MAX articles'
    needs_subscription = True
    oldest_article = 2
    max_articles_per_feed = 10
    language = 'ja'
    remove_javascript = False
    # get_browser() appends the temporary cookie file it creates to this list.
    temp_files = []

    remove_tags_before = {'class': "cmn-section cmn-indent"}
    remove_tags = [
        {'class': "JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
        {'class': "cmn-article_keyword cmn-clearfix"},
        {'class': "cmn-print_headline cmn-clearfix"},
    ]
    remove_tags_after = {'class': "cmn-pr_list"}

    def get_browser(self):
        """Return a browser, logged in to nikkei.com when credentials are set.

        A fresh ``mechanize.LWPCookieJar`` is always attached to the browser.
        When ``self.username`` or ``self.password`` is ``None`` the browser is
        returned without any login attempt.

        Otherwise the login form is submitted, the news front page is opened,
        the redirect form it returns is submitted, and the ``redirectFlag``
        cookie (whose value is embedded in the page's JavaScript) is loaded
        into the cookie jar through a temporary LWP cookie file before the
        final form is submitted.

        Raises:
            ValueError: if the redirect page does not contain the
                ``checkValue`` JavaScript variable.
        """
        br = BasicNewsRecipe.get_browser(self)

        cj = mechanize.LWPCookieJar()
        br.set_cookiejar(cj)

        if self.username is None or self.password is None:
            return br

        # open login form
        br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
        response = br.response()

        # remove disabled input which brings error on mechanize, by turning
        # the span from the input tag to the image tag into an HTML comment.
        # mechanize hands the body back as bytes on Python 3, so the markers
        # are bytes literals (these are plain str on Python 2).
        page = response.get_data()
        page = page.replace(b'<input id="j_id48"', b'<!-- ')
        page = page.replace(b'gm_home_on.gif" />', b' -->')
        response.set_data(page)
        br.set_response(response)

        br.select_form(name='LA0010Form01')
        br['LA0010Form01:LA0010Email'] = self.username
        br['LA0010Form01:LA0010Password'] = self.password
        auto_login = br.form.find_control(
            id='LA0010Form01:LA0010AutoLoginOn', type="checkbox")
        auto_login.get(nr=0).selected = True
        br.submit()

        # open news site
        br.open('http://www.nikkei.com/')

        # forced redirect in default
        br.select_form(nr=0)
        br.submit()

        # the response carries a cookie value which should be set by Javascript
        raw = br.response().get_data()

        # grab cookie from JS and set it
        match = re.search(br"var checkValue = '(\d+)';", raw, re.M)
        if match is None:
            raise ValueError(
                'nikkei.com redirect page did not contain the expected'
                ' checkValue variable')
        redirectflag = match.group(1)
        br.select_form(nr=0)

        # The temporary file is written as bytes: PersistentTemporaryFile is
        # opened in binary mode by default.
        cookie_attrs = (
            b'; domain=".nikkei.com"; path="/"; path_spec; secure;'
            b' expires="2029-12-21 05:07:59Z"; version=0\n')
        cookie_file = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(cookie_file)
        cookie_file.write(b'#LWP-Cookies-2.0\n')
        cookie_file.write(
            b'Set-Cookie3: Cookie-dummy=Cookie-value' + cookie_attrs)
        cookie_file.write(
            b'Set-Cookie3: redirectFlag=' + redirectflag + cookie_attrs)
        cookie_file.close()
        cj.load(cookie_file.name)

        br.submit()

        return br

    feeds = [
        (u'\u65e5\u7d4c\u4f01\u696d', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sangyo'),
        (u'\u65e5\u7d4c\u88fd\u54c1', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=newpro'),
        (u'internet', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=internet'),
        (u'\u653f\u6cbb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'),
        (u'\u8ca1\u52d9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
        (u'\u7d4c\u6e08', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'),
        (u'\u56fd\u969b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kaigai'),
        (u'\u79d1\u5b66', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kagaku'),
        (u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'),
        (u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
        (u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
        (u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
        (u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
        (u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
        (u'\u96c7\u7528', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
        (u'\u6559\u80b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'),
        (u'\u304a\u304f\u3084\u307f', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'),
        (u'\u4eba\u4e8b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'),
        (u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
        (u'\u5730\u57df\u30cb\u30e5\u30fc\u30b9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=local'),
        (u'\u7d71\u8a08\u30fb\u767d\u66f8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=report'),
        (u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking'),
        (u'\u4f1a\u898b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=interview'),
        (u'\u793e\u8aac\u30fb\u6625\u79cb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shasetsu'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30d7\u30ed\u91ce\u7403', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=baseball'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u5927\u30ea\u30fc\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=mlb'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b4\u30eb\u30d5', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=golf'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u76f8\u64b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sumou'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u7af6\u99ac', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keiba'),
        (u'\u8abf\u67fb\u30fb\u30a2\u30f3\u30b1\u30fc\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=research'),
    ]
|