mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update various Japanese news sources
Merge branch 'update_japanese_recipes_201405' of https://github.com/miurahr/calibre
This commit is contained in:
commit
7be851a6a3
@ -1,37 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
'''
|
||||
http://ameblo.jp/
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class SakuraBlog(BasicNewsRecipe):
    """Recipe for the Ameba-hosted 'chou chou blog' (a Japanese dog blog).

    Articles are taken from the blog's RSS feed; entries whose URL points at
    the rssad.jp advertisement host are dropped in parse_feeds().
    """
    title = u'chou chou blog'
    __author__ = 'Hiroshi Miura'
    oldest_article = 4
    publication_type = 'blog'
    max_articles_per_feed = 20
    description = 'Japanese popular dog blog'
    publisher = ''
    category = 'dog, pet, japan'
    language = 'ja'
    encoding = 'utf-8'
    use_embedded_content = True

    feeds = [(u'blog', u'http://feedblog.ameba.jp/rss/ameblo/chouchou1218/rss20.xml')]

    def parse_feeds(self):
        """Return the parsed feeds with advertisement articles removed.

        Any article whose URL contains ``rssad.jp`` is treated as an
        advertisement and filtered out of its feed.
        """
        feeds = BasicNewsRecipe.parse_feeds(self)
        for curfeed in feeds:
            # Slice assignment filters in place, so the feed keeps the same
            # list object, and does it in a single pass instead of one
            # index() lookup per removed article.
            curfeed.articles[:] = [
                article for article in curfeed.articles
                # The dot is escaped so only the literal host name matches.
                if not re.search(r'rssad\.jp', article.url)
            ]
        return feeds
|
||||
|
@ -1,5 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
__copyright__ = '2010,2014, Hiroshi Miura <miurahr@linux.com>'
|
||||
'''
|
||||
japan.engadget.com
|
||||
'''
|
||||
@ -20,19 +20,20 @@ class EndgadgetJapan(BasicNewsRecipe):
|
||||
index = 'http://japanese.engadget.com/'
|
||||
remove_javascript = True
|
||||
|
||||
remove_tags_before = dict(name="h1", attrs={'class':"post_title"})
|
||||
remove_tags_after = dict(name='div', attrs={'class':'post_body'})
|
||||
remove_tags_before = dict(name="header", attrs={'class':"header"})
|
||||
remove_tags_after = dict(name='div', attrs={'class':'post-meta'})
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
newsarticles = []
|
||||
soup = self.index_to_soup(self.index)
|
||||
for topstories in soup.findAll('div',attrs={'class':'post_content'}):
|
||||
itt = topstories.find('h4')
|
||||
for topstories in soup.findAll('header',attrs={'class':'post-header'}):
|
||||
itt = topstories.find('h2')
|
||||
itema = itt.find('a',href=True)
|
||||
itemtime = topstories.find('span',attrs={'class':'time'})
|
||||
newsarticles.append({
|
||||
'title' :itema.string
|
||||
,'date' :''
|
||||
,'date' :itemtime.string
|
||||
,'url' :itema['href']
|
||||
,'description':''
|
||||
})
|
||||
|
@ -1,5 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
__copyright__ = '2010,2014, Hiroshi Miura <miurahr@linux.com>'
|
||||
'''
|
||||
www.kahoku.co.jp
|
||||
'''
|
||||
@ -21,11 +21,5 @@ class KahokuShinpoNews(BasicNewsRecipe):
|
||||
|
||||
feeds = [(u'news', u'http://www.kahoku.co.jp/rss/index_thk.xml')]
|
||||
|
||||
keep_only_tags = [ dict(id="page_title"),
|
||||
dict(id="news_detail"),
|
||||
dict(id="bt_title"),
|
||||
{'class':"photoLeft"},
|
||||
dict(id="bt_body")
|
||||
]
|
||||
remove_tags = [ {'class':"button"}]
|
||||
keep_only_tags = [ {'class':"category"},{'class':"ttl"},{'class':'photoimg'},{'class':"txt"},{'class':"data"}]
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
__copyright__ = '2010,2014, Hiroshi Miura <miurahr@linux.com>'
|
||||
'''
|
||||
sankei.jp.msn.com
|
||||
'''
|
||||
@ -20,5 +20,4 @@ class MSNSankeiNewsProduct(BasicNewsRecipe):
|
||||
|
||||
feeds = [(u'\u65b0\u5546\u54c1', u'http://sankei.jp.msn.com/rss/news/release.xml')]
|
||||
|
||||
remove_tags_before = dict(id="NewsTitle")
|
||||
remove_tags_after = dict(id="RelatedTitle")
|
||||
keep_only_tags = [dict(id=['MainContent'])]
|
||||
|
@ -1,110 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
'''
|
||||
www.nikkei.com
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import mechanize
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
class NikkeiNet_sub_economy(BasicNewsRecipe):
    """Subscription recipe for the economy feeds of the Nikkei electronic edition.

    Article lists come from the nikkei2rss feeds on zou3.net; get_browser()
    logs in through id.nikkei.com before articles are fetched.
    """
    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7d4c\u6e08)'
    __author__ = 'Hiroshi Miura'
    description = 'News and current market affairs from Japan'
    cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    needs_subscription = True
    oldest_article = 2
    max_articles_per_feed = 20
    language = 'ja'
    remove_javascript = False
    # Cookie files written by get_browser(); the list keeps them referenced.
    temp_files = []

    remove_tags_before = {'class':"cmn-section cmn-indent"}
    remove_tags = [
        {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
        {'class':"cmn-article_keyword cmn-clearfix"},
        {'class':"cmn-print_headline cmn-clearfix"},
        {'class':"cmn-article_list"},
        dict(id="ABOUT-NIKKEI"),
        {'class':"cmn-sub_market"},
    ]
    remove_tags_after = {'class':"cmn-pr_list"}

    feeds = [
        (u'\u653f\u6cbb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'),
        (u'\u8ca1\u52d9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
        (u'\u7d4c\u6e08', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'),
        (u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'),
        (u'\u96c7\u7528', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
        (u'\u6559\u80b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'),
        (u'\u304a\u304f\u3084\u307f', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'),
        (u'\u4eba\u4e8b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'),
    ]

    def get_browser(self):
        """Return a browser, logged in to nikkei.com when credentials are set.

        Without both a username and a password the browser is returned with
        only a fresh LWP cookie jar attached.

        Raises:
            ValueError: if the page returned after the forced redirect does
                not contain the ``checkValue`` script variable.
        """
        br = BasicNewsRecipe.get_browser(self)

        cj = mechanize.LWPCookieJar()
        br.set_cookiejar(cj)

        if self.username is None or self.password is None:
            return br

        # Open the login form.
        br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
        response = br.response()
        # Comment out a disabled input that mechanize fails to parse.
        html = response.get_data()
        html = html.replace("<input id=\"j_id48\"", "<!-- ")
        html = html.replace("gm_home_on.gif\" />", " -->")
        response.set_data(html)
        br.set_response(response)

        br.select_form(name='LA0010Form01')
        br['LA0010Form01:LA0010Email'] = self.username
        br['LA0010Form01:LA0010Password'] = self.password
        br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
        br.submit()

        # Open the news site and submit the auto-redirect form it answers with.
        br.open('http://www.nikkei.com/')
        br.select_form(nr=0)
        br.submit()
        raw = br.response().get_data()

        # The page sets a cookie from JavaScript; read its value from the script.
        match = re.search(r"var checkValue = '(\d+)';", raw, re.M)
        if match is None:
            raise ValueError('nikkei.com login: checkValue not found in redirect page')
        redirectflag = match.group(1)
        br.select_form(nr=0)

        # Hand the cookie to the jar through an LWP-format cookie file.
        # NOTE(review): the expiry date is hard-coded; presumably these
        # cookies stop being loaded once it has passed - confirm.
        cookie_attrs = "; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n"
        cookie_file = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(cookie_file)
        try:
            cookie_file.write("#LWP-Cookies-2.0\n")
            cookie_file.write("Set-Cookie3: Cookie-dummy=Cookie-value" + cookie_attrs)
            cookie_file.write("Set-Cookie3: redirectFlag=" + redirectflag + cookie_attrs)
        finally:
            # Close even when a write fails so the handle is not leaked.
            cookie_file.close()
        cj.load(cookie_file.name)

        br.submit()
        return br
|
||||
|
||||
|
||||
|
||||
|
@ -1,107 +0,0 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
'''
|
||||
www.nikkei.com
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import mechanize
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
|
||||
class NikkeiNet_sub_industory(BasicNewsRecipe):
    """Subscription recipe for the industry feeds of the Nikkei electronic edition.

    Article lists come from the nikkei2rss feeds on zou3.net; get_browser()
    logs in through id.nikkei.com before articles are fetched.
    """
    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7523\u696d)'
    __author__ = 'Hiroshi Miura'
    description = 'News and current market affairs from Japan'
    cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    needs_subscription = True
    oldest_article = 2
    max_articles_per_feed = 20
    language = 'ja'
    remove_javascript = False
    # Cookie files written by get_browser(); the list keeps them referenced.
    temp_files = []

    remove_tags_before = {'class':"cmn-section cmn-indent"}
    remove_tags = [
        {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
        {'class':"cmn-article_keyword cmn-clearfix"},
        {'class':"cmn-print_headline cmn-clearfix"},
    ]
    remove_tags_after = {'class':"cmn-pr_list"}

    feeds = [
        (u'\u65e5\u7d4c\u4f01\u696d', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sangyo'),
        (u'\u65e5\u7d4c\u88fd\u54c1', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=newpro'),
        (u'internet', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=internet'),
        (u'\u56fd\u969b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kaigai'),
        (u'\u79d1\u5b66', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kagaku'),
    ]

    def get_browser(self):
        """Return a browser, logged in to nikkei.com when credentials are set.

        Without both a username and a password the browser is returned with
        only a fresh LWP cookie jar attached.

        Raises:
            ValueError: if the page returned after the forced redirect does
                not contain the ``checkValue`` script variable.
        """
        br = BasicNewsRecipe.get_browser(self)

        cj = mechanize.LWPCookieJar()
        br.set_cookiejar(cj)

        if self.username is None or self.password is None:
            return br

        # Open the login form.
        br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
        response = br.response()
        # Comment out a disabled input that mechanize fails to parse.
        html = response.get_data()
        html = html.replace("<input id=\"j_id48\"", "<!-- ")
        html = html.replace("gm_home_on.gif\" />", " -->")
        response.set_data(html)
        br.set_response(response)

        br.select_form(name='LA0010Form01')
        br['LA0010Form01:LA0010Email'] = self.username
        br['LA0010Form01:LA0010Password'] = self.password
        br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
        br.submit()

        # Open the news site and submit the auto-redirect form it answers with.
        br.open('http://www.nikkei.com/')
        br.select_form(nr=0)
        br.submit()
        raw = br.response().get_data()

        # The page sets a cookie from JavaScript; read its value from the script.
        match = re.search(r"var checkValue = '(\d+)';", raw, re.M)
        if match is None:
            raise ValueError('nikkei.com login: checkValue not found in redirect page')
        redirectflag = match.group(1)
        br.select_form(nr=0)

        # Hand the cookie to the jar through an LWP-format cookie file.
        # NOTE(review): the expiry date is hard-coded; presumably these
        # cookies stop being loaded once it has passed - confirm.
        cookie_attrs = "; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n"
        cookie_file = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(cookie_file)
        try:
            cookie_file.write("#LWP-Cookies-2.0\n")
            cookie_file.write("Set-Cookie3: Cookie-dummy=Cookie-value" + cookie_attrs)
            cookie_file.write("Set-Cookie3: redirectFlag=" + redirectflag + cookie_attrs)
        finally:
            # Close even when a write fails so the handle is not leaked.
            cookie_file.close()
        cj.load(cookie_file.name)

        br.submit()
        return br
|
||||
|
||||
|
||||
|
||||
|
@ -1,104 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
'''
|
||||
www.nikkei.com
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import mechanize
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
|
||||
class NikkeiNet_sub_life(BasicNewsRecipe):
    """Subscription recipe for the life feeds of the Nikkei electronic edition.

    Article lists come from the nikkei2rss feeds on zou3.net; get_browser()
    logs in through id.nikkei.com before articles are fetched.
    """
    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u751f\u6d3b)'
    __author__ = 'Hiroshi Miura'
    description = 'News and current market affairs from Japan'
    cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    needs_subscription = True
    oldest_article = 2
    max_articles_per_feed = 20
    language = 'ja'
    remove_javascript = False
    # Cookie files written by get_browser(); the list keeps them referenced.
    temp_files = []

    remove_tags_before = {'class':"cmn-section cmn-indent"}
    remove_tags = [
        {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
        {'class':"cmn-article_keyword cmn-clearfix"},
        {'class':"cmn-print_headline cmn-clearfix"},
    ]
    remove_tags_after = {'class':"cmn-pr_list"}

    feeds = [
        (u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
        (u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
        (u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
        (u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
    ]

    def get_browser(self):
        """Return a browser, logged in to nikkei.com when credentials are set.

        Without both a username and a password the browser is returned with
        only a fresh LWP cookie jar attached.

        Raises:
            ValueError: if the page returned after the forced redirect does
                not contain the ``checkValue`` script variable.
        """
        br = BasicNewsRecipe.get_browser(self)

        cj = mechanize.LWPCookieJar()
        br.set_cookiejar(cj)

        if self.username is None or self.password is None:
            return br

        # Open the login form.
        br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
        response = br.response()
        # Comment out a disabled input that mechanize fails to parse.
        html = response.get_data()
        html = html.replace("<input id=\"j_id48\"", "<!-- ")
        html = html.replace("gm_home_on.gif\" />", " -->")
        response.set_data(html)
        br.set_response(response)

        br.select_form(name='LA0010Form01')
        br['LA0010Form01:LA0010Email'] = self.username
        br['LA0010Form01:LA0010Password'] = self.password
        br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
        br.submit()

        # Open the news site and submit the auto-redirect form it answers with.
        br.open('http://www.nikkei.com/')
        br.select_form(nr=0)
        br.submit()
        raw = br.response().get_data()

        # The page sets a cookie from JavaScript; read its value from the script.
        match = re.search(r"var checkValue = '(\d+)';", raw, re.M)
        if match is None:
            raise ValueError('nikkei.com login: checkValue not found in redirect page')
        redirectflag = match.group(1)
        br.select_form(nr=0)

        # Hand the cookie to the jar through an LWP-format cookie file.
        # NOTE(review): the expiry date is hard-coded; presumably these
        # cookies stop being loaded once it has passed - confirm.
        cookie_attrs = "; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n"
        cookie_file = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(cookie_file)
        try:
            cookie_file.write("#LWP-Cookies-2.0\n")
            cookie_file.write("Set-Cookie3: Cookie-dummy=Cookie-value" + cookie_attrs)
            cookie_file.write("Set-Cookie3: redirectFlag=" + redirectflag + cookie_attrs)
        finally:
            # Close even when a write fails so the handle is not leaked.
            cookie_file.close()
        cj.load(cookie_file.name)

        br.submit()
        return br
|
||||
|
||||
|
||||
|
||||
|
@ -1,103 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
'''
|
||||
www.nikkei.com
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import mechanize
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
|
||||
class NikkeiNet_sub_main(BasicNewsRecipe):
    """Subscription recipe for the main feed of the Nikkei electronic edition.

    The article list comes from the nikkei2rss feed on zou3.net; get_browser()
    logs in through id.nikkei.com before articles are fetched.
    """
    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7dcf\u5408)'
    __author__ = 'Hiroshi Miura'
    description = 'News and current market affairs from Japan'
    cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    needs_subscription = True
    oldest_article = 2
    max_articles_per_feed = 20
    language = 'ja'
    remove_javascript = False
    # Cookie files written by get_browser(); the list keeps them referenced.
    temp_files = []

    remove_tags_before = {'class':"cmn-section cmn-indent"}
    remove_tags = [
        {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
        {'class':"cmn-article_keyword cmn-clearfix"},
        {'class':"cmn-print_headline cmn-clearfix"},
        {'class':"cmn-article_list"},
        {'class':"cmn-dashedline"},
        {'class':"cmn-hide"},
    ]
    remove_tags_after = {'class':"cmn-pr_list"}

    feeds = [(u'NIKKEI', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=main')]

    def get_browser(self):
        """Return a browser, logged in to nikkei.com when credentials are set.

        Without both a username and a password the browser is returned with
        only a fresh LWP cookie jar attached.

        Raises:
            ValueError: if the page returned after the forced redirect does
                not contain the ``checkValue`` script variable.
        """
        br = BasicNewsRecipe.get_browser(self)

        cj = mechanize.LWPCookieJar()
        br.set_cookiejar(cj)

        if self.username is None or self.password is None:
            return br

        # Open the login form.
        br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
        response = br.response()
        # Comment out a disabled input that mechanize fails to parse.
        html = response.get_data()
        html = html.replace("<input id=\"j_id48\"", "<!-- ")
        html = html.replace("gm_home_on.gif\" />", " -->")
        response.set_data(html)
        br.set_response(response)

        br.select_form(name='LA0010Form01')
        br['LA0010Form01:LA0010Email'] = self.username
        br['LA0010Form01:LA0010Password'] = self.password
        br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
        br.submit()

        # Open the news site and submit the auto-redirect form it answers with.
        br.open('http://www.nikkei.com/')
        br.select_form(nr=0)
        br.submit()
        raw = br.response().get_data()

        # The page sets a cookie from JavaScript; read its value from the script.
        match = re.search(r"var checkValue = '(\d+)';", raw, re.M)
        if match is None:
            raise ValueError('nikkei.com login: checkValue not found in redirect page')
        redirectflag = match.group(1)
        br.select_form(nr=0)

        # Hand the cookie to the jar through an LWP-format cookie file.
        # NOTE(review): the expiry date is hard-coded; presumably these
        # cookies stop being loaded once it has passed - confirm.
        cookie_attrs = "; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n"
        cookie_file = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(cookie_file)
        try:
            cookie_file.write("#LWP-Cookies-2.0\n")
            cookie_file.write("Set-Cookie3: Cookie-dummy=Cookie-value" + cookie_attrs)
            cookie_file.write("Set-Cookie3: redirectFlag=" + redirectflag + cookie_attrs)
        finally:
            # Close even when a write fails so the handle is not leaked.
            cookie_file.close()
        cj.load(cookie_file.name)

        br.submit()
        return br
|
||||
|
||||
|
||||
|
||||
|
@ -1,102 +0,0 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
'''
|
||||
www.nikkei.com
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import mechanize
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
|
||||
class NikkeiNet_sub_shakai(BasicNewsRecipe):
    """Subscription recipe for the social (shakai) feed of the Nikkei electronic edition.

    The article list comes from the nikkei2rss feed on zou3.net; get_browser()
    logs in through id.nikkei.com before articles are fetched.
    """
    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Social)'
    __author__ = 'Hiroshi Miura'
    description = 'News and current market affairs from Japan'
    cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    needs_subscription = True
    oldest_article = 2
    max_articles_per_feed = 20
    language = 'ja'
    remove_javascript = False
    # Cookie files written by get_browser(); the list keeps them referenced.
    temp_files = []

    remove_tags_before = {'class':"cmn-section cmn-indent"}
    remove_tags = [
        {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
        {'class':"cmn-article_keyword cmn-clearfix"},
        {'class':"cmn-print_headline cmn-clearfix"},
    ]
    remove_tags_after = {'class':"cmn-pr_list"}

    feeds = [
        (u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
    ]

    def get_browser(self):
        """Return a browser, logged in to nikkei.com when credentials are set.

        Without both a username and a password the browser is returned with
        only a fresh LWP cookie jar attached.

        Raises:
            ValueError: if the page returned after the forced redirect does
                not contain the ``checkValue`` script variable.
        """
        br = BasicNewsRecipe.get_browser(self)

        cj = mechanize.LWPCookieJar()
        br.set_cookiejar(cj)

        if self.username is None or self.password is None:
            return br

        # Open the login form.
        br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
        response = br.response()
        # Comment out a disabled input that mechanize fails to parse.
        html = response.get_data()
        html = html.replace("<input id=\"j_id48\"", "<!-- ")
        html = html.replace("gm_home_on.gif\" />", " -->")
        response.set_data(html)
        br.set_response(response)

        br.select_form(name='LA0010Form01')
        br['LA0010Form01:LA0010Email'] = self.username
        br['LA0010Form01:LA0010Password'] = self.password
        br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
        br.submit()

        # Open the news site and submit the auto-redirect form it answers with.
        br.open('http://www.nikkei.com/')
        br.select_form(nr=0)
        br.submit()
        raw = br.response().get_data()

        # The page sets a cookie from JavaScript; read its value from the script.
        match = re.search(r"var checkValue = '(\d+)';", raw, re.M)
        if match is None:
            raise ValueError('nikkei.com login: checkValue not found in redirect page')
        redirectflag = match.group(1)
        br.select_form(nr=0)

        # Hand the cookie to the jar through an LWP-format cookie file.
        # NOTE(review): the expiry date is hard-coded; presumably these
        # cookies stop being loaded once it has passed - confirm.
        cookie_attrs = "; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n"
        cookie_file = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(cookie_file)
        try:
            cookie_file.write("#LWP-Cookies-2.0\n")
            cookie_file.write("Set-Cookie3: Cookie-dummy=Cookie-value" + cookie_attrs)
            cookie_file.write("Set-Cookie3: redirectFlag=" + redirectflag + cookie_attrs)
        finally:
            # Close even when a write fails so the handle is not leaked.
            cookie_file.close()
        cj.load(cookie_file.name)

        br.submit()
        return br
|
||||
|
||||
|
||||
|
||||
|
@ -1,108 +0,0 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
'''
|
||||
www.nikkei.com
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import mechanize
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
|
||||
class NikkeiNet_sub_sports(BasicNewsRecipe):
    """Subscription recipe for the sports feeds of the Nikkei electronic edition.

    Article lists come from the nikkei2rss feeds on zou3.net; get_browser()
    logs in through id.nikkei.com before articles are fetched.
    """
    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u30b9\u30dd\u30fc\u30c4)'
    __author__ = 'Hiroshi Miura'
    description = 'News and current market affairs from Japan'
    cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    needs_subscription = True
    oldest_article = 2
    max_articles_per_feed = 20
    language = 'ja'
    remove_javascript = False
    # Cookie files written by get_browser(); the list keeps them referenced.
    temp_files = []

    remove_tags_before = {'class':"cmn-section cmn-indent"}
    remove_tags = [
        {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
        {'class':"cmn-article_keyword cmn-clearfix"},
        {'class':"cmn-print_headline cmn-clearfix"},
    ]
    remove_tags_after = {'class':"cmn-pr_list"}

    feeds = [
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30d7\u30ed\u91ce\u7403', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=baseball'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u5927\u30ea\u30fc\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=mlb'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b4\u30eb\u30d5', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=golf'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u76f8\u64b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sumou'),
        (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u7af6\u99ac', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keiba'),
    ]

    def get_browser(self):
        """Return a browser, logged in to nikkei.com when credentials are set.

        Without both a username and a password the browser is returned with
        only a fresh LWP cookie jar attached.

        Raises:
            ValueError: if the page returned after the forced redirect does
                not contain the ``checkValue`` script variable.
        """
        br = BasicNewsRecipe.get_browser(self)

        cj = mechanize.LWPCookieJar()
        br.set_cookiejar(cj)

        if self.username is None or self.password is None:
            return br

        # Open the login form.
        br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
        response = br.response()
        # Comment out a disabled input that mechanize fails to parse.
        html = response.get_data()
        html = html.replace("<input id=\"j_id48\"", "<!-- ")
        html = html.replace("gm_home_on.gif\" />", " -->")
        response.set_data(html)
        br.set_response(response)

        br.select_form(name='LA0010Form01')
        br['LA0010Form01:LA0010Email'] = self.username
        br['LA0010Form01:LA0010Password'] = self.password
        br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
        br.submit()

        # Open the news site and submit the auto-redirect form it answers with.
        br.open('http://www.nikkei.com/')
        br.select_form(nr=0)
        br.submit()
        raw = br.response().get_data()

        # The page sets a cookie from JavaScript; read its value from the script.
        match = re.search(r"var checkValue = '(\d+)';", raw, re.M)
        if match is None:
            raise ValueError('nikkei.com login: checkValue not found in redirect page')
        redirectflag = match.group(1)
        br.select_form(nr=0)

        # Hand the cookie to the jar through an LWP-format cookie file.
        # NOTE(review): the expiry date is hard-coded; presumably these
        # cookies stop being loaded once it has passed - confirm.
        cookie_attrs = "; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n"
        cookie_file = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(cookie_file)
        try:
            cookie_file.write("#LWP-Cookies-2.0\n")
            cookie_file.write("Set-Cookie3: Cookie-dummy=Cookie-value" + cookie_attrs)
            cookie_file.write("Set-Cookie3: redirectFlag=" + redirectflag + cookie_attrs)
        finally:
            # Close even when a write fails so the handle is not leaked.
            cookie_file.close()
        cj.load(cookie_file.name)

        br.submit()
        return br
|
||||
|
||||
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
__copyright__ = '2010,2014, Hiroshi Miura <miurahr@linux.com>'
|
||||
'''
|
||||
http://ameblo.jp/sauta19/
|
||||
'''
|
||||
@ -18,7 +18,7 @@ class UniNoHimituKichiBlog(BasicNewsRecipe):
|
||||
category = 'cat, pet, japan'
|
||||
language = 'ja'
|
||||
encoding = 'utf-8'
|
||||
|
||||
keep_only_tags = [{'class':'entry_head'},{'class':'subContentsInner'}]
|
||||
feeds = [(u'blog', u'http://feedblog.ameba.jp/rss/ameblo/sauta19/rss20.xml')]
|
||||
|
||||
def parse_feeds(self):
|
||||
|
@ -1,5 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
__copyright__ = '2010,2014, Hiroshi Miura <miurahr@linux.com>'
|
||||
'''
|
||||
www.yomiuri.co.jp
|
||||
'''
|
||||
@ -16,16 +16,13 @@ class YOLNews(BasicNewsRecipe):
|
||||
publisher = 'Yomiuri Online News'
|
||||
category = 'news, japan'
|
||||
language = 'ja'
|
||||
encoding = 'Shift_JIS'
|
||||
encoding = 'UTF-8'
|
||||
index = 'http://www.yomiuri.co.jp/latestnews/'
|
||||
remove_javascript = True
|
||||
masthead_title = u'YOMIURI ONLINE'
|
||||
|
||||
keep_only_tags = [{'class':"article-def"}]
|
||||
remove_tags = [{'class':"RelatedArticle"},
|
||||
{'class':"sbtns"}
|
||||
]
|
||||
remove_tags_after = {'class':"date-def"}
|
||||
|
||||
keep_only_tags = [{'class':"article text-resizeable"}]
|
||||
|
||||
def parse_feeds(self):
|
||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||
@ -42,22 +39,22 @@ class YOLNews(BasicNewsRecipe):
|
||||
|
||||
def parse_index(self):
    """Build the 'latest' feed from the index page at ``self.index``.

    Every ``<li>`` of the ``<ul>`` whose class attribute is
    ``'list-common list-common-latest'`` that contains a link with a
    ``span.headline`` becomes one article.  The first child of the headline
    span is used as the title and the text of its nested ``span.update``
    (if any) as the date.

    Returns:
        A list of ``(feed title, list of article dicts)`` tuples.  Each
        article dict has the keys ``title``, ``date``, ``url`` and
        ``description``.
    """
    feeds = []
    newsarticles = []
    soup = self.index_to_soup(self.index)
    listlatest = soup.find(
        'ul', attrs={'class': 'list-common list-common-latest'})
    if listlatest:
        for itt in listlatest.findAll('li'):
            itema = itt.find('a', href=True)
            if not itema:
                continue
            item_headline = itema.find('span', attrs={'class': 'headline'})
            # A link without a headline span, or with an empty one, has no
            # usable title; skip it instead of failing the whole download.
            if item_headline is None or not item_headline.contents:
                continue
            item_date = item_headline.find('span', attrs={'class': 'update'})
            newsarticles.append({
                'title': item_headline.contents[0],
                # The date is handed over as plain text, not as a soup tag.
                'date': (self.tag_to_string(item_date)
                         if item_date is not None else ''),
                'url': itema['href'],
                'description': '',
            })
        # NOTE(review): indentation was lost in this view; the feed is
        # assumed to be emitted only when the list was found -- confirm.
        feeds.append(('latest', newsarticles))
    return feeds
|
||||
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
__copyright__ = '2010,2014, Hiroshi Miura <miurahr@linux.com>'
|
||||
'''
|
||||
www.yomiuri.co.jp
|
||||
'''
|
||||
@ -16,16 +16,12 @@ class YOLNews(BasicNewsRecipe):
|
||||
publisher = 'Yomiuri Online News'
|
||||
category = 'news, japan'
|
||||
language = 'ja'
|
||||
encoding = 'Shift_JIS'
|
||||
encoding = 'UTF-8'
|
||||
index = 'http://www.yomiuri.co.jp/world/'
|
||||
remove_javascript = True
|
||||
masthead_title = u"YOMIURI ONLINE"
|
||||
|
||||
keep_only_tags = [{'class':"article-def"}]
|
||||
remove_tags = [{'class':"RelatedArticle"},
|
||||
{'class':"sbtns"}
|
||||
]
|
||||
remove_tags_after = {'class':"date-def"}
|
||||
keep_only_tags = [{'class':"article text-resizeable"}]
|
||||
|
||||
def parse_feeds(self):
|
||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||
@ -42,20 +38,36 @@ class YOLNews(BasicNewsRecipe):
|
||||
|
||||
def parse_index(self):
    """Build the 'World' feed from the index page at ``self.index``.

    Inside the ``<div>`` whose class attribute is
    ``'pbNested span-main-inr'`` the ``ul.list-top`` list is read first and
    the ``ul.list-common`` list second.  Every ``<li>`` that contains a
    link with a ``span.headline`` becomes one article.  The first child of
    the headline span is used as the title and the text of its nested
    ``span.update`` (if any) as the date.

    Returns:
        A list of ``(feed title, list of article dicts)`` tuples.  Each
        article dict has the keys ``title``, ``date``, ``url`` and
        ``description``.
    """
    feeds = []
    newsarticles = []
    soup = self.index_to_soup(self.index)
    mainspan = soup.find('div', attrs={'class': 'pbNested span-main-inr'})
    if mainspan:
        # Top stories first, then the common list; both share one markup.
        for list_class in ('list-top', 'list-common'):
            stories = mainspan.find('ul', attrs={'class': list_class})
            if not stories:
                continue
            for itt in stories.findAll('li'):
                itema = itt.find('a', href=True)
                if not itema:
                    continue
                item_headline = itema.find(
                    'span', attrs={'class': 'headline'})
                # A link without a headline span, or with an empty one, has
                # no usable title; skip it instead of failing the download.
                if item_headline is None or not item_headline.contents:
                    continue
                item_date = item_headline.find(
                    'span', attrs={'class': 'update'})
                newsarticles.append({
                    'title': item_headline.contents[0],
                    # The date is handed over as plain text, not as a tag.
                    'date': (self.tag_to_string(item_date)
                             if item_date is not None else ''),
                    'url': itema['href'],
                    'description': '',
                })
        # NOTE(review): indentation was lost in this view; the feed is
        # assumed to be emitted only when the main div was found -- confirm.
        feeds.append(('World', newsarticles))
    return feeds
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user