ToyoKeizai News by Hiroshi Miura

Kovid Goyal 2010-12-08 09:39:05 -07:00
commit bb14142bec
3 changed files with 78 additions and 2 deletions

View File

@@ -1,4 +1,5 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+import re

 class MainichiDailyITNews(BasicNewsRecipe):
     title = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
@@ -14,6 +15,7 @@ class MainichiDailyITNews(BasicNewsRecipe):
     remove_tags_before = {'class':"NewsTitle"}
     remove_tags        = [{'class':"RelatedArticle"}]
+    remove_tags_after  = {'class':"Credit"}

     def parse_feeds(self):
@@ -29,4 +31,4 @@ class MainichiDailyITNews(BasicNewsRecipe):
                 index = curfeed.articles.index(d)
                 curfeed.articles[index:index+1] = []
-        return feeds remove_tags_after = {'class':"Credit"}
+        return feeds

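The Mainichi hunks above move remove_tags_after out of the parse_feeds body, where it sat dead on the same line as return feeds, up to the class attributes, which is where BasicNewsRecipe actually reads it. As a rough sketch only (not part of this commit; the class name and feed URL are invented), this is how the three pruning attributes work together in a recipe:

from calibre.web.feeds.news import BasicNewsRecipe

class PruningSketch(BasicNewsRecipe):
    title = 'Pruning sketch'
    feeds = [('Example', 'http://example.com/rss.xml')]

    # drop everything before the first element with class "NewsTitle"...
    remove_tags_before = {'class': 'NewsTitle'}
    # ...drop everything after the first element with class "Credit"...
    remove_tags_after = {'class': 'Credit'}
    # ...and remove related-article boxes inside the kept range.
    remove_tags = [{'class': 'RelatedArticle'}]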
View File

@@ -14,7 +14,7 @@ class TheHeiseOnline(BasicNewsRecipe):
     oldest_article = 3
     description = 'In association with Heise Online'
     publisher = 'Heise Media UK Ltd.'
-    category = 'news, technology, security'
+    category = 'news, technology, security, OSS, internet'
     max_articles_per_feed = 100
     language = 'en'
     encoding = 'utf-8'
@@ -27,6 +27,12 @@ class TheHeiseOnline(BasicNewsRecipe):
     feeds = [
         (u'The H News Feed', u'http://www.h-online.com/news/atom.xml')
     ]
+    cover_url = 'http://www.h-online.com/icons/logo_theH.gif'
+    remove_tags = [
+        dict(id="logo"),
+        dict(id="footer")
+    ]

     def print_version(self, url):
         return url + '?view=print'

View File

@@ -0,0 +1,68 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.toyokeizai.net
'''

from calibre.web.feeds.news import BasicNewsRecipe
import re

class Toyokeizai(BasicNewsRecipe):
    title          = u'ToyoKeizai News'
    __author__     = 'Hiroshi Miura'
    oldest_article = 1
    max_articles_per_feed = 50
    description    = 'Japanese economy and business magazine; only paid (advanced) subscribers are supported'
    publisher      = 'Toyokeizai Shinbun Sha'
    category       = 'economy, magazine, japan'
    language       = 'ja'
    encoding       = 'euc-jp'
    index          = 'http://member.toyokeizai.net/news/'
    remove_javascript = True
    no_stylesheets = True
    masthead_title = u'TOYOKEIZAI'
    needs_subscription = True
    timefmt        = '[%y/%m/%d]'
    recursions     = 5
    match_regexps  = [r'page/\d+']

    # keep only the article body containers; everything else on the page is dropped
    keep_only_tags = [
        dict(name='div', attrs={'class':['news']}),
        dict(name='div', attrs={'class':["news_cont"]}),
        dict(name='div', attrs={'class':["news_con"]}),
        # dict(name='div', attrs={'class':["norightsMessage"]})
    ]
    # strip navigation, social and member-only widgets left inside the body
    remove_tags = [
        {'class':"mt35 mgz"},
        {'class':"mt20 newzia"},
        {'class':"mt20 fontS"},
        {'class':"bk_btn_m"},
        dict(id='newzia_connect_member')
    ]

    def parse_index(self):
        feeds = []
        soup = self.index_to_soup(self.index)
        topstories = soup.find('ul', attrs={'class':'list6'})
        if topstories:
            newsarticles = []
            # each <li> carries a link, a '- date' span and a tooltip description
            for itt in topstories.findAll('li'):
                itema = itt.find('a', href=True)
                itemd = itt.find('span')
                newsarticles.append({
                    'title'      : itema.string,
                    'date'       : re.compile(r"\- ").sub("", itemd.string),
                    'url'        : 'http://member.toyokeizai.net' + itema['href'],
                    'description': itema['title']
                })
            feeds.append(('news', newsarticles))
        return feeds

    def get_browser(self):
        # log in through the subscriber form before any article is fetched
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('http://member.toyokeizai.net/norights/form/')
            br.select_form(nr=0)
            br['kaiin_id'] = self.username
            br['password'] = self.password
            br.submit()
        return br
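
For illustration only (nothing below is part of the commit, and all values are made up): parse_index() strips the leading '- ' from the date text of each list item and returns a list of (section name, article list) pairs, which is the structure calibre expects:

import re

# made-up sample of the text found in a list item's <span>
raw_date = u'- 10/12/08'
clean_date = re.compile(r"\- ").sub("", raw_date)
assert clean_date == u'10/12/08'

# shape of the value parse_index() returns
feeds = [('news', [{
    'title'      : u'Sample headline',
    'date'       : clean_date,
    'url'        : 'http://member.toyokeizai.net/news/sample',
    'description': u'Sample tooltip text',
}])]

Because needs_subscription is True, the recipe is normally run with subscriber credentials, for example through ebook-convert's --username and --password options, which get_browser() uses to fill in the login form at member.toyokeizai.net.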