Nikkei Business and JB Press by Ado Nishimura

This commit is contained in:
Kovid Goyal 2011-03-10 09:47:31 -07:00
parent 283a41916e
commit 601a1cf7df
4 changed files with 129 additions and 0 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 290 B

View File

@ -0,0 +1,42 @@
import urllib2
from calibre.web.feeds.news import BasicNewsRecipe
class JBPress(BasicNewsRecipe):
    """Calibre recipe for Japan Business Press (JBPress).

    Articles are listed from the site's RDF feed and downloaded through
    their ``/print/`` URLs.  The recipe declares ``needs_subscription``, so
    a username and password are expected.
    """

    title = u'JBPress'
    language = 'ja'
    description = u'Japan Business Press New articles (using small print version)'
    __author__ = 'Ado Nishimura'
    needs_subscription = True
    oldest_article = 7
    max_articles_per_feed = 100
    # Discard everything that precedes the element with id="wrapper".
    remove_tags_before = dict(id='wrapper')
    no_stylesheets = True
    feeds = [('JBPress new article', 'http://feed.ismedia.jp/rss/jbpress/all.rdf')]

    def get_cover_url(self):
        """Return the URL of the site logo, which is used as the cover."""
        return 'http://www.jbpress.co.jp/common/images/v1/jpn/common/logo.gif'

    def get_browser(self):
        """Return the download browser, logged in when credentials are set.

        When both ``self.username`` and ``self.password`` are set, a login
        form is filled in and submitted before the browser is returned.
        Otherwise the browser is returned exactly as created.
        """
        # get_browser is documented as an instance method of
        # BasicNewsRecipe, so the recipe instance is passed explicitly.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is None or self.password is None:
            return br

        # Minimal hand-written login form posted to the site's login URL.
        login_form = '''<form action="https://jbpress.ismedia.jp/auth/dologin/http://jbpress.ismedia.jp/articles/print/5549" method="post">
<input id="login" name="login" type="text"/>
<input id="password" name="password" type="password"/>
<input id="rememberme" name="rememberme" type="checkbox"/>
</form>
'''
        br.open('http://jbpress.ismedia.jp/articles/print/5549')
        # Replace the fetched page body with the form above so that
        # select_form() operates on it instead of the page's own markup.
        # NOTE(review): presumably the real page's form cannot be selected
        # directly - confirm before simplifying.
        response = br.response()
        response.set_data(login_form)
        br.set_response(response)
        br.select_form(nr=0)
        br['login'] = self.username
        br['password'] = self.password
        br.submit()
        return br

    def print_version(self, url):
        """Return the print-page URL for the article at ``url``.

        The link is opened first so that any HTTP redirect is resolved; in
        the final URL ``/-/`` is then replaced with ``/print/``.
        """
        from contextlib import closing

        # urllib2 responses are not context managers on Python 2; closing()
        # guarantees the connection is released once the URL is known.
        with closing(urllib2.urlopen(url)) as response:
            resolved = response.geturl()
        return resolved.replace('/-/', '/print/')

View File

@ -0,0 +1,33 @@
# E-mail address that NBOnline.get_browser() enters in the 'email' field of
# the login form. The value below is a placeholder: edit it to your own
# address before using the recipe.
EMAILADDRESS = 'hoge@foobar.co.jp'
from calibre.web.feeds.news import BasicNewsRecipe
class NBOnline(BasicNewsRecipe):
    """Calibre recipe for Nikkei Business Online.

    Articles are listed from the site's RDF feed and downloaded through
    their print view (``ST=print``).  The recipe declares
    ``needs_subscription``; the login additionally sends the module-level
    ``EMAILADDRESS`` value.
    """

    title = u'Nikkei Business Online'
    language = 'ja'
    description = u'Nikkei Business Online New articles. PLEASE NOTE: You need to edit EMAILADDRESS line of this "nbonline.recipe" file to set your e-mail address which is needed when login. (file is in "Calibre2/resources/recipes" directory.)'
    __author__ = 'Ado Nishimura'
    needs_subscription = True
    oldest_article = 7
    max_articles_per_feed = 100
    # Discard everything that precedes the element with id="kanban".
    remove_tags_before = dict(id='kanban')
    remove_tags = [dict(name='div', id='footer')]
    feeds = [('Nikkei Business Online', 'http://business.nikkeibp.co.jp/rss/all_nbo.rdf')]

    def get_cover_url(self):
        """Return the URL of the site logo, which is used as the cover."""
        return 'http://business.nikkeibp.co.jp/images/nbo/200804/parts/logo.gif'

    def get_browser(self):
        """Return the download browser, logged in when credentials are set.

        When both ``self.username`` and ``self.password`` are set, the
        ``loginActionForm`` form on the sign-on page is filled in with
        ``EMAILADDRESS``, the username and the password, then submitted.
        Otherwise the browser is returned exactly as created.
        """
        # get_browser is documented as an instance method of
        # BasicNewsRecipe, so the recipe instance is passed explicitly.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is None or self.password is None:
            return br

        br.open('https://signon.nikkeibp.co.jp/front/login/?ct=p&ts=nbo')
        br.select_form(name='loginActionForm')
        br['email'] = EMAILADDRESS
        br['userId'] = self.username
        br['password'] = self.password
        br.submit()
        return br

    def print_version(self, url):
        """Return ``url`` with the ``ST=print`` query parameter appended.

        ``?`` is used as the separator unless ``url`` already contains a
        query string, in which case ``&`` is used so the result stays a
        well-formed URL.
        """
        separator = '&' if '?' in url else '?'
        return url + separator + 'ST=print'

View File

@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = u'2011, Silviu Cotoar\u0103'
'''
publika.md
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Publika(BasicNewsRecipe):
    """Calibre recipe for the publika.md news feed."""

    # --- Publication metadata ---
    title = u'Publika'
    publisher = u'Publika'
    __author__ = u'Silviu Cotoar\u0103'
    description = u'\u015etiri din Moldova'
    category = 'Ziare,Stiri,Moldova'
    language = 'ro'
    cover_url = 'http://assets.publika.md/images/logo.jpg'

    # --- Download settings ---
    feeds = [(u'Feeds', u'http://rss.publika.md/stiri.xml')]
    oldest_article = 25
    max_articles_per_feed = 100
    encoding = 'utf-8'
    use_embedded_content = False
    no_stylesheets = True

    # Metadata handed to the conversion step, taken from the values above.
    conversion_options = dict(
        comments=description,
        tags=category,
        language=language,
        publisher=publisher
    )

    # --- Page clean-up rules ---
    # Only the div with id="colLeft" is kept.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': 'colLeft'}},
    ]

    # Divs removed from the kept content, matched by class.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': ['articleInfo']}},
        {'name': 'div', 'attrs': {'class': ['articleRelated']}},
        {'name': 'div', 'attrs': {'class': ['roundedBox socialSharing']}},
        {'name': 'div', 'attrs': {'class': ['comment clearfix']}},
    ]

    # Content following these divs is dropped.
    remove_tags_after = [
        {'name': 'div', 'attrs': {'class': ['roundedBox socialSharing']}},
        {'name': 'div', 'attrs': {'class': ['comment clearfix']}},
    ]

    def preprocess_html(self, soup):
        """Run ``soup`` through ``self.adeify_images`` and return the result."""
        processed = self.adeify_images(soup)
        return processed