add Kahoku Shinpo News and pet cat blog

This commit is contained in:
Hiroshi Miura 2010-12-09 23:48:57 +09:00
parent c5e866718c
commit ff6c024d2b
2 changed files with 68 additions and 0 deletions

View File

@ -0,0 +1,32 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.kahoku.co.jp
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class KahokuShinpoNews(BasicNewsRecipe):
title = u'Kahoku Shinpo News'
__author__ = 'Hiroshi Miura'
oldest_article = 2
max_articles_per_feed = 20
description = 'Tohoku regional news paper in Japan'
publisher = 'Kahoku Shinpo Sha'
category = 'news, japan'
language = 'ja'
encoding = 'Shift_JIS'
feeds = [(u'news', u'http://www.kahoku.co.jp/rss/index_thk.xml')]
keep_only_tags = [ dict(id="page_title"),
dict(id="news_detail"),
dict(id="bt_title"),
{'class':"photoLeft"},
dict(id="bt_body")
]
remove_tags = [ {'class':"button"}]

View File

@ -0,0 +1,36 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
http://ameblo.jp/sauta19/
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class UniNoHimituKichiBlog(BasicNewsRecipe):
title = u'Uni secret base'
__author__ = 'Hiroshi Miura'
oldest_article = 2
publication_type = 'blog'
max_articles_per_feed = 20
description = 'Japanese famous Cat blog'
publisher = ''
category = 'cat, pet, japan'
language = 'ja'
encoding = 'utf-8'
feeds = [(u'blog', u'http://feedblog.ameba.jp/rss/ameblo/sauta19/rss20.xml')]
def parse_feeds(self):
feeds = BasicNewsRecipe.parse_feeds(self)
for curfeed in feeds:
delList = []
for a,curarticle in enumerate(curfeed.articles):
if re.search(r'rssad.jp', curarticle.url):
delList.append(curarticle)
if len(delList)>0:
for d in delList:
index = curfeed.articles.index(d)
curfeed.articles[index:index+1] = []
return feeds