Fix character encoding and minor bugs

This commit is contained in:
Hiroshi Miura 2010-11-27 01:08:09 +09:00
parent bbf166479d
commit 38d1375974
3 changed files with 18 additions and 3 deletions

View File

@ -17,10 +17,19 @@ class JijiDotCom(BasicNewsRecipe):
encoding = 'utf-8'
oldest_article = 6
max_articles_per_feed = 100
encoding = 'EUC_JP'
language = 'ja'
cover_url = 'http://www.jiji.com/img/top_header_logo2.gif'
masthead_url = 'http://jen.jiji.com/images/logo_jijipress.gif'
top_url = 'http://www.jiji.com/'
feeds = [(u'\u30cb\u30e5\u30fc\u30b9', u'http://www.jiji.com/rss/ranking.rdf')]
remove_tags_after = dict(id="ad_google")
def get_cover_url(self):
    """Return the URL of the cover image for this recipe.

    Loads the page at ``self.top_url`` and looks for a ``div`` with class
    ``top-pad-photos``.  When that div contains an ``img`` with a ``src``
    attribute, the ``src`` is resolved against ``self.top_url`` and
    returned.  In every other case the static header logo URL is returned.
    """
    # Local import keeps the block self-contained; the fallback covers
    # Python 2, where urljoin lives in the ``urlparse`` module.
    try:
        from urllib.parse import urljoin
    except ImportError:
        from urlparse import urljoin

    cover_url = 'http://www.jiji.com/img/top_header_logo2.gif'
    soup = self.index_to_soup(self.top_url)
    cover_item = soup.find('div', attrs={'class': 'top-pad-photos'})
    if cover_item:
        img = cover_item.img
        # The div may hold no image, or an image without ``src``; fall back
        # to the default logo instead of raising.
        src = img.get('src') if img is not None else None
        if src:
            # urljoin handles relative, root-relative ('/x.jpg') and
            # absolute links; plain concatenation only handled the first.
            cover_url = urljoin(self.top_url, src)
    return cover_url

View File

@ -1,4 +1,3 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
@ -16,9 +15,13 @@ class MSNSankeiNewsProduct(BasicNewsRecipe):
max_articles_per_feed = 100
encoding = 'Shift_JIS'
language = 'ja'
cover_url = 'http://sankei.jp.msn.com/images/common/sankeShinbunLogo.jpg'
masthead_url = 'http://sankei.jp.msn.com/images/common/sankeiNewsLogo.gif'
feeds = [(u'\u65b0\u5546\u54c1', u'http://sankei.jp.msn.com/rss/news/release.xml')]
remove_tags_before = dict(id="__r_article_title__")
remove_tags_after = dict(id="ajax_release_news")
remove_tags = [{'class':"parent chromeCustom6G"}]
remove_tags = [{'class':"parent chromeCustom6G"},
{'class':"RelatedImg"}
]

View File

@ -30,6 +30,9 @@ class NikkeiNet_sub_main(BasicNewsRecipe):
{'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
{'class':"cmn-article_keyword cmn-clearfix"},
{'class':"cmn-print_headline cmn-clearfix"},
{'class':"cmn-article_list"},
{'class':"cmn-dashedline"},
{'class':"cmn-hide"},
]
remove_tags_after = {'class':"cmn-pr_list"}