add: recipe

- add MSN Sankei News product releases recipe
- fix reuters_ja recipe to remove several tags
This commit is contained in:
Hiroshi Miura 2010-11-21 22:28:20 +09:00
parent bedcdac5fb
commit cee89baa64
2 changed files with 36 additions and 2 deletions

View File

@ -0,0 +1,22 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
sankei.jp.msn.com
'''
from calibre.web.feeds.news import BasicNewsRecipe


class MSNSankeiNewsProduct(BasicNewsRecipe):
    """calibre recipe for the MSN Sankei News new-product release feed.

    The recipe is purely declarative: it names a single RSS feed and tells
    calibre which parts of each downloaded page to keep.
    """

    # Displayed title; the escapes spell "MSN Sankei News (New Products)"
    # in Japanese.
    title = u'MSN\u7523\u7d4c\u30cb\u30e5\u30fc\u30b9(\u65b0\u5546\u54c1)'
    __author__ = 'Hiroshi Miura'
    description = 'Products release from Japan'
    language = 'ja'

    # Download window: articles up to 7 days old, at most 100 per feed.
    oldest_article = 7
    max_articles_per_feed = 100

    # Override the charset calibre uses to decode the pages.
    encoding = 'Shift_JIS'

    # (feed title, feed URL) pairs; the title means "New Products".
    feeds = [
        (u'\u65b0\u5546\u54c1',
         u'http://sankei.jp.msn.com/rss/news/release.xml'),
    ]

    # Keep only the span between the article title element and the
    # release-news element, then drop the listed element inside it.
    remove_tags_before = dict(id="__r_article_title__")
    remove_tags_after = dict(id="ajax_release_news")
    remove_tags = [{'class': "parent chromeCustom6G"}]

View File

@ -1,4 +1,5 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class ReutersJa(BasicNewsRecipe):
@ -20,6 +21,17 @@ class ReutersJa(BasicNewsRecipe):
# Discard everything that precedes the main article container.
remove_tags_before = {'class': "article primaryContent"}

# Page furniture removed from the article body. Each entry is an
# attribute filter (element id or CSS class); "textSizer" is listed
# exactly once.
remove_tags = [
    dict(id="banner"),
    dict(id="autilities"),
    dict(id="textSizer"),
    dict(id="shareFooter"),
    dict(id="relatedNews"),
    dict(id="editorsChoice"),
    dict(id="ecArticles"),
    {'class': "secondaryContent"},
    {'class': "module"},
]

# Discard everything that follows the "assetBuddy" element. This is the
# only assignment: the earlier id="copyrightNotice" value was overwritten
# immediately and never took effect.
remove_tags_after = {'class': "assetBuddy"}
def print_version(self, url):
    """Return the URL of the print version of a Reuters Japan article.

    The article URL is truncated right after its ``idJPJAPAN-<digits>``
    identifier (dropping any query string or trailing path) and
    ``?sp=true`` is appended.
    NOTE(review): ``sp=true`` presumably selects the single-page view;
    confirm against the site.

    :param url: article URL taken from the feed.
    :return: the rewritten URL, or ``url`` unchanged when it contains no
        ``idJPJAPAN-<digits>`` identifier.
    """
    m = re.search('(.*idJPJAPAN-[0-9]+)', url)
    if m is None:
        # Not a recognised article URL: hand it back untouched instead of
        # failing with AttributeError on the missing match object.
        return url
    return m.group(0) + '?sp=true'