add: recipe

- add MSN Sankei News product releases
- fix reuters_ja recipe to remove several tags
2025-07-09 03:04:10 -04:00 · 2010-11-21 22:28:20 +09:00 · 2010-11-21 22:28:20 +09:00 · cee89baa64
commit cee89baa64
parent bedcdac5fb
2 changed files with 36 additions and 2 deletions
--- a/resources/recipes/msnsankei.recipe
+++ b/resources/recipes/msnsankei.recipe
@ -0,0 +1,22 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+sankei.jp.msn.com
+'''
+
+class MSNSankeiNewsProduct(BasicNewsRecipe):
+    title          = u'MSN\u7523\u7d4c\u30cb\u30e5\u30fc\u30b9(\u65b0\u5546\u54c1)'
+    __author__      = 'Hiroshi Miura'
+    description     = 'Products release from Japan'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    encoding       = 'Shift_JIS'
+    language       = 'ja'
+
+    feeds          = [(u'\u65b0\u5546\u54c1', u'http://sankei.jp.msn.com/rss/news/release.xml')]
+
+    remove_tags_before = dict(id="__r_article_title__")
+    remove_tags_after  = dict(id="ajax_release_news")
+    remove_tags = [{'class':"parent chromeCustom6G"}]
--- a/resources/recipes/reuters_ja.recipe
+++ b/resources/recipes/reuters_ja.recipe
@ -1,4 +1,5 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+import re

 class ReutersJa(BasicNewsRecipe):

@ -20,6 +21,17 @@ class ReutersJa(BasicNewsRecipe):
    remove_tags_before = {'class':"article primaryContent"}
    remove_tags = [ dict(id="banner"),
                    dict(id="autilities"),
-                    dict(id="textSizer")
+                    dict(id="textSizer"),
+                    dict(id="shareFooter"),
+                    dict(id="relatedNews"),
+                    dict(id="editorsChoice"),
+                    dict(id="ecArticles"),
+                    {'class':"secondaryContent"},
+                    {'class':"module"},
                     ]
-    remove_tags_after = dict(id="copyrightNotice")
+    remove_tags_after = {'class':"assetBuddy"}
+
+    def print_version(self, url):  # return the '?sp=true' variant of an article URL
+        m = re.search('(.*idJPJAPAN-[0-9]+)', url)  # keep the URL up to and including the article id, dropping anything after it
+        return m.group(0)+'?sp=true' if m else url  # no article id in url: return it unchanged instead of raising AttributeError on None
+