Fix #996227 (Updated recipe for mainichi news - IT and electronics)

2025-07-09 03:04:10 -04:00 · 2012-05-08 07:46:44 +05:30 · 2012-05-08 07:46:44 +05:30 · 42b2e15451
commit 42b2e15451
parent 55bbc9986d
2 changed files with 59 additions and 34 deletions
--- a/recipes/mainichi_it_news.recipe
+++ b/recipes/mainichi_it_news.recipe
@@ -1,34 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-import re
-
-class MainichiDailyITNews(BasicNewsRecipe):
-    title          = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
-    __author__     = 'Hiroshi Miura'
-    oldest_article = 2
-    max_articles_per_feed = 100
-    description    = 'Japanese traditional newspaper Mainichi Daily News - IT and electronics'
-    publisher      = 'Mainichi Daily News'
-    category       = 'news, Japan, IT, Electronics'
-    language       = 'ja'
-
-    feeds          = [(u'IT News', u'http://mainichi.pheedo.jp/f/mainichijp_electronics')]
-
-    remove_tags_before = {'class':"NewsTitle"}
-    remove_tags = [{'class':"RelatedArticle"}]
-    remove_tags_after = {'class':"Credit"}
-
-    def parse_feeds(self):
-
-        feeds = BasicNewsRecipe.parse_feeds(self)
-
-        for curfeed in feeds:
-            delList = []
-            for a,curarticle in enumerate(curfeed.articles):
-                if re.search(r'pheedo.jp', curarticle.url):
-                    delList.append(curarticle)
-            if len(delList)>0:
-                for d in delList:
-                    index = curfeed.articles.index(d)
-                    curfeed.articles[index:index+1] = []
-
-        return feeds
--- a/recipes/mainichi_science_news.recipe
+++ b/recipes/mainichi_science_news.recipe
@@ -0,0 +1,59 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+www.mainichi.jp
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MainichiDailyScienceNews(BasicNewsRecipe):
+    title          = u'\u6bce\u65e5\u65b0\u805e(Science)'
+    __author__     = 'Hiroshi Miura'
+    oldest_article = 2
+    max_articles_per_feed = 20
+    description    = 'Japanese traditional newspaper Mainichi Daily News - science'
+    publisher      = 'Mainichi Daily News'
+    category       = 'news, japan'
+    language       = 'ja'
+    index          = 'http://mainichi.jp/select/science'
+    remove_javascript = True
+    masthead_title = u'MAINICHI DAILY NEWS'
+
+    remove_tags_before = {'class':"NewsTitle"}
+    remove_tags_after = {'class':"NewsBody clr"}
+
+    def parse_feeds(self):
+
+        feeds = BasicNewsRecipe.parse_feeds(self)
+
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'rssad.jp', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+
+        return feeds
+
+    def parse_index(self):
+        feeds = []
+        soup   = self.index_to_soup(self.index)
+        topstories = soup.find('ul',attrs={'class':'MaiLink'})
+        if topstories:
+           newsarticles = []
+           for itt in topstories.findAll('li'):
+                itema = itt.find('a',href=True)
+                if itema:
+                    newsarticles.append({
+                                      'title'      :itema.string
+                                     ,'date'       :''
+                                     ,'url'        :itema['href']
+                                     ,'description':''
+                                    })
+           feeds.append(('Science', newsarticles))
+        return feeds
+