recipe: update yomiuri online world

Signed-off-by: Hiroshi Miura <miurahr@linux.com>
2025-07-09 03:04:10 -04:00 · 2014-05-06 23:16:13 +09:00 · 2014-05-06 23:16:13 +09:00 · 4bb290657d
commit 4bb290657d
parent 521a8f93c1
2 changed files with 53 additions and 44 deletions
--- a/recipes/yomiuri.recipe
+++ b/recipes/yomiuri.recipe
@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+__copyright__ = '2010,2014, Hiroshi Miura <miurahr@linux.com>'
 '''
 www.yomiuri.co.jp
 '''
@ -16,16 +16,13 @@ class YOLNews(BasicNewsRecipe):
    publisher      = 'Yomiuri Online News'
    category       = 'news, japan'
    language       = 'ja'
-    encoding       = 'Shift_JIS'
+    encoding       = 'UTF-8'
    index          = 'http://www.yomiuri.co.jp/latestnews/'
    remove_javascript = True
    masthead_title = u'YOMIURI ONLINE'
-    keep_only_tags = [{'class':"article-def"}]
+
-    remove_tags = [{'class':"RelatedArticle"},
+    keep_only_tags = [{'class':"article text-resizeable"}]
                   {'class':"sbtns"}
                    ]
    remove_tags_after = {'class':"date-def"}
    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
@ -42,22 +39,22 @@ class YOLNews(BasicNewsRecipe):
    def parse_index(self):
        feeds = []
        newsarticles = []
        soup   = self.index_to_soup(self.index)
-        topstories = soup.find('ul',attrs={'class':'list-def'})
+        listlatest = soup.find('ul', attrs={'class':'list-common list-common-latest'})
-        if topstories:
+        if listlatest:
-           newsarticles = []
+                for itt in listlatest.findAll('li'):
-           for itt in topstories.findAll('li'):
+                    itema = itt.find('a',href=True)
-                itema = itt.find('a',href=True)
+                    if itema:
-                if itema:
+                        item_headline = itema.find('span',attrs={'class':'headline'})
-                    itd1 = itema.findNextSibling(text = True)
+                        item_date     = item_headline.find('span',attrs={'class':'update'})
-                    itd2 = itd1.findNextSibling(text = True)
+                        newsarticles.append({
-                    itd3 = itd2.findNextSibling(text = True)
+                               'title'      :item_headline.contents[0]
-                    newsarticles.append({
+                              ,'date'       :item_date
-                                      'title'      :itema.string
+                              ,'url'        :itema['href']
-                                     ,'date'       :''.join([itd1, itd2, itd3])
+                              ,'description':''
-                                     ,'url'        :'http://www.yomiuri.co.jp' + itema['href']
+                        })
-                                     ,'description':''
+        feeds.append(('latest', newsarticles))
                                    })
           feeds.append(('latest', newsarticles))
        return feeds
--- a/recipes/yomiuri_world.recipe
+++ b/recipes/yomiuri_world.recipe
@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+__copyright__ = '2010,2014, Hiroshi Miura <miurahr@linux.com>'
 '''
 www.yomiuri.co.jp
 '''
@ -16,16 +16,12 @@ class YOLNews(BasicNewsRecipe):
    publisher      = 'Yomiuri Online News'
    category       = 'news, japan'
    language       = 'ja'
-    encoding       = 'Shift_JIS'
+    encoding       = 'UTF-8'
    index          = 'http://www.yomiuri.co.jp/world/'
    remove_javascript = True
    masthead_title = u"YOMIURI ONLINE"
-    keep_only_tags = [{'class':"article-def"}]
+    keep_only_tags = [{'class':"article text-resizeable"}]
    remove_tags = [{'class':"RelatedArticle"},
                   {'class':"sbtns"}
                    ]
    remove_tags_after = {'class':"date-def"}
    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
@ -42,20 +38,36 @@ class YOLNews(BasicNewsRecipe):
    def parse_index(self):
        feeds = []
        newsarticles = []
        soup   = self.index_to_soup(self.index)
-        topstories = soup.find('ul',attrs={'class':'list-def'})
+        mainspan = soup.find('div', attrs={'class':'pbNested span-main-inr'})
-        if topstories:
+        if mainspan:
-           newsarticles = []
+            topstories = mainspan.find('ul',attrs={'class':'list-top'})
-           for itt in topstories.findAll('li'):
+            if topstories:
-                itema = itt.find('a',href=True)
+                for itt in topstories.findAll('li'):
-                if itema:
+                    itema = itt.find('a',href=True)
-                    itd1 = itema.findNextSibling(text = True)
+                    if itema:
-                    newsarticles.append({
+                        item_headline = itema.find('span',attrs={'class':'headline'})
-                                      'title'      :itema.string
+                        item_date     = item_headline.find('span',attrs={'class':'update'})
-                                     ,'date'       :''.join([itd1])
+                        newsarticles.append({
-                                     ,'url'        :'http://www.yomiuri.co.jp' + itema['href']
+                               'title'      :item_headline.contents[0]
-                                     ,'description':''
+                              ,'date'       :item_date
-                                    })
+                              ,'url'        :itema['href']
-           feeds.append(('World', newsarticles))
+                              ,'description':''
                        })
            secondstories = mainspan.find('ul', attrs={'class':'list-common'})
            if secondstories:
                for itt in secondstories.findAll('li'):
                    itema = itt.find('a',href=True)
                    if itema:
                        item_headline = itema.find('span',attrs={'class':'headline'})
                        item_date     = item_headline.find('span',attrs={'class':'update'})
                        newsarticles.append({
                               'title'      :item_headline.contents[0]
                              ,'date'       :item_date
                              ,'url'        :itema['href']
                              ,'description':''
                        })
        feeds.append(('World', newsarticles))
        return feeds