recipe: update yomiuri online world

Signed-off-by: Hiroshi Miura <miurahr@linux.com>
2025-07-09 03:04:10 -04:00 · 2014-05-06 23:16:13 +09:00 · 2014-05-06 23:16:13 +09:00 · 4bb290657d
commit 4bb290657d
parent 521a8f93c1
2 changed files with 53 additions and 44 deletions
--- a/recipes/yomiuri.recipe
+++ b/recipes/yomiuri.recipe
@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+__copyright__ = '2010,2014, Hiroshi Miura <miurahr@linux.com>'
 '''
 www.yomiuri.co.jp
 '''
@ -16,16 +16,13 @@ class YOLNews(BasicNewsRecipe):
    publisher      = 'Yomiuri Online News'
    category       = 'news, japan'
    language       = 'ja'
-    encoding       = 'Shift_JIS'
+    encoding       = 'UTF-8'
    index          = 'http://www.yomiuri.co.jp/latestnews/'
    remove_javascript = True
    masthead_title = u'YOMIURI ONLINE'

-    keep_only_tags = [{'class':"article-def"}]
-    remove_tags = [{'class':"RelatedArticle"},
-                   {'class':"sbtns"}
-                    ]
-    remove_tags_after = {'class':"date-def"}
+
+    keep_only_tags = [{'class':"article text-resizeable"}]

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
@ -42,22 +39,22 @@ class YOLNews(BasicNewsRecipe):

    def parse_index(self):
        feeds = []
-        soup   = self.index_to_soup(self.index)
-        topstories = soup.find('ul',attrs={'class':'list-def'})
-        if topstories:
        newsarticles = []
-           for itt in topstories.findAll('li'):
+        soup   = self.index_to_soup(self.index)
+        listlatest = soup.find('ul', attrs={'class':'list-common list-common-latest'})
+        if listlatest:
+                for itt in listlatest.findAll('li'):
                    itema = itt.find('a',href=True)
                    if itema:
-                    itd1 = itema.findNextSibling(text = True)
-                    itd2 = itd1.findNextSibling(text = True)
-                    itd3 = itd2.findNextSibling(text = True)
+                        item_headline = itema.find('span',attrs={'class':'headline'})
+                        item_date     = item_headline.find('span',attrs={'class':'update'})
                        newsarticles.append({
-                                      'title'      :itema.string
-                                     ,'date'       :''.join([itd1, itd2, itd3])
-                                     ,'url'        :'http://www.yomiuri.co.jp' + itema['href']
+                               'title'      :item_headline.contents[0]
+                              ,'date'       :item_date
+                              ,'url'        :itema['href']
                              ,'description':''
                        })
        feeds.append(('latest', newsarticles))
        return feeds

+
--- a/recipes/yomiuri_world.recipe
+++ b/recipes/yomiuri_world.recipe
@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+__copyright__ = '2010,2014, Hiroshi Miura <miurahr@linux.com>'
 '''
 www.yomiuri.co.jp
 '''
@ -16,16 +16,12 @@ class YOLNews(BasicNewsRecipe):
    publisher      = 'Yomiuri Online News'
    category       = 'news, japan'
    language       = 'ja'
-    encoding       = 'Shift_JIS'
+    encoding       = 'UTF-8'
    index          = 'http://www.yomiuri.co.jp/world/'
    remove_javascript = True
    masthead_title = u"YOMIURI ONLINE"

-    keep_only_tags = [{'class':"article-def"}]
-    remove_tags = [{'class':"RelatedArticle"},
-                   {'class':"sbtns"}
-                    ]
-    remove_tags_after = {'class':"date-def"}
+    keep_only_tags = [{'class':"article text-resizeable"}]

    def parse_feeds(self):
        feeds = BasicNewsRecipe.parse_feeds(self)
@ -42,18 +38,34 @@ class YOLNews(BasicNewsRecipe):

    def parse_index(self):
        feeds = []
-        soup   = self.index_to_soup(self.index)
-        topstories = soup.find('ul',attrs={'class':'list-def'})
-        if topstories:
        newsarticles = []
+        soup   = self.index_to_soup(self.index)
+        mainspan = soup.find('div', attrs={'class':'pbNested span-main-inr'})
+        if mainspan:
+            topstories = mainspan.find('ul',attrs={'class':'list-top'})
+            if topstories:
                for itt in topstories.findAll('li'):
                    itema = itt.find('a',href=True)
                    if itema:
-                    itd1 = itema.findNextSibling(text = True)
+                        item_headline = itema.find('span',attrs={'class':'headline'})
+                        item_date     = item_headline.find('span',attrs={'class':'update'})
                        newsarticles.append({
-                                      'title'      :itema.string
-                                     ,'date'       :''.join([itd1])
-                                     ,'url'        :'http://www.yomiuri.co.jp' + itema['href']
+                               'title'      :item_headline.contents[0]
+                              ,'date'       :item_date
+                              ,'url'        :itema['href']
+                              ,'description':''
+                        })
+            secondstories = mainspan.find('ul', attrs={'class':'list-common'})
+            if secondstories:
+                for itt in secondstories.findAll('li'):
+                    itema = itt.find('a',href=True)
+                    if itema:
+                        item_headline = itema.find('span',attrs={'class':'headline'})
+                        item_date     = item_headline.find('span',attrs={'class':'update'})
+                        newsarticles.append({
+                               'title'      :item_headline.contents[0]
+                              ,'date'       :item_date
+                              ,'url'        :itema['href']
                              ,'description':''
                        })
        feeds.append(('World', newsarticles))