ToyoKeizai News by Hiroshi Miura

Kovid Goyal 2010-12-08 09:39:05 -07:00
commit bb14142bec
3 changed files with 78 additions and 2 deletions

View File

@@ -1,4 +1,5 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+import re

 class MainichiDailyITNews(BasicNewsRecipe):
     title = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
@@ -14,6 +15,7 @@ class MainichiDailyITNews(BasicNewsRecipe):
     remove_tags_before = {'class':"NewsTitle"}
     remove_tags        = [{'class':"RelatedArticle"}]
+    remove_tags_after  = {'class':"Credit"}

     def parse_feeds(self):
@@ -29,4 +31,4 @@ class MainichiDailyITNews(BasicNewsRecipe):
                 index = curfeed.articles.index(d)
                 curfeed.articles[index:index+1] = []
-        return feeds remove_tags_after = {'class':"Credit"}
+        return feeds

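The Mainichi hunks above move remove_tags_after out of the parse_feeds body, where it sat dead on the same line as return feeds, up to the class attributes, which is where BasicNewsRecipe actually reads it. As a rough sketch only (not part of this commit; the class name and feed URL are invented), this is how the three pruning attributes work together in a recipe:

from calibre.web.feeds.news import BasicNewsRecipe

class PruningSketch(BasicNewsRecipe):
    title = 'Pruning sketch'
    feeds = [('Example', 'http://example.com/rss.xml')]

    # drop everything before the first element with class "NewsTitle"...
    remove_tags_before = {'class': 'NewsTitle'}
    # ...drop everything after the first element with class "Credit"...
    remove_tags_after = {'class': 'Credit'}
    # ...and remove related-article boxes inside the kept range.
    remove_tags = [{'class': 'RelatedArticle'}]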
View File

@@ -14,7 +14,7 @@ class TheHeiseOnline(BasicNewsRecipe):
     oldest_article = 3
     description = 'In association with Heise Online'
     publisher = 'Heise Media UK Ltd.'
-    category = 'news, technology, security'
+    category = 'news, technology, security, OSS, internet'
     max_articles_per_feed = 100
     language = 'en'
     encoding = 'utf-8'
@@ -27,6 +27,12 @@ class TheHeiseOnline(BasicNewsRecipe):
     feeds = [
         (u'The H News Feed', u'http://www.h-online.com/news/atom.xml')
     ]
+    cover_url = 'http://www.h-online.com/icons/logo_theH.gif'
+    remove_tags = [
+        dict(id="logo"),
+        dict(id="footer")
+    ]

     def print_version(self, url):
         return url + '?view=print'

View File

@@ -0,0 +1,68 @@
__license__   = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.toyokeizai.net
'''

from calibre.web.feeds.news import BasicNewsRecipe
import re

class Toyokeizai(BasicNewsRecipe):
    title          = u'ToyoKeizai News'
    __author__     = 'Hiroshi Miura'
    oldest_article = 1
    max_articles_per_feed = 50
    description    = 'Japanese economy and business magazine; only paid (advanced) subscribers are supported'
    publisher      = 'Toyokeizai Shinbun Sha'
    category       = 'economy, magazine, japan'
    language       = 'ja'
    encoding       = 'euc-jp'
    index          = 'http://member.toyokeizai.net/news/'
    remove_javascript = True
    no_stylesheets = True
    masthead_title = u'TOYOKEIZAI'
    needs_subscription = True
    timefmt        = '[%y/%m/%d]'
    recursions     = 5
    match_regexps  = [r'page/\d+']

    # keep only the article body containers; everything else on the page is dropped
    keep_only_tags = [
        dict(name='div', attrs={'class':['news']}),
        dict(name='div', attrs={'class':["news_cont"]}),
        dict(name='div', attrs={'class':["news_con"]}),
        # dict(name='div', attrs={'class':["norightsMessage"]})
    ]
    # strip navigation, social and member-only widgets left inside the body
    remove_tags = [
        {'class':"mt35 mgz"},
        {'class':"mt20 newzia"},
        {'class':"mt20 fontS"},
        {'class':"bk_btn_m"},
        dict(id='newzia_connect_member')
    ]

    def parse_index(self):
        feeds = []
        soup = self.index_to_soup(self.index)
        topstories = soup.find('ul', attrs={'class':'list6'})
        if topstories:
            newsarticles = []
            # each <li> carries a link, a '- date' span and a tooltip description
            for itt in topstories.findAll('li'):
                itema = itt.find('a', href=True)
                itemd = itt.find('span')
                newsarticles.append({
                    'title'      : itema.string,
                    'date'       : re.compile(r"\- ").sub("", itemd.string),
                    'url'        : 'http://member.toyokeizai.net' + itema['href'],
                    'description': itema['title']
                })
            feeds.append(('news', newsarticles))
        return feeds

    def get_browser(self):
        # log in through the subscriber form before any article is fetched
        br = BasicNewsRecipe.get_browser()
        if self.username is not None and self.password is not None:
            br.open('http://member.toyokeizai.net/norights/form/')
            br.select_form(nr=0)
            br['kaiin_id'] = self.username
            br['password'] = self.password
            br.submit()
        return br
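
For illustration only (nothing below is part of the commit, and all values are made up): parse_index() strips the leading '- ' from the date text of each list item and returns a list of (section name, article list) pairs, which is the structure calibre expects:

import re

# made-up sample of the text found in a list item's <span>
raw_date = u'- 10/12/08'
clean_date = re.compile(r"\- ").sub("", raw_date)
assert clean_date == u'10/12/08'

# shape of the value parse_index() returns
feeds = [('news', [{
    'title'      : u'Sample headline',
    'date'       : clean_date,
    'url'        : 'http://member.toyokeizai.net/news/sample',
    'description': u'Sample tooltip text',
}])]

Because needs_subscription is True, the recipe is normally run with subscriber credentials, for example through ebook-convert's --username and --password options, which get_browser() uses to fill in the login form at member.toyokeizai.net.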