Review notes (text before the first "diff --git" line is ignored by patch tools):
  * Line breaks and indentation were reconstructed from a whitespace-collapsed
    copy of this patch; context-line indentation is a best guess and should be
    confirmed against the target tree before applying.
  * toyokeizai.recipe was revised during review (None guards in parse_index,
    unused local dropped in get_browser), so its content no longer corresponds
    to the blob id recorded on its "index" line.
diff --git a/resources/recipes/mainichi_it_news.recipe b/resources/recipes/mainichi_it_news.recipe
index 4c285a2c01..eddab149cd 100644
--- a/resources/recipes/mainichi_it_news.recipe
+++ b/resources/recipes/mainichi_it_news.recipe
@@ -1,4 +1,5 @@
 from calibre.web.feeds.news import BasicNewsRecipe
+import re
 
 class MainichiDailyITNews(BasicNewsRecipe):
     title = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
@@ -14,6 +15,7 @@ class MainichiDailyITNews(BasicNewsRecipe):
 
     remove_tags_before = {'class':"NewsTitle"}
     remove_tags = [{'class':"RelatedArticle"}]
+    remove_tags_after = {'class':"Credit"}
 
     def parse_feeds(self):
 
@@ -29,4 +31,4 @@ class MainichiDailyITNews(BasicNewsRecipe):
                     index = curfeed.articles.index(d)
                     curfeed.articles[index:index+1] = []
 
-        return feeds remove_tags_after = {'class':"Credit"}
+        return feeds
diff --git a/resources/recipes/the_h.recipe b/resources/recipes/the_h.recipe
index dbfad7e32a..28a1571dc5 100644
--- a/resources/recipes/the_h.recipe
+++ b/resources/recipes/the_h.recipe
@@ -14,7 +14,7 @@ class TheHeiseOnline(BasicNewsRecipe):
     oldest_article = 3
     description = 'In association with Heise Online'
     publisher = 'Heise Media UK Ltd.'
-    category = 'news, technology, security'
+    category = 'news, technology, security, OSS, internet'
     max_articles_per_feed = 100
     language = 'en'
     encoding = 'utf-8'
@@ -27,6 +27,12 @@ class TheHeiseOnline(BasicNewsRecipe):
     feeds = [
         (u'The H News Feed', u'http://www.h-online.com/news/atom.xml')
     ]
+    cover_url = 'http://www.h-online.com/icons/logo_theH.gif'
+
+    remove_tags = [
+        dict(id="logo"),
+        dict(id="footer")
+    ]
 
     def print_version(self, url):
         return url + '?view=print'
diff --git a/resources/recipes/toyokeizai.recipe b/resources/recipes/toyokeizai.recipe
new file mode 100644
index 0000000000..395a8bb9b7
--- /dev/null
+++ b/resources/recipes/toyokeizai.recipe
@@ -0,0 +1,86 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura '
+'''
+www.toyokeizai.net
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class Toyokeizai(BasicNewsRecipe):
+    title = u'ToyoKeizai News'
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 1
+    max_articles_per_feed = 50
+    description = 'Japanese traditional economy and business magazine, only for advanced subscribers supported'
+    publisher = 'Toyokeizai Shinbun Sha'
+    category = 'economy, magazine, japan'
+    language = 'ja'
+    encoding = 'euc-jp'
+    index = 'http://member.toyokeizai.net/news/'
+    remove_javascript = True
+    no_stylesheets = True
+    masthead_title = u'TOYOKEIZAI'
+    needs_subscription = True
+    timefmt = '[%y/%m/%d]'
+    recursions = 5
+    match_regexps =[ r'page/\d+']
+
+    keep_only_tags = [
+        dict(name='div', attrs={'class':['news']}),
+        dict(name='div', attrs={'class':["news_cont"]}),
+        dict(name='div', attrs={'class':["news_con"]}),
+#        dict(name='div', attrs={'class':["norightsMessage"]})
+    ]
+    remove_tags = [{'class':"mt35 mgz"},
+        {'class':"mt20 newzia"},
+        {'class':"mt20 fontS"},
+        {'class':"bk_btn_m"},
+        dict(id='newzia_connect_member')
+    ]
+
+    def parse_index(self):
+        '''
+        Build the article list from the page at self.index.
+
+        Returns a list holding one ('news', articles) tuple, or an empty
+        list when the page has no ul element with class 'list6'.
+        '''
+        feeds = []
+        soup = self.index_to_soup(self.index)
+        topstories = soup.find('ul',attrs={'class':'list6'})
+        if topstories:
+            date_prefix = re.compile(r"\- ")
+            newsarticles = []
+            for itt in topstories.findAll('li'):
+                itema = itt.find('a',href=True)
+                if itema is None:
+                    # No link means no article URL, so there is nothing to fetch.
+                    continue
+                itemd = itt.find('span')
+                # A missing or non-text span yields an empty date
+                # instead of raising on None.
+                datetext = itemd.string if itemd is not None else None
+                newsarticles.append({
+                    'title'      :itema.string
+                   ,'date'       :date_prefix.sub("", datetext or '')
+                   ,'url'        :'http://member.toyokeizai.net' + itema['href']
+                   ,'description':itema.get('title', '')
+                })
+            feeds.append(('news', newsarticles))
+        return feeds
+
+    def get_browser(self):
+        '''
+        Return a browser; when both username and password are set, the
+        form at /norights/form/ is filled in and submitted first.
+        '''
+        br = BasicNewsRecipe.get_browser()
+        if self.username is not None and self.password is not None:
+            br.open('http://member.toyokeizai.net/norights/form/')
+            # nr=0 selects the first form on the page (presumably the login form).
+            br.select_form(nr=0)
+            br['kaiin_id'] = self.username
+            br['password'] = self.password
+            br.submit()
+        return br