From 42b2e1545177c4e5b7f80c6d4b9381653c51fd44 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 8 May 2012 07:46:44 +0530
Subject: [PATCH] Fix #996227 (Updated recipe for mainichi news - IT and
 electronics)

---
 recipes/mainichi_it_news.recipe      | 34 ----------------
 recipes/mainichi_science_news.recipe | 59 ++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 34 deletions(-)
 delete mode 100644 recipes/mainichi_it_news.recipe
 create mode 100644 recipes/mainichi_science_news.recipe

diff --git a/recipes/mainichi_it_news.recipe b/recipes/mainichi_it_news.recipe
deleted file mode 100644
index eddab149cd..0000000000
--- a/recipes/mainichi_it_news.recipe
+++ /dev/null
@@ -1,34 +0,0 @@
-from calibre.web.feeds.news import BasicNewsRecipe
-import re
-
-class MainichiDailyITNews(BasicNewsRecipe):
-    title          = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
-    __author__     = 'Hiroshi Miura'
-    oldest_article = 2
-    max_articles_per_feed = 100
-    description    = 'Japanese traditional newspaper Mainichi Daily News - IT and electronics'
-    publisher      = 'Mainichi Daily News'
-    category       = 'news, Japan, IT, Electronics'
-    language       = 'ja'
-
-    feeds          = [(u'IT News', u'http://mainichi.pheedo.jp/f/mainichijp_electronics')]
-
-    remove_tags_before = {'class':"NewsTitle"}
-    remove_tags    = [{'class':"RelatedArticle"}]
-    remove_tags_after = {'class':"Credit"}
-
-    def parse_feeds(self):
-
-        feeds = BasicNewsRecipe.parse_feeds(self)
-
-        for curfeed in feeds:
-            delList = []
-            for a,curarticle in enumerate(curfeed.articles):
-                if re.search(r'pheedo.jp', curarticle.url):
-                    delList.append(curarticle)
-            if len(delList)>0:
-                for d in delList:
-                    index = curfeed.articles.index(d)
-                    curfeed.articles[index:index+1] = []
-
-        return feeds
diff --git a/recipes/mainichi_science_news.recipe b/recipes/mainichi_science_news.recipe
new file mode 100644
index 0000000000..75d0becc9f
--- /dev/null
+++ b/recipes/mainichi_science_news.recipe
@@ -0,0 +1,59 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura '
+'''
+www.mainichi.jp
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class MainichiDailyScienceNews(BasicNewsRecipe):
+    title          = u'\u6bce\u65e5\u65b0\u805e(Science)'
+    __author__     = 'Hiroshi Miura'
+    oldest_article = 2
+    max_articles_per_feed = 20
+    description    = 'Japanese traditional newspaper Mainichi Daily News - science'
+    publisher      = 'Mainichi Daily News'
+    category       = 'news, japan'
+    language       = 'ja'
+    index          = 'http://mainichi.jp/select/science'
+    remove_javascript = True
+    masthead_title = u'MAINICHI DAILY NEWS'
+
+    remove_tags_before = {'class':"NewsTitle"}
+    remove_tags_after  = {'class':"NewsBody clr"}
+
+    def parse_feeds(self):
+
+        feeds = BasicNewsRecipe.parse_feeds(self)
+
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'rssad.jp', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+
+        return feeds
+
+    def parse_index(self):
+
+        feeds = []
+        soup = self.index_to_soup(self.index)
+        topstories = soup.find('ul',attrs={'class':'MaiLink'})
+        if topstories:
+            newsarticles = []
+            for itt in topstories.findAll('li'):
+                itema = itt.find('a',href=True)
+                if itema:
+                    newsarticles.append({
+                        'title'      :itema.string
+                       ,'date'       :''
+                       ,'url'        :itema['href']
+                       ,'description':''
+                    })
+            feeds.append(('Science', newsarticles))
+        return feeds
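
Note on the ad filter: the `parse_feeds` override above drops feed entries whose
URL points at the rssad.jp ad redirector by collecting matches in `delList` and
then splicing each one out of `curfeed.articles`. The same filter can be written
as a single list rebuild. The sketch below is illustrative only, not part of this
patch; it assumes `Feed.articles` is a plain Python list, as it is in calibre's
`calibre.web.feeds` module, and omits the class attributes shown in the recipe
above:

    import re
    from calibre.web.feeds.news import BasicNewsRecipe

    class MainichiDailyScienceNews(BasicNewsRecipe):
        # Class attributes (title, index, etc.) as in the recipe above,
        # omitted here for brevity.

        def parse_feeds(self):
            feeds = BasicNewsRecipe.parse_feeds(self)
            for curfeed in feeds:
                # Keep only articles whose URL is not an rssad.jp ad redirect.
                curfeed.articles = [
                    a for a in curfeed.articles
                    if not re.search(r'rssad\.jp', a.url)
                ]
            return feeds

To exercise the recipe locally, calibre's conversion tool accepts a recipe file
directly, e.g. `ebook-convert recipes/mainichi_science_news.recipe out.epub
--test -vv`; the `--test` switch limits the fetch to a couple of articles per
feed so the run stays quick.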