__license__ = 'GPL v3'
__copyright__ = '2010,2014, Hiroshi Miura '
'''
www.yomiuri.co.jp
'''

from calibre.web.feeds.news import BasicNewsRecipe
import re

# Matches URLs containing the literal text "rssad.jp" (presumably ad entries
# injected into the feeds -- confirm against live feed data). The dot is
# escaped so that it only matches a real "." and not any character.
_AD_URL_RE = re.compile(r'rssad\.jp')


class YOLNews(BasicNewsRecipe):
    """Recipe that builds a single 'latest' feed from the Yomiuri Online
    latest-news index page.
    """

    title = u'Yomiuri Online (Latest)'
    __author__ = 'Hiroshi Miura'
    oldest_article = 1
    max_articles_per_feed = 50
    description = 'Japanese traditional newspaper Yomiuri Online News'
    publisher = 'Yomiuri Online News'
    category = 'news, japan'
    language = 'ja'
    encoding = 'UTF-8'
    # Page scraped by parse_index() for the list of latest articles.
    index = 'http://www.yomiuri.co.jp/latestnews/'
    remove_javascript = True
    masthead_title = u'YOMIURI ONLINE'

    keep_only_tags = [{'class': "article text-resizeable"}]

    def parse_feeds(self):
        """Return the feeds from the base class with rssad.jp articles removed.

        Each feed's ``articles`` list is filtered in place so that any other
        reference to the same list observes the removal. Articles without a
        URL are kept rather than raising on the pattern match.
        """
        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            feed.articles[:] = [
                article for article in feed.articles
                if not (article.url and _AD_URL_RE.search(article.url))
            ]
        return feeds

    def parse_index(self):
        """Scrape the latest-news index page into a single 'latest' feed.

        Returns a list holding one ``('latest', articles)`` tuple, where each
        article is a dict with ``title``, ``date``, ``url`` and
        ``description`` keys. Returns an empty list when the article list is
        missing from the page or no usable entry is found, so an empty feed
        is never emitted.
        """
        soup = self.index_to_soup(self.index)
        latest = soup.find(
            'ul', attrs={'class': 'list-common list-common-latest'})
        if latest is None:
            return []

        articles = []
        for entry in latest.findAll('li'):
            link = entry.find('a', href=True)
            if link is None:
                continue
            headline = link.find('span', attrs={'class': 'headline'})
            # Skip entries whose markup lacks a headline (or has an empty
            # one) instead of letting one odd <li> abort the whole download.
            if headline is None or not headline.contents:
                continue
            date_tag = headline.find('span', attrs={'class': 'update'})
            # Pass the date as plain text rather than as a soup tag.
            if date_tag is not None:
                date_text = self.tag_to_string(date_tag)
            else:
                date_text = ''
            articles.append({
                'title': headline.contents[0],
                'date': date_text,
                'url': link['href'],
                'description': '',
            })

        if not articles:
            return []
        return [('latest', articles)]