__license__ = 'GPL v3'
__copyright__ = '2010,2014, Hiroshi Miura '
'''
japan.engadget.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


class EndgadgetJapan(BasicNewsRecipe):
    """Calibre recipe that scrapes the Engadget Japanese front page.

    Instead of consuming an RSS feed, the recipe overrides ``parse_index``
    and collects article links directly from the HTML at ``index``.
    """

    title = u'Endgadget\u65e5\u672c\u7248'
    language = 'ja'
    __author__ = 'Hiroshi Miura'
    cover_url = 'http://skins18.wincustomize.com/1/49/149320/29/7578/preview-29-7578.jpg'
    masthead_url = 'http://www.blogsmithmedia.com/japanese.engadget.com/media/eng-jp-logo-t.png'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    # Front page that parse_index() scrapes for article links.
    index = 'http://japanese.engadget.com/'
    remove_javascript = True
    # Article-page trimming: content is bounded by the page header element
    # and the post-meta block.
    remove_tags_before = dict(name="header", attrs={'class': "header"})
    remove_tags_after = dict(name='div', attrs={'class': 'post-meta'})

    def parse_index(self):
        """Return the feed list built from the front page.

        Every ``<header class="post-header">`` element that contains an
        ``<h2>`` with a linked title becomes one article. Headers lacking
        that link are skipped so one malformed entry cannot abort the whole
        download; a missing timestamp yields an empty ``date``.

        Returns:
            A one-element list ``[('Latest Posts', articles)]`` where each
            article is a dict with ``title``, ``date``, ``url`` and
            ``description`` keys.
        """
        articles = []
        soup = self.index_to_soup(self.index)
        for header in soup.findAll('header', attrs={'class': 'post-header'}):
            heading = header.find('h2')
            link = heading.find('a', href=True) if heading is not None else None
            if link is None:
                # Not an article entry (or the markup changed): nothing to fetch.
                continue
            stamp = header.find('span', attrs={'class': 'time'})
            articles.append({
                # tag_to_string copes with nested markup inside the tag,
                # where ``.string`` would return None.
                'title': self.tag_to_string(link),
                'date': self.tag_to_string(stamp) if stamp is not None else '',
                'url': link['href'],
                'description': '',
            })
        return [('Latest Posts', articles)]