From 1481f6275829feca6647ee6935f1e1cb6886bebb Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 5 May 2012 22:03:39 +0530
Subject: [PATCH] Fix #995142 (Updated recipe for Endgaget Japan)

---
Review notes (this area is not part of the commit message):
 * This copy of the patch had lost its line breaks. Line structure and
   indentation were reconstructed, so exact whitespace (e.g. alignment
   around '=') may differ from the original commit -- verify the context
   lines against the target file before applying.
 * parse_index() dereferences the results of topstories.find('h4') and
   itt.find('a', href=True) without checking them. Presumably either can
   be None when a 'post_content' block lacks that markup -- confirm, and
   skip such blocks if so.
 * itemdiv is assigned in the loop but never read.
 * 'title' is taken from itema.string with no fallback; NOTE(review):
   confirm it cannot be None for anchors that wrap nested tags.

 recipes/endgadget_ja.recipe | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/recipes/endgadget_ja.recipe b/recipes/endgadget_ja.recipe
index 3c20380e9b..b6f45d7c0f 100644
--- a/recipes/endgadget_ja.recipe
+++ b/recipes/endgadget_ja.recipe
@@ -17,7 +17,26 @@ class EndgadgetJapan(BasicNewsRecipe):
     no_stylesheets = True
     language = 'ja'
     encoding = 'utf-8'
-    feeds = [(u'engadget', u'http://japanese.engadget.com/rss.xml')]
+    index = 'http://japanese.engadget.com/'
+    remove_javascript = True
+
+    remove_tags_before = dict(name="h1", attrs={'class':"post_title"})
+    remove_tags_after = dict(name='div', attrs={'class':'post_body'})
+
+    def parse_index(self):
+        feeds = []
+        newsarticles = []
+        soup = self.index_to_soup(self.index)
+        for topstories in soup.findAll('div',attrs={'class':'post_content'}):
+            itt = topstories.find('h4')
+            itema = itt.find('a',href=True)
+            itemdiv = itema.find('div',attrs={'class':'post_body'})
+            newsarticles.append({
+                                  'title'      :itema.string
+                                 ,'date'       :''
+                                 ,'url'        :itema['href']
+                                 ,'description':''
+                                })
+        feeds.append(('Latest Posts', newsarticles))
+        return feeds
 
-    remove_tags_before = dict(name="div", attrs={'id':"content_wrap"})
-    remove_tags_after = dict(name='h3', attrs={'id':'addcomments'})