From 9015213161485fc417dfd78b22d9eefb971bf2db Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 23 Jul 2012 15:09:43 +0530 Subject: [PATCH] Fix San Francisco Bay Guardian --- recipes/sfbg.recipe | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/recipes/sfbg.recipe b/recipes/sfbg.recipe index 0735e760c6..5c77c96f74 100644 --- a/recipes/sfbg.recipe +++ b/recipes/sfbg.recipe @@ -1,25 +1,35 @@ from calibre.web.feeds.news import BasicNewsRecipe class SanFranciscoBayGuardian(BasicNewsRecipe): - title = u'San Francisco Bay Guardian' - language = 'en' - __author__ = 'Krittika Goyal' + title = u'San Francisco Bay Guardian' + language = 'en' + __author__ = 'Krittika Goyal' oldest_article = 31 #days max_articles_per_feed = 25 + #encoding = 'latin1' no_stylesheets = True + #remove_tags_before = dict(name='div', attrs={'id':'story_header'}) + #remove_tags_after = dict(name='div', attrs={'id':'shirttail'}) remove_tags = [ - dict(name='iframe'), + dict(name='iframe'), + #dict(name='div', attrs={'class':'related-articles'}), + #dict(name='div', attrs={'id':['story_tools', 'toolbox', 'shirttail', 'comment_widget']}), + #dict(name='ul', attrs={'class':'article-tools'}), + #dict(name='ul', attrs={'id':'story_tabs'}), ] feeds = [ ('sfbg', 'http://www.sfbg.com/rss.xml'), - ('politics', 'http://www.sfbg.com/politics/rss.xml'), - ('blogs', 'http://www.sfbg.com/blog/rss.xml'), - ('pixel_vision', 'http://www.sfbg.com/pixel_vision/rss.xml'), - ('bruce', 'http://www.sfbg.com/bruce/rss.xml'), ] - + #def preprocess_html(self, soup): + #story = soup.find(name='div', attrs={'id':'story_body'}) + #td = heading.findParent(name='td') + #td.extract() + #soup = BeautifulSoup('t') + #body = soup.find(name='body') + #body.insert(0, story) + #return soup