From ce6ef6e01a2e905d593d002b78f7aa1c8054ec44 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 6 Mar 2010 22:16:38 -0700
Subject: [PATCH] News Observer by Krittika Goyal

---
Notes:
    newsobs.recipe: new recipe for the News And Observer RSS feeds.
    sfbg.recipe: replaces the newsobserver.com feed list with
    http://www.sfbg.com/rss.xml, raises oldest_article from 1 to 31 days,
    drops the BeautifulSoup import, and comments out the tag-removal
    rules and the preprocess_html hook.

 resources/recipes/newsobs.recipe | 33 ++++++++++++++++++++++++++++++++
 resources/recipes/sfbg.recipe    | 33 +++++++++++++-------------------
 2 files changed, 46 insertions(+), 20 deletions(-)
 create mode 100644 resources/recipes/newsobs.recipe

diff --git a/resources/recipes/newsobs.recipe b/resources/recipes/newsobs.recipe
new file mode 100644
index 0000000000..bb603a1bf1
--- /dev/null
+++ b/resources/recipes/newsobs.recipe
@@ -0,0 +1,33 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class NewsAndObserver(BasicNewsRecipe):
+    title = u'News And Observer'
+    language = 'en'
+    __author__ = 'Krittika Goyal'
+    oldest_article = 1 #days
+    max_articles_per_feed = 25
+    #encoding = 'latin1'
+
+    no_stylesheets = True
+    remove_tags_before = dict(name='div', attrs={'id':'story_header'})
+    remove_tags_after = dict(name='div', attrs={'id':'shirttail'})
+    remove_tags = [
+        dict(name='iframe'),
+        dict(name='div', attrs={'class':['contained_round', 'contained']}),
+        dict(name='div', attrs={'id':['story_tools', 'toolbox', 'shirttail', 'comment_widget', 'stories_widget', 'classifieds_widget', 'most_popular_widget', 'footer']}),
+        #dict(name='ul', attrs={'class':'article-tools'}),
+        dict(name='ul', attrs={'id':'story_tabs'}),
+    ]
+
+
+    feeds = [
+        ('Cover', 'http://www.newsobserver.com/100/index.rss'),
+        ('News', 'http://www.newsobserver.com/102/index.rss'),
+        ('Politics', 'http://www.newsobserver.com/105/index.rss'),
+        ('Business', 'http://www.newsobserver.com/104/index.rss'),
+        ('Sports', 'http://www.newsobserver.com/103/index.rss'),
+        ('College Sports', 'http://www.newsobserver.com/119/index.rss'),
+        ('Lifestyles', 'http://www.newsobserver.com/106/index.rss'),
+        ('Editorials', 'http://www.newsobserver.com/158/index.rss')]
+
+
diff --git a/resources/recipes/sfbg.recipe b/resources/recipes/sfbg.recipe
index 5530bc7163..5c77c96f74 100644
--- a/resources/recipes/sfbg.recipe
+++ b/resources/recipes/sfbg.recipe
@@ -1,42 +1,35 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
 
 class SanFranciscoBayGuardian(BasicNewsRecipe):
     title = u'San Francisco Bay Guardian'
     language = 'en'
     __author__ = 'Krittika Goyal'
-    oldest_article = 1 #days
+    oldest_article = 31 #days
     max_articles_per_feed = 25
     #encoding = 'latin1'
 
     no_stylesheets = True
-    remove_tags_before = dict(name='div', attrs={'id':'story_header'})
-    remove_tags_after = dict(name='div', attrs={'id':'shirttail'})
+    #remove_tags_before = dict(name='div', attrs={'id':'story_header'})
+    #remove_tags_after = dict(name='div', attrs={'id':'shirttail'})
     remove_tags = [
         dict(name='iframe'),
         #dict(name='div', attrs={'class':'related-articles'}),
-        dict(name='div', attrs={'id':['story_tools', 'toolbox', 'shirttail', 'comment_widget']}),
+        #dict(name='div', attrs={'id':['story_tools', 'toolbox', 'shirttail', 'comment_widget']}),
         #dict(name='ul', attrs={'class':'article-tools'}),
-        dict(name='ul', attrs={'id':'story_tabs'}),
+        #dict(name='ul', attrs={'id':'story_tabs'}),
     ]
 
 
     feeds = [
-        ('Cover', 'http://www.newsobserver.com/100/index.rss'),
-        ('News', 'http://www.newsobserver.com/102/index.rss'),
-        ('Politics', 'http://www.newsobserver.com/105/index.rss'),
-        ('Business', 'http://www.newsobserver.com/104/index.rss'),
-        ('Sports', 'http://www.newsobserver.com/103/index.rss'),
-        ('College Sports', 'http://www.newsobserver.com/119/index.rss'),
-        ('Lifestyles', 'http://www.newsobserver.com/106/index.rss'),
-        ('Editorials', 'http://www.newsobserver.com/158/index.rss')]
+        ('sfbg', 'http://www.sfbg.com/rss.xml'),
+        ]
 
 
-    def preprocess_html(self, soup):
-        story = soup.find(name='div', attrs={'id':'story_body'})
+    #def preprocess_html(self, soup):
+        #story = soup.find(name='div', attrs={'id':'story_body'})
         #td = heading.findParent(name='td')
         #td.extract()
-        soup = BeautifulSoup('t')
-        body = soup.find(name='body')
-        body.insert(0, story)
-        return soup
+        #soup = BeautifulSoup('t')
+        #body = soup.find(name='body')
+        #body.insert(0, story)
+        #return soup