From ce6ef6e01a2e905d593d002b78f7aa1c8054ec44 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 6 Mar 2010 22:16:38 -0700
Subject: [PATCH] News Observer by Krittika Goyal

---
 resources/recipes/newsobs.recipe | 33 ++++++++++++++++++++++++++++++++
 resources/recipes/sfbg.recipe    | 33 +++++++++++++-------------------
 2 files changed, 46 insertions(+), 20 deletions(-)
 create mode 100644 resources/recipes/newsobs.recipe

diff --git a/resources/recipes/newsobs.recipe b/resources/recipes/newsobs.recipe
new file mode 100644
index 0000000000..bb603a1bf1
--- /dev/null
+++ b/resources/recipes/newsobs.recipe
@@ -0,0 +1,33 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class NewsAndObserver(BasicNewsRecipe):  # calibre recipe for the newsobserver.com RSS feeds listed below
+    title          = u'News And Observer'
+    language       = 'en'
+    __author__     = 'Krittika Goyal'
+    oldest_article = 1 #days -- maximum age of articles to download
+    max_articles_per_feed = 25  # upper bound on articles taken from each feed
+    #encoding = 'latin1'
+
+    no_stylesheets = True  # BasicNewsRecipe flag: do not load the site's stylesheets
+    remove_tags_before = dict(name='div', attrs={'id':'story_header'})  # drop everything before this div
+    remove_tags_after  = dict(name='div', attrs={'id':'shirttail'})  # drop everything after this div
+    remove_tags = [  # elements removed from each downloaded article page
+       dict(name='iframe'),
+       dict(name='div', attrs={'class':['contained_round', 'contained']}),
+       dict(name='div', attrs={'id':['story_tools', 'toolbox', 'shirttail', 'comment_widget', 'stories_widget', 'classifieds_widget', 'most_popular_widget', 'footer']}),
+       #dict(name='ul', attrs={'class':'article-tools'}),
+       dict(name='ul', attrs={'id':'story_tabs'}),
+    ]
+
+
+    feeds = [  # (section title, RSS feed URL) pairs
+        ('Cover', 'http://www.newsobserver.com/100/index.rss'),
+        ('News', 'http://www.newsobserver.com/102/index.rss'),
+        ('Politics', 'http://www.newsobserver.com/105/index.rss'),
+        ('Business', 'http://www.newsobserver.com/104/index.rss'),
+        ('Sports', 'http://www.newsobserver.com/103/index.rss'),
+        ('College Sports', 'http://www.newsobserver.com/119/index.rss'),
+        ('Lifestyles', 'http://www.newsobserver.com/106/index.rss'),
+        ('Editorials', 'http://www.newsobserver.com/158/index.rss')]
+
+
diff --git a/resources/recipes/sfbg.recipe b/resources/recipes/sfbg.recipe
index 5530bc7163..5c77c96f74 100644
--- a/resources/recipes/sfbg.recipe
+++ b/resources/recipes/sfbg.recipe
@@ -1,42 +1,35 @@
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
 
 class SanFranciscoBayGuardian(BasicNewsRecipe):
     title          = u'San Francisco Bay Guardian'
     language       = 'en'
     __author__     = 'Krittika Goyal'
-    oldest_article = 1 #days
+    oldest_article = 31 #days -- maximum age of articles to download
     max_articles_per_feed = 25
     #encoding = 'latin1'
 
     no_stylesheets = True
-    remove_tags_before = dict(name='div', attrs={'id':'story_header'})
-    remove_tags_after  = dict(name='div', attrs={'id':'shirttail'})
+    #remove_tags_before = dict(name='div', attrs={'id':'story_header'})
+    #remove_tags_after  = dict(name='div', attrs={'id':'shirttail'})
     remove_tags = [
        dict(name='iframe'),
        #dict(name='div', attrs={'class':'related-articles'}),
-        dict(name='div', attrs={'id':['story_tools', 'toolbox', 'shirttail', 'comment_widget']}),
+        #dict(name='div', attrs={'id':['story_tools', 'toolbox', 'shirttail', 'comment_widget']}),
        #dict(name='ul', attrs={'class':'article-tools'}),
-       dict(name='ul', attrs={'id':'story_tabs'}),
+       #dict(name='ul', attrs={'id':'story_tabs'}),
     ]
 
 
     feeds = [
-        ('Cover', 'http://www.newsobserver.com/100/index.rss'),
-        ('News', 'http://www.newsobserver.com/102/index.rss'),
-        ('Politics', 'http://www.newsobserver.com/105/index.rss'),
-        ('Business', 'http://www.newsobserver.com/104/index.rss'),
-        ('Sports', 'http://www.newsobserver.com/103/index.rss'),
-        ('College Sports', 'http://www.newsobserver.com/119/index.rss'),
-        ('Lifestyles', 'http://www.newsobserver.com/106/index.rss'),
-        ('Editorials', 'http://www.newsobserver.com/158/index.rss')]
+        ('sfbg', 'http://www.sfbg.com/rss.xml'),  # the only feed this recipe downloads
+    ]
 
 
-    def preprocess_html(self, soup):
-        story = soup.find(name='div', attrs={'id':'story_body'})
+    #def preprocess_html(self, soup):  -- hook disabled: every line of its body is commented out below
+        #story = soup.find(name='div', attrs={'id':'story_body'})
         #td = heading.findParent(name='td')
         #td.extract()
-        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
-        body = soup.find(name='body')
-        body.insert(0, story)
-        return soup
+        #soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
+        #body = soup.find(name='body')
+        #body.insert(0, story)
+        #return soup