From 556d8971d2246c9661138907b962f3cc42178ebf Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 3 Mar 2010 18:31:23 -0700
Subject: [PATCH] Smithsonian Magazine by Krittika Goyal

---
 resources/recipes/smith.recipe | 52 ++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 resources/recipes/smith.recipe
diff --git a/resources/recipes/smith.recipe b/resources/recipes/smith.recipe
new file mode 100644
index 0000000000..e52b2ee709
--- /dev/null
+++ b/resources/recipes/smith.recipe
@@ -0,0 +1,52 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+
+class SmithsonianMagazine(BasicNewsRecipe):
+    '''Fetch recipe for the Smithsonian Magazine feedburner feeds.'''
+    title          = u'Smithsonian Magazine'
+    language       = 'en'
+    __author__     = 'Krittika Goyal'
+    oldest_article = 31#days
+    max_articles_per_feed = 50
+    recursions = 1
+    match_regexps = ['&page=[2-9]$']
+
+    remove_stylesheets = True
+    remove_tags_after  = dict(name='p', attrs={'id':'articlePaginationWrapper'})
+    remove_tags = [
+       dict(name='iframe'),
+       dict(name='div', attrs={'class':'article_sidebar_border'}),
+       dict(name='div', attrs={'id':['article_sidebar_border', 'most-popular_large']}),
+       dict(name='ul', attrs={'class':'cat-breadcrumb col three last'}),
+    ]
+
+    feeds          = [
+('History and Archeology',
+ 'http://feeds.feedburner.com/smithsonianmag/history-archaeology'),
+('People and Places',
+ 'http://feeds.feedburner.com/smithsonianmag/people-places'),
+('Science and Nature',
+ 'http://feeds.feedburner.com/smithsonianmag/science-nature'),
+('Arts and Culture',
+ 'http://feeds.feedburner.com/smithsonianmag/arts-culture'),
+('Travel',
+ 'http://feeds.feedburner.com/smithsonianmag/travel'),
+]
+
+    def preprocess_html(self, soup):
+        '''Rebuild the page as a minimal document holding only the div with id "article-left".'''
+        story = soup.find(name='div', attrs={'id':'article-left'})
+        if story is None:
+            # No article container on this page; inserting None below would raise, so keep it as is.
+            return soup
+        soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
+        body = soup.find(name='body')
+        body.insert(0, story)
+        return soup
+
+    def postprocess_html(self, soup, first):
+        '''Drop #articlePaginationWrapper elements and, when `first` is false, #article-head too.'''
+        for p in soup.findAll(id='articlePaginationWrapper'): p.extract()
+        if not first:
+             for div in soup.findAll(id='article-head'): div.extract()
+        return soup