From 6f2c5a1f4d4e82561a68462698af6f944463a02c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 7 Mar 2013 14:35:31 +0530
Subject: [PATCH] Update New York Times Book Review

---
 recipes/nytimesbook.recipe | 58 ++++++++++++++++++++++++++++++--------
 1 file changed, 46 insertions(+), 12 deletions(-)

diff --git a/recipes/nytimesbook.recipe b/recipes/nytimesbook.recipe
index 5388da9dcb..6c47059e78 100644
--- a/recipes/nytimesbook.recipe
+++ b/recipes/nytimesbook.recipe
@@ -1,21 +1,55 @@
+
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class NewYorkTimesBookReview(BasicNewsRecipe):
     title          = u'New York Times Book Review'
     language       = 'en'
-    __author__     = 'Krittika Goyal'
-    oldest_article = 8 #days
-    max_articles_per_feed = 1000
-    #recursions = 2
-    #encoding = 'latin1'
-    use_embedded_content = False
+    __author__     = 'Kovid Goyal'
+
 
     no_stylesheets = True
-    auto_cleanup = True
+    no_javascript = True
+    keep_only_tags = [dict(id='article'), dict(id=lambda x:x and x.startswith('entry-'))]
+    remove_tags = [
+        dict(attrs={'class':['articleBottomExtra', 'shareToolsBox', 'singleAd']}),
+        dict(attrs={'class':lambda x: x and ('shareTools' in x or 'enlargeThis' in x)}),
+    ]
+
+    def parse_index(self):
+        """Return [(section title, [article dict, ...]), ...] built from the Book Review index page."""
+        soup = self.index_to_soup('http://www.nytimes.com/pages/books/review/index.html')
+        toc = soup.find('div', id='main').find('div', attrs={'class':'abColumn'})
+        feeds = []
+        articles = []
+        section_title = 'Features'  # title used for stories that precede the first section header
+        for x in toc.findAll(['div', 'h3', 'h6'], attrs={'class':['story', 'sectionHeader', 'ledeStory']}):
+            if x['class'] == 'sectionHeader':
+                # A header closes the section collected so far and opens the next one
+                if articles:
+                    feeds.append((section_title, articles))
+                section_title = self.tag_to_string(x)
+                articles = []
+                self.log('Found section:', section_title)
+            elif x['class'] in {'story', 'ledeStory'}:
+                heading = x.find('h3' if x['class'] == 'story' else 'h1')
+                a = heading.find('a', href=True) if heading is not None else None
+                if a is None:
+                    continue  # no headline link in this block, nothing to download
+                title = self.tag_to_string(a)
+                # pagewanted=all presumably asks for the one-page view; use '?' if the link has no query yet
+                url = a['href'] + ('&' if '?' in a['href'] else '?') + 'pagewanted=all'
+                self.log('\tFound article:', title, url)
+                # The description is the byline followed by the summary, when present
+                byline = x.find('h6', attrs={'class':'byline'})
+                desc = self.tag_to_string(byline) if byline is not None else ''
+                summary = x.find('p', attrs={'class':'summary'})
+                if summary is not None:
+                    desc += self.tag_to_string(summary)
+                if desc:
+                    self.log('\t\t', desc)
+                articles.append({'title':title, 'url':url, 'date':'', 'description':desc})
+        if articles:  # the last section has no following header to flush it
+            feeds.append((section_title, articles))
+        return feeds
 
 
-    feeds          = [
-                     ('New York Times Sunday Book Review',
-                      'http://feeds.nytimes.com/nyt/rss/SundayBookReview'),
-                     ]
-