Improved recipe for The New York Review of Books

2025-12-09 14:45:01 -05:00 · 2008-09-24 18:25:27 -07:00 · 2008-09-24 18:25:27 -07:00 · d84fd43c5a
commit d84fd43c5a
parent 81b027e023
2 changed files with 51 additions and 1 deletions
--- a/src/calibre/web/feeds/recipes/init.py
+++ b/src/calibre/web/feeds/recipes/init.py
@ -9,7 +9,7 @@ recipes = [
           'nytimes', 'usatoday', 'outlook_india', 'bbc', 'greader', 'wsj',
           'wired', 'globe_and_mail', 'smh', 'espn', 'business_week',
           'ars_technica', 'upi', 'new_yorker', 'irish_times', 'iht',
-           'discover_magazine', 'scientific_american',
+           'discover_magazine', 'scientific_american', 'new_york_review_of_books',
          ]

 import re, imp, inspect, time
--- a/src/calibre/web/feeds/recipes/new_york_review_of_books.py
+++ b/src/calibre/web/feeds/recipes/new_york_review_of_books.py
@ -0,0 +1,50 @@
+#!/usr/bin/env  python
+__license__   = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__docformat__ = 'restructuredtext en'
+
+'''
+nybooks.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from lxml import html
+from calibre.constants import preferred_encoding
+
+class NewYorkReviewOfBooks(BasicNewsRecipe):
+    # Recipe that builds a single 'Current Issue' feed from http://www.nybooks.com/current-issue
+    title = u'New York Review of Books'
+    description = u'Book reviews'
+    __author__ = 'Kovid Goyal' 
+    # Tag filters; presumably consumed by BasicNewsRecipe when cleaning pages (verify in base class)
+    remove_tags_before = {'id':'container'}
+    remove_tags = [{'class':['noprint', 'ad', 'footer']}, {'id':'right-content'}]
+    # parse_index returns [('Current Issue', articles)]; each article is a dict with title/date/url/description
+    def parse_index(self):
+        root = html.fromstring(self.browser.open('http://www.nybooks.com/current-issue').read())
+        date = root.xpath('//h4[@class = "date"]')[0]  # first h4 with class "date"; IndexError if the page has none
+        self.timefmt = ' ['+date.text.encode(preferred_encoding)+']'  # heading text, bracketed, stored as timefmt
+        articles = []
+        for tag in date.itersiblings():  # walk the sibling elements yielded for the date heading
+            if tag.tag == 'h4': break  # stop at the next h4 heading
+            if tag.tag == 'p':
+                if tag.get('class') == 'indented':  # 'indented' paragraphs extend the previous article
+                    articles[-1]['description'] += html.tostring(tag)  # IndexError if no article was added yet
+                else:
+                    href = tag.xpath('descendant::a[@href]')[0].get('href')  # first link in the paragraph; IndexError if none
+                    article = {
+                               'title': u''.join(tag.xpath('descendant::text()')),  # all text nodes under the paragraph
+                               'date' : '',
+                               'url'  : 'http://www.nybooks.com'+href,  # assumes href is site-relative -- TODO confirm
+                               'description': '',
+                               }
+                    articles.append(article)
+                    
+        return [('Current Issue', articles)]
+                    
+            
+        
+        
+        
+        
+