From ec95fab48256b9c1944c979f1cd940ae89e5f332 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Thu, 12 Mar 2009 16:27:09 -0700
Subject: [PATCH] Add a no subscription needed version of the recipe for The New York Review of Books

---
 src/calibre/web/feeds/recipes/__init__.py     |  1 +
 .../recipe_new_york_review_of_books_no_sub.py | 78 +++++++++++++++++++
 2 files changed, 79 insertions(+)
 create mode 100644 src/calibre/web/feeds/recipes/recipe_new_york_review_of_books_no_sub.py

diff --git a/src/calibre/web/feeds/recipes/__init__.py b/src/calibre/web/feeds/recipes/__init__.py
index b3c3c4605b..65bb389903 100644
--- a/src/calibre/web/feeds/recipes/__init__.py
+++ b/src/calibre/web/feeds/recipes/__init__.py
@@ -34,6 +34,7 @@ recipe_modules = ['recipe_' + r for r in (
            'al_jazeera', 'winsupersite', 'borba', 'courrierinternational',
            'lamujerdemivida', 'soldiers', 'theonion', 'news_times',
            'el_universal', 'mediapart', 'wikinews_en', 'ecogeek', 'daily_mail',
+           'new_york_review_of_books_no_sub',
            )]
 
 import re, imp, inspect, time, os
diff --git a/src/calibre/web/feeds/recipes/recipe_new_york_review_of_books_no_sub.py b/src/calibre/web/feeds/recipes/recipe_new_york_review_of_books_no_sub.py
new file mode 100644
index 0000000000..d4aeba3537
--- /dev/null
+++ b/src/calibre/web/feeds/recipes/recipe_new_york_review_of_books_no_sub.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+__license__ = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
+__docformat__ = 'restructuredtext en'
+
+'''
+nybooks.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from lxml import html
+from calibre.constants import preferred_encoding
+
+class NewYorkReviewOfBooks(BasicNewsRecipe):
+    '''
+    Build a single "Current Issue" feed from the table of contents at
+    http://www.nybooks.com/current-issue.
+    '''
+
+    title = u'New York Review of Books (no subscription)'
+    description = u'Book reviews'
+    language = _('English')
+    __author__ = 'Kovid Goyal'
+    remove_tags_before = {'id':'container'}
+    remove_tags = [{'class':['noprint', 'ad', 'footer']}, {'id':'right-content'}]
+
+    def parse_index(self):
+        '''
+        Scrape the current-issue page and return its articles.
+
+        Walks the siblings that follow the ``h4`` date heading and stops at
+        the next ``h4``. Every ``p`` holding a link starts a new article;
+        a ``p`` with class ``indented`` is appended, as markup, to the
+        description of the article before it.
+
+        :return: ``[('Current Issue', articles)]``, each article being a
+                 dict with the keys ``title``, ``date``, ``url`` and
+                 ``description``.
+        :raises IndexError: if the page has no ``h4`` with class ``date``.
+        '''
+        index = 'http://www.nybooks.com/current-issue'
+        root = html.fromstring(self.browser.open(index).read())
+        headings = root.xpath('//h4[@class = "date"]')
+        if not headings:
+            # Same exception type the bare [0] lookup used to raise, but
+            # with a message that says what is missing.
+            raise IndexError('No issue date heading found at '+index)
+        date = headings[0]
+        # lxml leaves .text as None when the heading starts with a child tag.
+        if date.text:
+            self.timefmt = ' ['+date.text.encode(preferred_encoding)+']'
+
+        articles = []
+        for tag in date.itersiblings():
+            if tag.tag == 'h4': break
+            if tag.tag != 'p': continue
+            if tag.get('class') == 'indented':
+                # A blurb with no article before it has nothing to extend.
+                if articles:
+                    articles[-1]['description'] += html.tostring(tag)
+                continue
+            links = tag.xpath('descendant::a[@href]')
+            if not links:
+                # A paragraph without a link cannot be downloaded; skip it.
+                continue
+            href = links[0].get('href')
+            if not href.startswith(('http://', 'https://')):
+                # Only site-relative links need the host prepended.
+                href = 'http://www.nybooks.com'+href
+            articles.append({
+                'title': u''.join(tag.xpath('descendant::text()')),
+                'date' : '',
+                'url'  : href,
+                'description': '',
+            })
+
+        return [('Current Issue', articles)]
+