From b6e55e908d3a2fbe861cdb455771be27d20e82d5 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 1 Nov 2008 19:57:52 -0700
Subject: [PATCH] Fix New York Times recipe
---
src/calibre/library/test.py | 13 ++++++++++++-
src/calibre/web/feeds/recipes/nytimes.py | 17 +++++++++++------
2 files changed, 23 insertions(+), 7 deletions(-)
diff --git a/src/calibre/library/test.py b/src/calibre/library/test.py
index 1a81755971..86363b439c 100644
--- a/src/calibre/library/test.py
+++ b/src/calibre/library/test.py
@@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
Unit tests for database layer.
'''
-import sys, unittest, os
+import sys, unittest, os, cStringIO
from itertools import repeat
from calibre.ptempfile import PersistentTemporaryDirectory
@@ -60,6 +60,11 @@ class DBTest(unittest.TestCase):
self.assertEqual(ga(self.m2, p), ga(m2, p))
self.assertEqual(self.db.format(1, 'txt', index_is_id=True), 'test')
+ self.assertEqual(self.db.formats(1, index_is_id=True), 'TXT')
+ self.db.add_format(1, 'html', cStringIO.StringIO(''), index_is_id=True)
+ self.assertEqual(self.db.formats(1, index_is_id=True), 'HTML,TXT')
+ self.db.remove_format(1, 'html', index_is_id=True)
+ self.assertEqual(self.db.formats(1, index_is_id=True), 'TXT')
self.assertNotEqual(self.db.cover(1, index_is_id=True), None)
self.assertEqual(self.db.cover(2, index_is_id=True), None)
@@ -76,6 +81,12 @@ class DBTest(unittest.TestCase):
self.assertEqual('new auth', self.db.authors(2))
self.assertEqual(self.db.format(3, 'txt', index_is_id=True), 'test')
+ def testSorting(self):  # Checks that db.sort() changes which record the positional accessors return first.
+ self.db.sort('authors', True)  # NOTE(review): second argument is presumably the ascending flag - confirm against sort()'s signature
+ self.assertEqual(self.db.authors(0), 'Test Author 1')  # no index_is_id here, so 0 is presumably a row position rather than a record id
+ self.db.sort('rating', False)  # re-sort on a different field with the flag inverted
+ self.assertEqual(self.db.rating(0), 3)  # the first row after the rating sort is expected to carry rating 3
+
def suite():
return unittest.TestLoader().loadTestsFromTestCase(DBTest)
diff --git a/src/calibre/web/feeds/recipes/nytimes.py b/src/calibre/web/feeds/recipes/nytimes.py
index 17fe1b9b1b..de431d6532 100644
--- a/src/calibre/web/feeds/recipes/nytimes.py
+++ b/src/calibre/web/feeds/recipes/nytimes.py
@@ -5,9 +5,10 @@ __copyright__ = '2008, Kovid Goyal '
'''
nytimes.com
'''
-import string
+import string, re
from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
class NYTimes(BasicNewsRecipe):
@@ -57,7 +58,8 @@ class NYTimes(BasicNewsRecipe):
a = div.find('a', href=True)
if not a:
continue
- url = self.print_version(a['href'])
+ url = re.sub(r'\?.*', '', a['href'])
+ url += '?pagewanted=print'
title = self.tag_to_string(a, use_alt=True).strip()
description = ''
pubdate = strftime('%a, %d %b')
@@ -77,7 +79,10 @@ class NYTimes(BasicNewsRecipe):
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans
- def print_version(self, url):
- if url.endswith('?&pagewanted=print'):
- return url
- return url + '?&pagewanted=print'
+ def preprocess_html(self, soup):  # If the page carries a <meta http-equiv="refresh"> tag, download the page it points to and return that page's soup instead.
+ refresh = soup.find('meta', {'http-equiv':'refresh'})  # look for a meta-refresh tag in the parsed page
+ if refresh is None:  # no meta-refresh tag found
+ return soup  # hand the soup back untouched
+ content = refresh.get('content').partition('=')[2]  # keep the text after the first '=' (presumably the target of a "0;url=/path" value - confirm); NOTE(review): .get() may return None if the attribute is missing
+ raw = self.browser.open('http://www.nytimes.com'+content).read()  # target is appended to the site root, so it is assumed to be a site-relative path - TODO confirm it is never absolute
+ return BeautifulSoup(raw.decode('cp1252', 'replace'))  # decode as cp1252, replacing undecodable bytes, then re-parse into a new soup