From b6e55e908d3a2fbe861cdb455771be27d20e82d5 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 1 Nov 2008 19:57:52 -0700 Subject: [PATCH] Fix New York Times recipe --- src/calibre/library/test.py | 13 ++++++++++++- src/calibre/web/feeds/recipes/nytimes.py | 17 +++++++++++------ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/calibre/library/test.py b/src/calibre/library/test.py index 1a81755971..86363b439c 100644 --- a/src/calibre/library/test.py +++ b/src/calibre/library/test.py @@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en' Unit tests for database layer. ''' -import sys, unittest, os +import sys, unittest, os, cStringIO from itertools import repeat from calibre.ptempfile import PersistentTemporaryDirectory @@ -60,6 +60,11 @@ class DBTest(unittest.TestCase): self.assertEqual(ga(self.m2, p), ga(m2, p)) self.assertEqual(self.db.format(1, 'txt', index_is_id=True), 'test') + self.assertEqual(self.db.formats(1, index_is_id=True), 'TXT') + self.db.add_format(1, 'html', cStringIO.StringIO(''), index_is_id=True) + self.assertEqual(self.db.formats(1, index_is_id=True), 'HTML,TXT') + self.db.remove_format(1, 'html', index_is_id=True) + self.assertEqual(self.db.formats(1, index_is_id=True), 'TXT') self.assertNotEqual(self.db.cover(1, index_is_id=True), None) self.assertEqual(self.db.cover(2, index_is_id=True), None) @@ -76,6 +81,12 @@ class DBTest(unittest.TestCase): self.assertEqual('new auth', self.db.authors(2)) self.assertEqual(self.db.format(3, 'txt', index_is_id=True), 'test') + def testSorting(self): + self.db.sort('authors', True) + self.assertEqual(self.db.authors(0), 'Test Author 1') + self.db.sort('rating', False) + self.assertEqual(self.db.rating(0), 3) + def suite(): return unittest.TestLoader().loadTestsFromTestCase(DBTest) diff --git a/src/calibre/web/feeds/recipes/nytimes.py b/src/calibre/web/feeds/recipes/nytimes.py index 17fe1b9b1b..de431d6532 100644 --- a/src/calibre/web/feeds/recipes/nytimes.py +++ b/src/calibre/web/feeds/recipes/nytimes.py @@ -5,9 +5,10 @@ __copyright__ = '2008, Kovid Goyal ' ''' nytimes.com ''' -import string +import string, re from calibre import strftime from calibre.web.feeds.recipes import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup class NYTimes(BasicNewsRecipe): @@ -57,7 +58,8 @@ class NYTimes(BasicNewsRecipe): a = div.find('a', href=True) if not a: continue - url = self.print_version(a['href']) + url = re.sub(r'\?.*', '', a['href']) + url += '?pagewanted=print' title = self.tag_to_string(a, use_alt=True).strip() description = '' pubdate = strftime('%a, %d %b') @@ -77,7 +79,10 @@ class NYTimes(BasicNewsRecipe): ans = [(key, articles[key]) for key in ans if articles.has_key(key)] return ans - def print_version(self, url): - if url.endswith('?&pagewanted=print'): - return url - return url + '?&pagewanted=print' + def preprocess_html(self, soup): + refresh = soup.find('meta', {'http-equiv':'refresh'}) + if refresh is None: + return soup + content = refresh.get('content').partition('=')[2] + raw = self.browser.open('http://www.nytimes.com'+content).read() + return BeautifulSoup(raw.decode('cp1252', 'replace'))