Fix New York Times recipe

2025-07-09 03:04:10 -04:00 · 2008-11-01 19:57:52 -07:00 · 2008-11-01 19:57:52 -07:00 · b6e55e908d
commit b6e55e908d
parent 114c1e799e
2 changed files with 23 additions and 7 deletions
--- a/src/calibre/library/test.py
+++ b/src/calibre/library/test.py
@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
 Unit tests for database layer.
 '''
-import sys, unittest, os
+import sys, unittest, os, cStringIO
 from itertools import repeat
 from calibre.ptempfile import PersistentTemporaryDirectory
@ -60,6 +60,11 @@ class DBTest(unittest.TestCase):
            self.assertEqual(ga(self.m2, p), ga(m2, p))
        self.assertEqual(self.db.format(1, 'txt', index_is_id=True), 'test')
        self.assertEqual(self.db.formats(1, index_is_id=True), 'TXT')
        self.db.add_format(1, 'html', cStringIO.StringIO('<html/>'), index_is_id=True)
        self.assertEqual(self.db.formats(1, index_is_id=True), 'HTML,TXT')
        self.db.remove_format(1, 'html', index_is_id=True)
        self.assertEqual(self.db.formats(1, index_is_id=True), 'TXT')
        self.assertNotEqual(self.db.cover(1, index_is_id=True), None)
        self.assertEqual(self.db.cover(2, index_is_id=True), None)
@ -76,6 +81,12 @@ class DBTest(unittest.TestCase):
        self.assertEqual('new auth', self.db.authors(2))
        self.assertEqual(self.db.format(3, 'txt', index_is_id=True), 'test')
    def testSorting(self):
        # Sort on 'authors' and check which author ends up in row 0.
        self.db.sort('authors', True)
        first_author = self.db.authors(0)
        self.assertEqual(first_author, 'Test Author 1')
        # Re-sort on 'rating' with the flag flipped and check row 0 again.
        self.db.sort('rating', False)
        first_rating = self.db.rating(0)
        self.assertEqual(first_rating, 3)
 def suite():
    # Build a suite holding every test method that DBTest defines.
    loader = unittest.TestLoader()
    return loader.loadTestsFromTestCase(DBTest)
--- a/src/calibre/web/feeds/recipes/nytimes.py
+++ b/src/calibre/web/feeds/recipes/nytimes.py
@ -5,9 +5,10 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 '''
 nytimes.com
 '''
-import string
+import string, re
 from calibre import strftime
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 class NYTimes(BasicNewsRecipe):
@ -57,7 +58,8 @@ class NYTimes(BasicNewsRecipe):
                a = div.find('a', href=True)
                if not a:
                    continue
-                url = self.print_version(a['href'])
+                url = re.sub(r'\?.*', '', a['href'])
                url += '?pagewanted=print'
                title = self.tag_to_string(a, use_alt=True).strip()
                description = ''
                pubdate = strftime('%a, %d %b')
@ -77,7 +79,10 @@ class NYTimes(BasicNewsRecipe):
        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
        return ans
-    def print_version(self, url):
+    def preprocess_html(self, soup):
-        if url.endswith('?&pagewanted=print'):
+        refresh = soup.find('meta', {'http-equiv':'refresh'})
-            return url
+        if refresh is None:
-        return url + '?&pagewanted=print'
+            return soup
        content = refresh.get('content').partition('=')[2]
        raw = self.browser.open('http://www.nytimes.com'+content).read()
        return BeautifulSoup(raw.decode('cp1252', 'replace'))