Fix New York Times recipe

This commit is contained in:
Kovid Goyal 2008-11-01 19:57:52 -07:00
parent 114c1e799e
commit b6e55e908d
2 changed files with 23 additions and 7 deletions

View File

@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
Unit tests for database layer. Unit tests for database layer.
''' '''
import sys, unittest, os import sys, unittest, os, cStringIO
from itertools import repeat from itertools import repeat
from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ptempfile import PersistentTemporaryDirectory
@ -60,6 +60,11 @@ class DBTest(unittest.TestCase):
self.assertEqual(ga(self.m2, p), ga(m2, p)) self.assertEqual(ga(self.m2, p), ga(m2, p))
self.assertEqual(self.db.format(1, 'txt', index_is_id=True), 'test') self.assertEqual(self.db.format(1, 'txt', index_is_id=True), 'test')
self.assertEqual(self.db.formats(1, index_is_id=True), 'TXT')
self.db.add_format(1, 'html', cStringIO.StringIO('<html/>'), index_is_id=True)
self.assertEqual(self.db.formats(1, index_is_id=True), 'HTML,TXT')
self.db.remove_format(1, 'html', index_is_id=True)
self.assertEqual(self.db.formats(1, index_is_id=True), 'TXT')
self.assertNotEqual(self.db.cover(1, index_is_id=True), None) self.assertNotEqual(self.db.cover(1, index_is_id=True), None)
self.assertEqual(self.db.cover(2, index_is_id=True), None) self.assertEqual(self.db.cover(2, index_is_id=True), None)
@ -76,6 +81,12 @@ class DBTest(unittest.TestCase):
self.assertEqual('new auth', self.db.authors(2)) self.assertEqual('new auth', self.db.authors(2))
self.assertEqual(self.db.format(3, 'txt', index_is_id=True), 'test') self.assertEqual(self.db.format(3, 'txt', index_is_id=True), 'test')
def testSorting(self):
    '''
    Sort the database on two fields in turn and check the value reported
    for row 0 after each sort.
    '''
    # Each entry: (sort field, flag passed to sort(), accessor name, expected
    # value at row 0).
    # NOTE(review): the flag is presumably the ascending/descending switch --
    # confirm against the signature of db.sort().
    checks = (
        ('authors', True, 'authors', 'Test Author 1'),
        ('rating', False, 'rating', 3),
    )
    for field, flag, accessor, expected in checks:
        self.db.sort(field, flag)
        first_row_value = getattr(self.db, accessor)(0)
        self.assertEqual(first_row_value, expected)
def suite(): def suite():
return unittest.TestLoader().loadTestsFromTestCase(DBTest) return unittest.TestLoader().loadTestsFromTestCase(DBTest)

View File

@ -5,9 +5,10 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' '''
nytimes.com nytimes.com
''' '''
import string import string, re
from calibre import strftime from calibre import strftime
from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
class NYTimes(BasicNewsRecipe): class NYTimes(BasicNewsRecipe):
@ -57,7 +58,8 @@ class NYTimes(BasicNewsRecipe):
a = div.find('a', href=True) a = div.find('a', href=True)
if not a: if not a:
continue continue
url = self.print_version(a['href']) url = re.sub(r'\?.*', '', a['href'])
url += '?pagewanted=print'
title = self.tag_to_string(a, use_alt=True).strip() title = self.tag_to_string(a, use_alt=True).strip()
description = '' description = ''
pubdate = strftime('%a, %d %b') pubdate = strftime('%a, %d %b')
@ -77,7 +79,10 @@ class NYTimes(BasicNewsRecipe):
ans = [(key, articles[key]) for key in ans if articles.has_key(key)] ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return ans return ans
def print_version(self, url): def preprocess_html(self, soup):
if url.endswith('?&pagewanted=print'): refresh = soup.find('meta', {'http-equiv':'refresh'})
return url if refresh is None:
return url + '?&pagewanted=print' return soup
content = refresh.get('content').partition('=')[2]
raw = self.browser.open('http://www.nytimes.com'+content).read()
return BeautifulSoup(raw.decode('cp1252', 'replace'))