mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix New York Times recipe
This commit is contained in:
parent
114c1e799e
commit
b6e55e908d
@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
Unit tests for database layer.
|
Unit tests for database layer.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import sys, unittest, os
|
import sys, unittest, os, cStringIO
|
||||||
from itertools import repeat
|
from itertools import repeat
|
||||||
|
|
||||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||||
@ -60,6 +60,11 @@ class DBTest(unittest.TestCase):
|
|||||||
self.assertEqual(ga(self.m2, p), ga(m2, p))
|
self.assertEqual(ga(self.m2, p), ga(m2, p))
|
||||||
|
|
||||||
self.assertEqual(self.db.format(1, 'txt', index_is_id=True), 'test')
|
self.assertEqual(self.db.format(1, 'txt', index_is_id=True), 'test')
|
||||||
|
self.assertEqual(self.db.formats(1, index_is_id=True), 'TXT')
|
||||||
|
self.db.add_format(1, 'html', cStringIO.StringIO('<html/>'), index_is_id=True)
|
||||||
|
self.assertEqual(self.db.formats(1, index_is_id=True), 'HTML,TXT')
|
||||||
|
self.db.remove_format(1, 'html', index_is_id=True)
|
||||||
|
self.assertEqual(self.db.formats(1, index_is_id=True), 'TXT')
|
||||||
self.assertNotEqual(self.db.cover(1, index_is_id=True), None)
|
self.assertNotEqual(self.db.cover(1, index_is_id=True), None)
|
||||||
self.assertEqual(self.db.cover(2, index_is_id=True), None)
|
self.assertEqual(self.db.cover(2, index_is_id=True), None)
|
||||||
|
|
||||||
@ -76,6 +81,12 @@ class DBTest(unittest.TestCase):
|
|||||||
self.assertEqual('new auth', self.db.authors(2))
|
self.assertEqual('new auth', self.db.authors(2))
|
||||||
self.assertEqual(self.db.format(3, 'txt', index_is_id=True), 'test')
|
self.assertEqual(self.db.format(3, 'txt', index_is_id=True), 'test')
|
||||||
|
|
||||||
|
def testSorting(self):
|
||||||
|
self.db.sort('authors', True)
|
||||||
|
self.assertEqual(self.db.authors(0), 'Test Author 1')
|
||||||
|
self.db.sort('rating', False)
|
||||||
|
self.assertEqual(self.db.rating(0), 3)
|
||||||
|
|
||||||
|
|
||||||
def suite():
|
def suite():
|
||||||
return unittest.TestLoader().loadTestsFromTestCase(DBTest)
|
return unittest.TestLoader().loadTestsFromTestCase(DBTest)
|
||||||
|
@ -5,9 +5,10 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
'''
|
'''
|
||||||
nytimes.com
|
nytimes.com
|
||||||
'''
|
'''
|
||||||
import string
|
import string, re
|
||||||
from calibre import strftime
|
from calibre import strftime
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
class NYTimes(BasicNewsRecipe):
|
class NYTimes(BasicNewsRecipe):
|
||||||
|
|
||||||
@ -57,7 +58,8 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
a = div.find('a', href=True)
|
a = div.find('a', href=True)
|
||||||
if not a:
|
if not a:
|
||||||
continue
|
continue
|
||||||
url = self.print_version(a['href'])
|
url = re.sub(r'\?.*', '', a['href'])
|
||||||
|
url += '?pagewanted=print'
|
||||||
title = self.tag_to_string(a, use_alt=True).strip()
|
title = self.tag_to_string(a, use_alt=True).strip()
|
||||||
description = ''
|
description = ''
|
||||||
pubdate = strftime('%a, %d %b')
|
pubdate = strftime('%a, %d %b')
|
||||||
@ -77,7 +79,10 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def print_version(self, url):
|
def preprocess_html(self, soup):
|
||||||
if url.endswith('?&pagewanted=print'):
|
refresh = soup.find('meta', {'http-equiv':'refresh'})
|
||||||
return url
|
if refresh is None:
|
||||||
return url + '?&pagewanted=print'
|
return soup
|
||||||
|
content = refresh.get('content').partition('=')[2]
|
||||||
|
raw = self.browser.open('http://www.nytimes.com'+content).read()
|
||||||
|
return BeautifulSoup(raw.decode('cp1252', 'replace'))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user