mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Improve encoding detection in NYT recipe
This commit is contained in:
parent
6e6d2096f5
commit
1325367249
@@ -27,7 +27,7 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
'side_tool', 'side_index',
|
'side_tool', 'side_index',
|
||||||
'relatedArticles', 'relatedTopics', 'adxSponLink']),
|
'relatedArticles', 'relatedTopics', 'adxSponLink']),
|
||||||
dict(name=['script', 'noscript', 'style'])]
|
dict(name=['script', 'noscript', 'style'])]
|
||||||
encoding = 'cp1252'
|
#encoding = 'cp1252'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
|
extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
|
||||||
|
|
||||||
@@ -118,5 +118,5 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
if refresh is None:
|
if refresh is None:
|
||||||
return soup
|
return soup
|
||||||
content = refresh.get('content').partition('=')[2]
|
content = refresh.get('content').partition('=')[2]
|
||||||
raw = self.browser.open('http://www.nytimes.com'+content).read()
|
raw = self.browser.open_novisit('http://www.nytimes.com'+content).read()
|
||||||
return BeautifulSoup(raw.decode('cp1252', 'replace'))
|
return BeautifulSoup(raw.decode('cp1252', 'replace'))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user