mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #4539 (Apostrophes not showing up in NYT recipe)
This commit is contained in:
parent
b542c8a090
commit
8eb3e165a0
@ -10,11 +10,18 @@ from calibre import strftime
|
|||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
|
def decode(self, src):
    '''
    Decode the raw source of a downloaded page into unicode text.

    Intended for use as a recipe ``encoding`` callable. If the source
    mentions ``iso-8859-1`` anywhere it is decoded as ``cp1252``; otherwise
    it is decoded as ``utf-8``. Bytes that are not valid in the chosen codec
    are silently dropped.

    :param self: The recipe object (not used by this function).
    :param src: The page source as a byte string.
    :return: The decoded text.
    '''
    # The marker is a bytes literal so that the containment test works on a
    # byte string under Python 3 as well as Python 2 (where b'' is plain
    # str). A text literal here raises TypeError against bytes on Python 3.
    enc = 'cp1252' if b'iso-8859-1' in src else 'utf-8'
    return src.decode(enc, 'ignore')
|
||||||
|
|
||||||
class NYTimes(BasicNewsRecipe):
|
class NYTimes(BasicNewsRecipe):
|
||||||
|
|
||||||
title = 'The New York Times (subscription)'
|
title = 'The New York Times (subscription)'
|
||||||
__author__ = 'Kovid Goyal'
|
__author__ = 'Kovid Goyal'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
requires_version = (0, 6, 36)
|
||||||
|
|
||||||
description = 'Daily news from the New York Times (subscription version)'
|
description = 'Daily news from the New York Times (subscription version)'
|
||||||
timefmt = ' [%a, %b %d, %Y]'
|
timefmt = ' [%a, %b %d, %Y]'
|
||||||
@ -27,7 +34,7 @@ class NYTimes(BasicNewsRecipe):
|
|||||||
'side_tool', 'side_index',
|
'side_tool', 'side_index',
|
||||||
'relatedArticles', 'relatedTopics', 'adxSponLink']),
|
'relatedArticles', 'relatedTopics', 'adxSponLink']),
|
||||||
dict(name=['script', 'noscript', 'style'])]
|
dict(name=['script', 'noscript', 'style'])]
|
||||||
#encoding = 'cp1252'
|
encoding = decode
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
|
extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
|
||||||
|
|
||||||
|
@ -66,7 +66,7 @@ class RecipeInput(InputFormatPlugin):
|
|||||||
if recipe.requires_version > numeric_version:
|
if recipe.requires_version > numeric_version:
|
||||||
log.warn(
|
log.warn(
|
||||||
'Downloaded recipe needs calibre version at least: %s' % \
|
'Downloaded recipe needs calibre version at least: %s' % \
|
||||||
recipe.requires_version)
|
('.'.join(recipe.requires_version)))
|
||||||
builtin = True
|
builtin = True
|
||||||
except:
|
except:
|
||||||
log.exception('Failed to compile downloaded recipe. Falling '
|
log.exception('Failed to compile downloaded recipe. Falling '
|
||||||
|
@ -111,7 +111,9 @@ class BasicNewsRecipe(Recipe):
|
|||||||
|
|
||||||
#: Specify an override encoding for sites that have an incorrect
|
#: Specify an override encoding for sites that have an incorrect
|
||||||
#: charset specification. The most common being specifying ``latin1`` and
|
#: charset specification. The most common being specifying ``latin1`` and
|
||||||
#: using ``cp1252``. If None, try to detect the encoding.
|
#: using ``cp1252``. If None, try to detect the encoding. If it is a
|
||||||
|
#: callable, the callable is called with two arguments: The recipe object
|
||||||
|
#: and the source to be decoded. It must return the decoded source.
|
||||||
encoding = None
|
encoding = None
|
||||||
|
|
||||||
#: Normally we try to guess if a feed has full articles embedded in it
|
#: Normally we try to guess if a feed has full articles embedded in it
|
||||||
|
@ -403,7 +403,9 @@ class RecursiveFetcher(object):
|
|||||||
if len(dsrc) == 0 or \
|
if len(dsrc) == 0 or \
|
||||||
len(re.compile('<!--.*?-->', re.DOTALL).sub('', dsrc).strip()) == 0:
|
len(re.compile('<!--.*?-->', re.DOTALL).sub('', dsrc).strip()) == 0:
|
||||||
raise ValueError('No content at URL %s'%iurl)
|
raise ValueError('No content at URL %s'%iurl)
|
||||||
if self.encoding is not None:
|
if callable(self.encoding):
|
||||||
|
dsrc = self.encoding(dsrc)
|
||||||
|
elif self.encoding is not None:
|
||||||
dsrc = dsrc.decode(self.encoding, 'replace')
|
dsrc = dsrc.decode(self.encoding, 'replace')
|
||||||
else:
|
else:
|
||||||
dsrc = xml_to_unicode(dsrc, self.verbose)[0]
|
dsrc = xml_to_unicode(dsrc, self.verbose)[0]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user