mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #4539 (Apostrophes not showing up in NYT recipe)
This commit is contained in:
parent
b542c8a090
commit
8eb3e165a0
@ -10,11 +10,18 @@ from calibre import strftime
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
|
||||
def decode(self, src):
    """Decode the raw source of a downloaded page into unicode text.

    Intended to be assigned as a recipe's ``encoding`` callable, which is
    why it takes the recipe object as its first argument.

    :param self: The recipe object; not used by this function.
    :param src: The undecoded page source (a byte string).
    :return: ``src`` decoded as ``cp1252`` when it mentions ``iso-8859-1``
             anywhere (in any letter case), otherwise decoded as ``utf-8``.
             Bytes that are invalid in the chosen encoding are dropped.
    """
    # Charset labels are case-insensitive, so search a lower-cased copy;
    # the bytes needle keeps the membership test valid for byte strings.
    declares_latin1 = b'iso-8859-1' in src.lower()
    # Pages labelled latin1 are decoded as cp1252, which additionally maps
    # the 0x80-0x9f range where typographic quotes/apostrophes live.
    enc = 'cp1252' if declares_latin1 else 'utf-8'
    return src.decode(enc, 'ignore')
|
||||
|
||||
class NYTimes(BasicNewsRecipe):
|
||||
|
||||
title = 'The New York Times (subscription)'
|
||||
__author__ = 'Kovid Goyal'
|
||||
language = 'en'
|
||||
requires_version = (0, 6, 36)
|
||||
|
||||
description = 'Daily news from the New York Times (subscription version)'
|
||||
timefmt = ' [%a, %b %d, %Y]'
|
||||
@ -27,7 +34,7 @@ class NYTimes(BasicNewsRecipe):
|
||||
'side_tool', 'side_index',
|
||||
'relatedArticles', 'relatedTopics', 'adxSponLink']),
|
||||
dict(name=['script', 'noscript', 'style'])]
|
||||
#encoding = 'cp1252'
|
||||
encoding = decode
|
||||
no_stylesheets = True
|
||||
extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'
|
||||
|
||||
|
@ -66,7 +66,7 @@ class RecipeInput(InputFormatPlugin):
|
||||
if recipe.requires_version > numeric_version:
|
||||
log.warn(
|
||||
'Downloaded recipe needs calibre version at least: %s' % \
|
||||
recipe.requires_version)
|
||||
('.'.join(recipe.requires_version)))
|
||||
builtin = True
|
||||
except:
|
||||
log.exception('Failed to compile downloaded recipe. Falling '
|
||||
|
@ -111,7 +111,9 @@ class BasicNewsRecipe(Recipe):
|
||||
|
||||
#: Specify an override encoding for sites that have an incorrect
|
||||
#: charset specification. The most common being specifying ``latin1`` and
|
||||
#: using ``cp1252``. If None, try to detect the encoding.
|
||||
#: using ``cp1252``. If None, try to detect the encoding. If it is a
|
||||
#: callable, the callable is called with two arguments: The recipe object
|
||||
#: and the source to be decoded. It must return the decoded source.
|
||||
encoding = None
|
||||
|
||||
#: Normally we try to guess if a feed has full articles embedded in it
|
||||
|
@ -403,7 +403,9 @@ class RecursiveFetcher(object):
|
||||
if len(dsrc) == 0 or \
|
||||
len(re.compile('<!--.*?-->', re.DOTALL).sub('', dsrc).strip()) == 0:
|
||||
raise ValueError('No content at URL %s'%iurl)
|
||||
if self.encoding is not None:
|
||||
if callable(self.encoding):
|
||||
dsrc = self.encoding(dsrc)
|
||||
elif self.encoding is not None:
|
||||
dsrc = dsrc.decode(self.encoding, 'replace')
|
||||
else:
|
||||
dsrc = xml_to_unicode(dsrc, self.verbose)[0]
|
||||
|
Loading…
x
Reference in New Issue
Block a user