Fix #4539 (Apostrophes not showing up in NYT recipe)

2025-07-09 03:04:10 -04:00 · 2010-01-25 10:04:30 -07:00 · 2010-01-25 10:04:30 -07:00 · 8eb3e165a0
commit 8eb3e165a0
parent b542c8a090
4 changed files with 15 additions and 4 deletions
--- a/resources/recipes/nytimes_sub.recipe
+++ b/resources/recipes/nytimes_sub.recipe
@ -10,11 +10,18 @@ from calibre import strftime
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup

+def decode(self, src):
+    '''
+    Decode the raw source of a downloaded page into text.
+
+    Meant to be assigned as a recipe's ``encoding`` attribute, in which case
+    it is called with the recipe object and the source to be decoded.
+
+    :param self: The recipe object (not used here).
+    :param src: The undecoded page source, as a byte string.
+    :return: ``src`` decoded as cp1252 when it contains the marker
+             ``iso-8859-1``, otherwise decoded as utf-8. Bytes that cannot
+             be decoded are silently dropped.
+    '''
+    # A bytes literal is the same object type as a plain str on Python 2, and
+    # keeps the containment test valid when src is a bytes object on Python 3.
+    enc = 'cp1252' if b'iso-8859-1' in src else 'utf-8'
+    return src.decode(enc, 'ignore')
+
 class NYTimes(BasicNewsRecipe):

    title       = 'The New York Times (subscription)'
    __author__  = 'Kovid Goyal'
    language = 'en'
+    requires_version = (0, 6, 36)

    description = 'Daily news from the New York Times (subscription version)'
    timefmt  = ' [%a, %b %d, %Y]'
@ -27,7 +34,7 @@ class NYTimes(BasicNewsRecipe):
                       'side_tool', 'side_index',
                       'relatedArticles', 'relatedTopics', 'adxSponLink']),
                   dict(name=['script', 'noscript', 'style'])]
-    #encoding = 'cp1252'
+    encoding = decode
    no_stylesheets = True
    extra_css = 'h1 {font: sans-serif large;}\n.byline {font:monospace;}'

--- a/src/calibre/web/feeds/input.py
+++ b/src/calibre/web/feeds/input.py
@ -66,7 +66,7 @@ class RecipeInput(InputFormatPlugin):
                if recipe.requires_version > numeric_version:
                    log.warn(
                    'Downloaded recipe needs calibre version at least: %s' % \
-                    recipe.requires_version)
+                    ('.'.join(recipe.requires_version)))
                    builtin = True
            except:
                log.exception('Failed to compile downloaded recipe. Falling '
--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@ -111,7 +111,9 @@ class BasicNewsRecipe(Recipe):

    #: Specify an override encoding for sites that have an incorrect
    #: charset specification. The most common being specifying ``latin1`` and
-    #: using ``cp1252``. If None, try to detect the encoding.
+    #: using ``cp1252``. If None, try to detect the encoding. If it is a
+    #: callable, the callable is called with two arguments: The recipe object
+    #: and the source to be decoded. It must return the decoded source.
    encoding               = None

    #: Normally we try to guess if a feed has full articles embedded in it
--- a/src/calibre/web/fetch/simple.py
+++ b/src/calibre/web/fetch/simple.py
@ -403,7 +403,9 @@ class RecursiveFetcher(object):
                    if len(dsrc) == 0 or \
                       len(re.compile('<!--.*?-->', re.DOTALL).sub('', dsrc).strip()) == 0:
                        raise ValueError('No content at URL %s'%iurl)
-                    if self.encoding is not None:
+                    if callable(self.encoding):
+                        dsrc = self.encoding(dsrc)
+                    elif self.encoding is not None:
                        dsrc = dsrc.decode(self.encoding, 'replace')
                    else:
                        dsrc = xml_to_unicode(dsrc, self.verbose)[0]