mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix metadata download from Amazon stripping accents from comments text in the binary builds
This seems to be caused by a bug in the lxml version in the builds, which causes it to output Unicode characters as entities; those entities were in turn being stripped by the sanitize_html() function. Fixes #1825905 [amazon scrapper not retrieving accented caracters](https://bugs.launchpad.net/calibre/+bug/1825905)
This commit is contained in:
parent
ad9627f14a
commit
9714f722e7
@ -17,7 +17,7 @@ try:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
from urlparse import urlparse
|
from urlparse import urlparse
|
||||||
|
|
||||||
from calibre import as_unicode, browser, random_user_agent
|
from calibre import as_unicode, browser, random_user_agent, xml_replace_entities
|
||||||
from calibre.ebooks.metadata import check_isbn
|
from calibre.ebooks.metadata import check_isbn
|
||||||
from calibre.ebooks.metadata.book.base import Metadata
|
from calibre.ebooks.metadata.book.base import Metadata
|
||||||
from calibre.ebooks.metadata.sources.base import Option, Source, fixauthors, fixcase
|
from calibre.ebooks.metadata.sources.base import Option, Source, fixauthors, fixcase
|
||||||
@ -577,6 +577,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
del a.attrib['href']
|
del a.attrib['href']
|
||||||
a.tag = 'span'
|
a.tag = 'span'
|
||||||
desc = self.tostring(desc, method='html', encoding='unicode').strip()
|
desc = self.tostring(desc, method='html', encoding='unicode').strip()
|
||||||
|
desc = xml_replace_entities(desc, 'utf-8')
|
||||||
|
|
||||||
# Encoding bug in Amazon data U+fffd (replacement char)
|
# Encoding bug in Amazon data U+fffd (replacement char)
|
||||||
# in some examples it is present in place of '
|
# in some examples it is present in place of '
|
||||||
@ -862,7 +863,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
class Amazon(Source):
|
class Amazon(Source):
|
||||||
|
|
||||||
name = 'Amazon.com'
|
name = 'Amazon.com'
|
||||||
version = (1, 2, 8)
|
version = (1, 2, 9)
|
||||||
minimum_calibre_version = (2, 82, 0)
|
minimum_calibre_version = (2, 82, 0)
|
||||||
description = _('Downloads metadata and covers from Amazon')
|
description = _('Downloads metadata and covers from Amazon')
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user