mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #1880195 [Amazon_de metadata: Problems with Umlauts or accents](https://bugs.launchpad.net/calibre/+bug/1880195)
This commit is contained in:
parent
12f2a2f713
commit
e819b62e0b
@ -23,6 +23,14 @@ from calibre.ebooks.metadata.book.base import Metadata
|
|||||||
from calibre.ebooks.metadata.sources.base import Option, Source, fixauthors, fixcase
|
from calibre.ebooks.metadata.sources.base import Option, Source, fixauthors, fixcase
|
||||||
from calibre.utils.localization import canonicalize_lang
|
from calibre.utils.localization import canonicalize_lang
|
||||||
from calibre.utils.random_ua import accept_header_for_ua
|
from calibre.utils.random_ua import accept_header_for_ua
|
||||||
|
from calibre.ebooks.oeb.base import urlquote
|
||||||
|
|
||||||
|
|
||||||
|
def iri_quote_plus(url):
    """Quote *url* with ``urlquote`` and use ``+`` in place of ``%20``.

    If ``urlquote`` hands back bytes, they are decoded as UTF-8 before the
    ``%20`` -> ``+`` substitution is applied.
    """
    quoted = urlquote(url)
    text = quoted.decode('utf-8') if isinstance(quoted, bytes) else quoted
    return text.replace('%20', '+')
|
||||||
|
|
||||||
|
|
||||||
def user_agent_is_ok(ua):
|
def user_agent_is_ok(ua):
|
||||||
@ -895,7 +903,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
class Amazon(Source):
|
class Amazon(Source):
|
||||||
|
|
||||||
name = 'Amazon.com'
|
name = 'Amazon.com'
|
||||||
version = (1, 2, 12)
|
version = (1, 2, 13)
|
||||||
minimum_calibre_version = (2, 82, 0)
|
minimum_calibre_version = (2, 82, 0)
|
||||||
description = _('Downloads metadata and covers from Amazon')
|
description = _('Downloads metadata and covers from Amazon')
|
||||||
|
|
||||||
@ -1109,9 +1117,9 @@ class Amazon(Source):
|
|||||||
def create_query(self, log, title=None, authors=None, identifiers={}, # {{{
|
def create_query(self, log, title=None, authors=None, identifiers={}, # {{{
|
||||||
domain=None, for_amazon=True):
|
domain=None, for_amazon=True):
|
||||||
try:
|
try:
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode, unquote_plus
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from urllib import urlencode
|
from urllib import urlencode, unquote_plus
|
||||||
if domain is None:
|
if domain is None:
|
||||||
domain = self.domain
|
domain = self.domain
|
||||||
|
|
||||||
@ -1165,8 +1173,8 @@ class Amazon(Source):
|
|||||||
if not for_amazon:
|
if not for_amazon:
|
||||||
return terms, domain
|
return terms, domain
|
||||||
|
|
||||||
# magic parameter to enable Japanese Shift_JIS encoding.
|
|
||||||
if domain == 'jp':
|
if domain == 'jp':
|
||||||
|
# magic parameter to enable Japanese Shift_JIS encoding.
|
||||||
q['__mk_ja_JP'] = 'カタカナ'
|
q['__mk_ja_JP'] = 'カタカナ'
|
||||||
if domain == 'nl':
|
if domain == 'nl':
|
||||||
q['__mk_nl_NL'] = 'ÅMÅŽÕÑ'
|
q['__mk_nl_NL'] = 'ÅMÅŽÕÑ'
|
||||||
@ -1176,17 +1184,19 @@ class Amazon(Source):
|
|||||||
q['field-keywords'] += ' ' + q.pop(f, '')
|
q['field-keywords'] += ' ' + q.pop(f, '')
|
||||||
q['field-keywords'] = q['field-keywords'].strip()
|
q['field-keywords'] = q['field-keywords'].strip()
|
||||||
|
|
||||||
if domain == 'jp':
|
encode_to = 'Shift_JIS' if domain == 'jp' else 'utf-8'
|
||||||
encode_to = 'Shift_JIS'
|
|
||||||
elif domain == 'nl' or domain == 'cn':
|
|
||||||
encode_to = 'utf-8'
|
|
||||||
else:
|
|
||||||
encode_to = 'latin1'
|
|
||||||
encoded_q = dict([(x.encode(encode_to, 'ignore'), y.encode(encode_to,
|
encoded_q = dict([(x.encode(encode_to, 'ignore'), y.encode(encode_to,
|
||||||
'ignore')) for x, y in
|
'ignore')) for x, y in q.items()])
|
||||||
q.items()])
|
url_query = urlencode(encoded_q)
|
||||||
|
if encode_to == 'utf-8':
|
||||||
|
# amazon's servers want IRIs with unicode characters not percent escaped
|
||||||
|
parts = []
|
||||||
|
for x in url_query.split(b'&' if isinstance(url_query, bytes) else '&'):
|
||||||
|
k, v = x.split(b'=' if isinstance(x, bytes) else '=', 1)
|
||||||
|
parts.append('{}={}'.format(iri_quote_plus(unquote_plus(k)), iri_quote_plus(unquote_plus(v))))
|
||||||
|
url_query = '&'.join(parts)
|
||||||
url = 'https://www.amazon.%s/s/?' % self.get_website_domain(
|
url = 'https://www.amazon.%s/s/?' % self.get_website_domain(
|
||||||
domain) + urlencode(encoded_q)
|
domain) + url_query
|
||||||
return url, domain
|
return url, domain
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
@ -1581,6 +1591,15 @@ def manual_tests(domain, **kw): # {{{
|
|||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
all_tests['de'] = [ # {{{
|
all_tests['de'] = [ # {{{
|
||||||
|
( # umlaut in title/authors
|
||||||
|
{'title': 'Flüsternde Wälder',
|
||||||
|
'authors': ['Nicola Förg']},
|
||||||
|
[title_test('Flüsternde Wälder'),
|
||||||
|
authors_test(['Nicola Förg'])
|
||||||
|
]
|
||||||
|
),
|
||||||
|
|
||||||
|
|
||||||
(
|
(
|
||||||
{'identifiers': {'isbn': '9783453314979'}},
|
{'identifiers': {'isbn': '9783453314979'}},
|
||||||
[title_test('Die letzten Wächter: Roman',
|
[title_test('Die letzten Wächter: Roman',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user