Mirror of https://github.com/kovidgoyal/calibre.git
EPUB/AZW3 Output: Fix regression that caused errors when trying to convert documents that have URLs with invalid (non-utf-8) quoting. Fixes #1181049 (in vers .9.30 I can't download WSJ but works fine in earlier versions)
commit 16c5f8b1c1
parent 55808e5f68
@@ -373,7 +373,7 @@ def urlquote(href):
         result.append(char)
     return ''.join(result)
 
-def urlunquote(href):
+def urlunquote(href, error_handling='strict'):
     # unquote must run on a bytestring and will return a bytestring
     # If it runs on a unicode object, it returns a double encoded unicode
     # string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
@@ -383,7 +383,10 @@ def urlunquote(href):
         href = href.encode('utf-8')
     href = unquote(href)
     if want_unicode:
-        href = href.decode('utf-8')
+        # The quoted characters could have been in some encoding other than
+        # UTF-8, this often happens with old/broken web servers. There is no
+        # way to know what that encoding should be in this context.
+        href = href.decode('utf-8', error_handling)
     return href
 
 def urlnormalize(href):
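The urlunquote change above exposes the codec error handler so callers can opt out of strict UTF-8 decoding of the unquoted bytes. Below is a minimal standalone sketch of that idea using Python 3's urllib.parse rather than calibre's own helpers; the function name urlunquote_sketch and the sample hrefs are illustrative only.

# Standalone sketch, not calibre code: percent-unquoting yields raw bytes, and
# decoding them as UTF-8 fails when the quoting used some other encoding.
from urllib.parse import unquote_to_bytes

def urlunquote_sketch(href, error_handling='strict'):
    raw = unquote_to_bytes(href)          # b'a\xe4.html' for 'a%E4.html'
    return raw.decode('utf-8', error_handling)

print(urlunquote_sketch('a%C3%A4.html'))           # valid UTF-8 quoting -> 'aä.html'
print(urlunquote_sketch('a%E4.html', 'replace'))   # latin-1 style quoting -> 'a\ufffd.html'
# urlunquote_sketch('a%E4.html') with the default 'strict' raises UnicodeDecodeError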
@@ -159,7 +159,11 @@ class Split(object):
         except ValueError:
             # Unparseable URL
             return url
-        href = urlnormalize(href)
+        try:
+            href = urlnormalize(href)
+        except ValueError:
+            # href has non utf-8 quoting
+            return url
         if href in self.map:
             anchor_map = self.map[href]
             nhref = anchor_map[frag if frag else None]
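On the caller side, the Split transform now treats a normalization failure as "leave this link alone" rather than letting the exception abort the conversion. A hedged sketch of that pattern follows; normalize and rewrite_link are stand-ins for calibre's urlnormalize and the split-time link rewriting, not the real implementations.

from urllib.parse import quote, unquote_to_bytes

def normalize(href):
    # Stand-in for urlnormalize: strict UTF-8 decoding of the unquoted bytes
    # mirrors urlunquote's default; invalid quoting surfaces as a
    # UnicodeDecodeError, which is a subclass of ValueError.
    return quote(unquote_to_bytes(href).decode('utf-8'))

def rewrite_link(url, anchor_map):
    try:
        href = normalize(url)
    except ValueError:
        # href has non utf-8 quoting -- keep the original link untouched
        return url
    return anchor_map.get(href, url)

print(rewrite_link('chap%E4.html', {}))                                     # returned unchanged
print(rewrite_link('chap%C3%A4.html', {'chap%C3%A4.html': 'split0.html'}))  # -> 'split0.html'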