mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
EPUB/AZW3 Output: Fix regression that caused errors when trying to convert documents that have URLs with invalid (non-utf-8) quoting. Fixes #1181049 (in vers .9.30 I can't download WSJ but works fine in earlier versions)
This commit is contained in:
parent
55808e5f68
commit
16c5f8b1c1
@ -373,7 +373,7 @@ def urlquote(href):
|
|||||||
result.append(char)
|
result.append(char)
|
||||||
return ''.join(result)
|
return ''.join(result)
|
||||||
|
|
||||||
def urlunquote(href):
|
def urlunquote(href, error_handling='strict'):
|
||||||
# unquote must run on a bytestring and will return a bytestring
|
# unquote must run on a bytestring and will return a bytestring
|
||||||
# If it runs on a unicode object, it returns a double encoded unicode
|
# If it runs on a unicode object, it returns a double encoded unicode
|
||||||
# string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
|
# string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
|
||||||
@ -383,7 +383,10 @@ def urlunquote(href):
|
|||||||
href = href.encode('utf-8')
|
href = href.encode('utf-8')
|
||||||
href = unquote(href)
|
href = unquote(href)
|
||||||
if want_unicode:
|
if want_unicode:
|
||||||
href = href.decode('utf-8')
|
# The quoted characters could have been in some encoding other than
|
||||||
|
# UTF-8, this often happens with old/broken web servers. There is no
|
||||||
|
# way to know what that encoding should be in this context.
|
||||||
|
href = href.decode('utf-8', error_handling)
|
||||||
return href
|
return href
|
||||||
|
|
||||||
def urlnormalize(href):
|
def urlnormalize(href):
|
||||||
|
@ -159,7 +159,11 @@ class Split(object):
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
# Unparseable URL
|
# Unparseable URL
|
||||||
return url
|
return url
|
||||||
href = urlnormalize(href)
|
try:
|
||||||
|
href = urlnormalize(href)
|
||||||
|
except ValueError:
|
||||||
|
# href has non utf-8 quoting
|
||||||
|
return url
|
||||||
if href in self.map:
|
if href in self.map:
|
||||||
anchor_map = self.map[href]
|
anchor_map = self.map[href]
|
||||||
nhref = anchor_map[frag if frag else None]
|
nhref = anchor_map[frag if frag else None]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user