Mirror of https://github.com/kovidgoyal/calibre.git
py3: Port urlunquote
Also take the opportunity to make unquote correct on Python 2 by moving urlunquote into the polyglot module.
parent 50dd4952cb
commit 1a602a7873
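For context, a minimal Python 2.7 sketch (not part of the commit) of the behaviour the message refers to: the stdlib unquote must be fed a bytestring, otherwise multi-byte UTF-8 percent-escapes come back double-encoded.

# Illustrative only -- run under Python 2.7. Shows why calling the stdlib
# unquote on a unicode object yields a double-encoded result.
from urllib import unquote

assert unquote(b'%C3%A4').decode('utf-8') == u'\xe4'  # correct: U+00E4 'ä'
assert unquote(u'%C3%A4') == u'\xc3\xa4'              # wrong: double-encoded
assert unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')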
@@ -23,7 +23,7 @@ from calibre.ebooks.oeb.parse_utils import (barename, XHTML_NS, RECOVER_PARSER,
 from calibre.utils.cleantext import clean_xml_chars
 from calibre.utils.short_uuid import uuid4
 from polyglot.builtins import iteritems, unicode_type, string_or_bytes, range, itervalues, filter
-from polyglot.urllib import unquote, urldefrag, urljoin, urlparse, urlunparse
+from polyglot.urllib import unquote as urlunquote, urldefrag, urljoin, urlparse, urlunparse
 from calibre.utils.icu import numeric_sort_key

 XML_NS = 'http://www.w3.org/XML/1998/namespace'
@@ -455,23 +455,6 @@ def urlquote(href):
     return ''.join(result)


-def urlunquote(href, error_handling='strict'):
-    # unquote must run on a bytestring and will return a bytestring
-    # If it runs on a unicode object, it returns a double encoded unicode
-    # string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
-    # and the latter is correct
-    want_unicode = isinstance(href, unicode_type)
-    if want_unicode:
-        href = href.encode('utf-8')
-    href = unquote(href)
-    if want_unicode:
-        # The quoted characters could have been in some encoding other than
-        # UTF-8, this often happens with old/broken web servers. There is no
-        # way to know what that encoding should be in this context.
-        href = href.decode('utf-8', error_handling)
-    return href
-
-
 def urlnormalize(href):
     """Convert a URL into normalized form, with all and only URL-unsafe
     characters URL quoted.
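The hunk above deletes the module-local urlunquote helper; thanks to the aliased import in the first hunk, existing call sites keep working because urlunquote is now simply the polyglot unquote wrapper. A rough, hypothetical sanity check of that equivalence (not from calibre's test suite) follows; one nuance visible in the diff is that the removed helper decoded with error_handling='strict' by default, while the new wrapper defaults to errors='replace'.

# Hypothetical check -- mirrors the aliased import added in the first hunk;
# not code from the commit.
from polyglot.urllib import unquote as urlunquote

assert urlunquote(u'a%20b%C3%A4') == u'a b\xe4'      # unicode in -> unicode out
assert urlunquote(b'a%20b%C3%A4') == b'a b\xc3\xa4'  # bytes in -> bytes out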
@@ -10,14 +10,36 @@ if is_py3:
     from urllib.request import (build_opener, getproxies, install_opener,  # noqa
             HTTPBasicAuthHandler, HTTPCookieProcessor, HTTPDigestAuthHandler,  # noqa
             url2pathname, urlopen, Request)  # noqa
-    from urllib.parse import (parse_qs, quote, unquote, quote_plus, urldefrag,  # noqa
+    from urllib.parse import (parse_qs, quote, unquote as uq, quote_plus, urldefrag,  # noqa
             urlencode, urljoin, urlparse, urlunparse, urlsplit, urlunsplit)  # noqa
     from urllib.error import HTTPError, URLError  # noqa
+
+    def unquote(x, encoding='utf-8', errors='replace'):
+        binary = isinstance(x, bytes)
+        if binary:
+            x = x.decode(encoding, errors)
+        ans = uq(x, encoding, errors)
+        if binary:
+            ans = ans.encode(encoding, errors)
+        return ans
 else:
-    from urllib import (getproxies, quote, unquote, quote_plus, url2pathname,  # noqa
+    from urllib import (getproxies, quote, unquote as uq, quote_plus, url2pathname,  # noqa
             urlencode)  # noqa
     from urllib2 import (build_opener, install_opener, HTTPBasicAuthHandler,  # noqa
             HTTPCookieProcessor, HTTPDigestAuthHandler, HTTPError, URLError,  # noqa
             urlopen, Request)  # noqa
     from urlparse import (parse_qs, urldefrag, urljoin, urlparse, urlunparse,  # noqa
             urlsplit, urlunsplit)  # noqa
+
+    def unquote(x, encoding='utf-8', errors='replace'):
+        # unquote must run on a bytestring and will return a bytestring
+        # If it runs on a unicode object, it returns a double encoded unicode
+        # string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
+        # and the latter is correct
+        binary = isinstance(x, bytes)
+        if not binary:
+            x = x.encode(encoding, errors)
+        ans = uq(x)
+        if not binary:
+            ans = ans.decode(encoding, errors)
+        return ans
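For readers outside the calibre tree, the pattern used in both branches of the hunk above (per the commit message, the polyglot urllib module) can be sketched as a standalone two-branch shim: normalize the argument to the type the stdlib function expects, call it, then restore the caller's type, so str input yields correctly decoded str output and bytes input yields bytes output on both Python 2 and Python 3. The name unquote_compat below is illustrative, not part of calibre.

# Standalone sketch of the wrapper pattern shown in the diff above.
try:
    from urllib.parse import unquote as _std_unquote  # Python 3

    def unquote_compat(x, encoding='utf-8', errors='replace'):
        binary = isinstance(x, bytes)
        if binary:
            x = x.decode(encoding, errors)  # py3 stdlib wants str
        ans = _std_unquote(x, encoding, errors)
        return ans.encode(encoding, errors) if binary else ans
except ImportError:
    from urllib import unquote as _std_unquote  # Python 2

    def unquote_compat(x, encoding='utf-8', errors='replace'):
        binary = isinstance(x, bytes)
        if not binary:
            x = x.encode(encoding, errors)  # py2 stdlib wants bytes
        ans = _std_unquote(x)
        return ans if binary else ans.decode(encoding, errors)

assert unquote_compat(u'%C3%A4') == u'\xe4'
assert unquote_compat(b'%C3%A4') == b'\xc3\xa4'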