mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
EPUB Input: Fix handling of EPUB files that contain images with non-ascii filenames. Fixes #1171186 (Private bug)
This commit is contained in:
parent
763c921108
commit
6dbd826c51
@ -1,7 +1,6 @@
|
||||
'''
|
||||
Basic support for manipulating OEB 1.x/2.0 content and metadata.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
@ -11,7 +10,7 @@ import os, re, uuid, logging
|
||||
from collections import defaultdict
|
||||
from itertools import count
|
||||
from urlparse import urldefrag, urlparse, urlunparse, urljoin
|
||||
from urllib import unquote as urlunquote
|
||||
from urllib import unquote
|
||||
|
||||
from lxml import etree, html
|
||||
from calibre.constants import filesystem_encoding, __version__
|
||||
@ -372,6 +371,19 @@ def urlquote(href):
|
||||
result.append(char)
|
||||
return ''.join(result)
|
||||
|
||||
def urlunquote(href):
|
||||
# unquote must run on a bytestring and will return a bytestring
|
||||
# If it runs on a unicode object, it returns a double encoded unicode
|
||||
# string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
|
||||
# and the latter is correct
|
||||
want_unicode = isinstance(href, unicode)
|
||||
if want_unicode:
|
||||
href = href.encode('utf-8')
|
||||
href = unquote(href)
|
||||
if want_unicode:
|
||||
href = href.decode('utf-8')
|
||||
return href
|
||||
|
||||
def urlnormalize(href):
|
||||
"""Convert a URL into normalized form, with all and only URL-unsafe
|
||||
characters URL quoted.
|
||||
@ -468,7 +480,7 @@ class DirContainer(object):
|
||||
return
|
||||
|
||||
def _unquote(self, path):
|
||||
# urlunquote must run on a bytestring and will return a bytestring
|
||||
# unquote must run on a bytestring and will return a bytestring
|
||||
# If it runs on a unicode object, it returns a double encoded unicode
|
||||
# string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
|
||||
# and the latter is correct
|
||||
|
@ -196,6 +196,8 @@ class OEBReader(object):
|
||||
item.media_type[-4:] in ('/xml', '+xml')):
|
||||
hrefs = [r[2] for r in iterlinks(data)]
|
||||
for href in hrefs:
|
||||
if isinstance(href, bytes):
|
||||
href = href.decode('utf-8')
|
||||
href, _ = urldefrag(href)
|
||||
if not href:
|
||||
continue
|
||||
|
@ -47,6 +47,8 @@ class ManifestTrimmer(object):
|
||||
item.data is not None:
|
||||
hrefs = [r[2] for r in iterlinks(item.data)]
|
||||
for href in hrefs:
|
||||
if isinstance(href, bytes):
|
||||
href = href.decode('utf-8')
|
||||
try:
|
||||
href = item.abshref(urlnormalize(href))
|
||||
except:
|
||||
|
Loading…
x
Reference in New Issue
Block a user