mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
EPUB Input: Fix handling of EPUB files that contain images with non-ascii filenames. Fixes #1171186 (Private bug)
This commit is contained in:
parent
763c921108
commit
6dbd826c51
@@ -1,7 +1,6 @@
|
|||||||
'''
|
'''
|
||||||
Basic support for manipulating OEB 1.x/2.0 content and metadata.
|
Basic support for manipulating OEB 1.x/2.0 content and metadata.
|
||||||
'''
|
'''
|
||||||
from __future__ import with_statement
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||||
@@ -11,7 +10,7 @@ import os, re, uuid, logging
|
|||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from itertools import count
|
from itertools import count
|
||||||
from urlparse import urldefrag, urlparse, urlunparse, urljoin
|
from urlparse import urldefrag, urlparse, urlunparse, urljoin
|
||||||
from urllib import unquote as urlunquote
|
from urllib import unquote
|
||||||
|
|
||||||
from lxml import etree, html
|
from lxml import etree, html
|
||||||
from calibre.constants import filesystem_encoding, __version__
|
from calibre.constants import filesystem_encoding, __version__
|
||||||
@@ -372,6 +371,19 @@ def urlquote(href):
|
|||||||
result.append(char)
|
result.append(char)
|
||||||
return ''.join(result)
|
return ''.join(result)
|
||||||
|
|
||||||
|
def urlunquote(href, error_handling='strict'):
    """Replace the %XX escapes in ``href`` with the characters they encode.

    The result has the same string type as the input: a byte string is
    handed straight to ``unquote`` and its result returned unchanged, while
    a unicode string is encoded to UTF-8, unquoted, and decoded back from
    UTF-8.

    :param href: The quoted URL or path, as a byte string or unicode string.
    :param error_handling: Codec error policy used when decoding the
        unquoted bytes of a unicode ``href`` back from UTF-8 (for example
        ``'strict'``, ``'replace'`` or ``'ignore'``). Unused for byte string
        input. The default, ``'strict'``, keeps the original behaviour.
    :return: The unquoted ``href``.
    :raises UnicodeDecodeError: With ``error_handling='strict'``, when a
        unicode ``href`` contains escapes that do not form valid UTF-8
        (e.g. ``u'%E4'``).
    """
    # unquote must run on a bytestring and will return a bytestring
    # If it runs on a unicode object, it returns a double encoded unicode
    # string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
    # and the latter is correct
    if not isinstance(href, unicode):
        return unquote(href)
    # Round-trip through UTF-8 so the escapes are resolved at the byte level;
    # the caller-selected policy decides what happens to escapes that do not
    # decode as UTF-8.
    return unquote(href.encode('utf-8')).decode('utf-8', error_handling)
|
||||||
|
|
||||||
def urlnormalize(href):
|
def urlnormalize(href):
|
||||||
"""Convert a URL into normalized form, with all and only URL-unsafe
|
"""Convert a URL into normalized form, with all and only URL-unsafe
|
||||||
characters URL quoted.
|
characters URL quoted.
|
||||||
@@ -468,7 +480,7 @@ class DirContainer(object):
|
|||||||
return
|
return
|
||||||
|
|
||||||
def _unquote(self, path):
|
def _unquote(self, path):
|
||||||
# urlunquote must run on a bytestring and will return a bytestring
|
# unquote must run on a bytestring and will return a bytestring
|
||||||
# If it runs on a unicode object, it returns a double encoded unicode
|
# If it runs on a unicode object, it returns a double encoded unicode
|
||||||
# string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
|
# string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
|
||||||
# and the latter is correct
|
# and the latter is correct
|
||||||
|
@@ -196,6 +196,8 @@ class OEBReader(object):
|
|||||||
item.media_type[-4:] in ('/xml', '+xml')):
|
item.media_type[-4:] in ('/xml', '+xml')):
|
||||||
hrefs = [r[2] for r in iterlinks(data)]
|
hrefs = [r[2] for r in iterlinks(data)]
|
||||||
for href in hrefs:
|
for href in hrefs:
|
||||||
|
if isinstance(href, bytes):
|
||||||
|
href = href.decode('utf-8')
|
||||||
href, _ = urldefrag(href)
|
href, _ = urldefrag(href)
|
||||||
if not href:
|
if not href:
|
||||||
continue
|
continue
|
||||||
|
@@ -47,6 +47,8 @@ class ManifestTrimmer(object):
|
|||||||
item.data is not None:
|
item.data is not None:
|
||||||
hrefs = [r[2] for r in iterlinks(item.data)]
|
hrefs = [r[2] for r in iterlinks(item.data)]
|
||||||
for href in hrefs:
|
for href in hrefs:
|
||||||
|
if isinstance(href, bytes):
|
||||||
|
href = href.decode('utf-8')
|
||||||
try:
|
try:
|
||||||
href = item.abshref(urlnormalize(href))
|
href = item.abshref(urlnormalize(href))
|
||||||
except:
|
except:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user