mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-03 19:17:02 -05:00 
			
		
		
		
	EPUB Input: Fix handling of EPUB files that contain images with non-ascii filenames. Fixes #1171186 (Private bug)
This commit is contained in:
		
							parent
							
								
									763c921108
								
							
						
					
					
						commit
						6dbd826c51
					
				@ -1,7 +1,6 @@
 | 
				
			|||||||
'''
 | 
					'''
 | 
				
			||||||
Basic support for manipulating OEB 1.x/2.0 content and metadata.
 | 
					Basic support for manipulating OEB 1.x/2.0 content and metadata.
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
from __future__ import with_statement
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
__license__   = 'GPL v3'
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
 | 
					__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
 | 
				
			||||||
@ -11,7 +10,7 @@ import os, re, uuid, logging
 | 
				
			|||||||
from collections import defaultdict
 | 
					from collections import defaultdict
 | 
				
			||||||
from itertools import count
 | 
					from itertools import count
 | 
				
			||||||
from urlparse import urldefrag, urlparse, urlunparse, urljoin
 | 
					from urlparse import urldefrag, urlparse, urlunparse, urljoin
 | 
				
			||||||
from urllib import unquote as urlunquote
 | 
					from urllib import unquote
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from lxml import etree, html
 | 
					from lxml import etree, html
 | 
				
			||||||
from calibre.constants import filesystem_encoding, __version__
 | 
					from calibre.constants import filesystem_encoding, __version__
 | 
				
			||||||
@ -372,6 +371,19 @@ def urlquote(href):
 | 
				
			|||||||
        result.append(char)
 | 
					        result.append(char)
 | 
				
			||||||
    return ''.join(result)
 | 
					    return ''.join(result)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def urlunquote(href):
 | 
				
			||||||
 | 
					    # unquote must run on a bytestring and will return a bytestring
 | 
				
			||||||
 | 
					    # If it runs on a unicode object, it returns a double encoded unicode
 | 
				
			||||||
 | 
					    # string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
 | 
				
			||||||
 | 
					    # and the latter is correct
 | 
				
			||||||
 | 
					    want_unicode = isinstance(href, unicode)
 | 
				
			||||||
 | 
					    if want_unicode:
 | 
				
			||||||
 | 
					        href = href.encode('utf-8')
 | 
				
			||||||
 | 
					    href = unquote(href)
 | 
				
			||||||
 | 
					    if want_unicode:
 | 
				
			||||||
 | 
					        href = href.decode('utf-8')
 | 
				
			||||||
 | 
					    return href
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def urlnormalize(href):
 | 
					def urlnormalize(href):
 | 
				
			||||||
    """Convert a URL into normalized form, with all and only URL-unsafe
 | 
					    """Convert a URL into normalized form, with all and only URL-unsafe
 | 
				
			||||||
    characters URL quoted.
 | 
					    characters URL quoted.
 | 
				
			||||||
@ -468,7 +480,7 @@ class DirContainer(object):
 | 
				
			|||||||
                    return
 | 
					                    return
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _unquote(self, path):
 | 
					    def _unquote(self, path):
 | 
				
			||||||
        # urlunquote must run on a bytestring and will return a bytestring
 | 
					        # unquote must run on a bytestring and will return a bytestring
 | 
				
			||||||
        # If it runs on a unicode object, it returns a double encoded unicode
 | 
					        # If it runs on a unicode object, it returns a double encoded unicode
 | 
				
			||||||
        # string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
 | 
					        # string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
 | 
				
			||||||
        # and the latter is correct
 | 
					        # and the latter is correct
 | 
				
			||||||
 | 
				
			|||||||
@ -196,6 +196,8 @@ class OEBReader(object):
 | 
				
			|||||||
                        item.media_type[-4:] in ('/xml', '+xml')):
 | 
					                        item.media_type[-4:] in ('/xml', '+xml')):
 | 
				
			||||||
                    hrefs = [r[2] for r in iterlinks(data)]
 | 
					                    hrefs = [r[2] for r in iterlinks(data)]
 | 
				
			||||||
                    for href in hrefs:
 | 
					                    for href in hrefs:
 | 
				
			||||||
 | 
					                        if isinstance(href, bytes):
 | 
				
			||||||
 | 
					                            href = href.decode('utf-8')
 | 
				
			||||||
                        href, _ = urldefrag(href)
 | 
					                        href, _ = urldefrag(href)
 | 
				
			||||||
                        if not href:
 | 
					                        if not href:
 | 
				
			||||||
                            continue
 | 
					                            continue
 | 
				
			||||||
 | 
				
			|||||||
@ -47,6 +47,8 @@ class ManifestTrimmer(object):
 | 
				
			|||||||
                   item.data is not None:
 | 
					                   item.data is not None:
 | 
				
			||||||
                    hrefs = [r[2] for r in iterlinks(item.data)]
 | 
					                    hrefs = [r[2] for r in iterlinks(item.data)]
 | 
				
			||||||
                    for href in hrefs:
 | 
					                    for href in hrefs:
 | 
				
			||||||
 | 
					                        if isinstance(href, bytes):
 | 
				
			||||||
 | 
					                            href = href.decode('utf-8')
 | 
				
			||||||
                        try:
 | 
					                        try:
 | 
				
			||||||
                            href = item.abshref(urlnormalize(href))
 | 
					                            href = item.abshref(urlnormalize(href))
 | 
				
			||||||
                        except:
 | 
					                        except:
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user