mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-04 03:27:00 -05:00 
			
		
		
		
	EPUB/AZW3 Output: Fix regression that caused errors when trying to convert documents that have URLs with invalid (non-utf-8) quoting. Fixes #1181049 (in vers .9.30 I can't download WSJ but works fine in earlier versions)
This commit is contained in:
		
							parent
							
								
									55808e5f68
								
							
						
					
					
						commit
						16c5f8b1c1
					
				@ -373,7 +373,7 @@ def urlquote(href):
 | 
				
			|||||||
        result.append(char)
 | 
					        result.append(char)
 | 
				
			||||||
    return ''.join(result)
 | 
					    return ''.join(result)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def urlunquote(href):
 | 
					def urlunquote(href, error_handling='strict'):
 | 
				
			||||||
    # unquote must run on a bytestring and will return a bytestring
 | 
					    # unquote must run on a bytestring and will return a bytestring
 | 
				
			||||||
    # If it runs on a unicode object, it returns a double encoded unicode
 | 
					    # If it runs on a unicode object, it returns a double encoded unicode
 | 
				
			||||||
    # string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
 | 
					    # string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
 | 
				
			||||||
@ -383,7 +383,10 @@ def urlunquote(href):
 | 
				
			|||||||
        href = href.encode('utf-8')
 | 
					        href = href.encode('utf-8')
 | 
				
			||||||
    href = unquote(href)
 | 
					    href = unquote(href)
 | 
				
			||||||
    if want_unicode:
 | 
					    if want_unicode:
 | 
				
			||||||
        href = href.decode('utf-8')
 | 
					        # The quoted characters could have been in some encoding other than
 | 
				
			||||||
 | 
					        # UTF-8, this often happens with old/broken web servers. There is no
 | 
				
			||||||
 | 
					        # way to know what that encoding should be in this context.
 | 
				
			||||||
 | 
					        href = href.decode('utf-8', error_handling)
 | 
				
			||||||
    return href
 | 
					    return href
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def urlnormalize(href):
 | 
					def urlnormalize(href):
 | 
				
			||||||
 | 
				
			|||||||
@ -159,7 +159,11 @@ class Split(object):
 | 
				
			|||||||
        except ValueError:
 | 
					        except ValueError:
 | 
				
			||||||
            # Unparseable URL
 | 
					            # Unparseable URL
 | 
				
			||||||
            return url
 | 
					            return url
 | 
				
			||||||
        href = urlnormalize(href)
 | 
					        try:
 | 
				
			||||||
 | 
					            href = urlnormalize(href)
 | 
				
			||||||
 | 
					        except ValueError:
 | 
				
			||||||
 | 
					            # href has non utf-8 quoting
 | 
				
			||||||
 | 
					            return url
 | 
				
			||||||
        if href in self.map:
 | 
					        if href in self.map:
 | 
				
			||||||
            anchor_map = self.map[href]
 | 
					            anchor_map = self.map[href]
 | 
				
			||||||
            nhref = anchor_map[frag if frag else None]
 | 
					            nhref = anchor_map[frag if frag else None]
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user