From 16c5f8b1c1c99f180e3ec00d8a5c22cc69886e6c Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 18 May 2013 08:27:07 +0530
Subject: [PATCH] EPUB/AZW3 Output: Fix regression that caused errors when
 trying to convert documents that have URLs with invalid (non-utf-8) quoting.
 Fixes #1181049 (in vers .9.30 I can't download WSJ but works fine in earlier
 versions)

---
 src/calibre/ebooks/oeb/base.py             | 7 +++++--
 src/calibre/ebooks/oeb/transforms/split.py | 6 +++++-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index 671caf49fc..d4b3a2b7ab 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -373,7 +373,7 @@ def urlquote(href):
         result.append(char)
     return ''.join(result)
 
-def urlunquote(href):
+def urlunquote(href, error_handling='strict'):
     # unquote must run on a bytestring and will return a bytestring
     # If it runs on a unicode object, it returns a double encoded unicode
     # string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
@@ -383,7 +383,10 @@ def urlunquote(href):
         href = href.encode('utf-8')
     href = unquote(href)
     if want_unicode:
-        href = href.decode('utf-8')
+        # The quoted characters could have been in some encoding other than
+        # UTF-8, this often happens with old/broken web servers. There is no
+        # way to know what that encoding should be in this context.
+        href = href.decode('utf-8', error_handling)
     return href
 
 def urlnormalize(href):
diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py
index 0ca3888c17..123f61b047 100644
--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@@ -159,7 +159,11 @@ class Split(object):
         except ValueError:
             # Unparseable URL
             return url
-        href = urlnormalize(href)
+        try:
+            href = urlnormalize(href)
+        except ValueError:
+            # href has non utf-8 quoting
+            return url
         if href in self.map:
             anchor_map = self.map[href]
             nhref = anchor_map[frag if frag else None]
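
Note: for context, a minimal sketch of the failure mode the base.py change addresses, written against Python 3's urllib.parse rather than calibre's own helpers; the function name and values below are illustrative, not part of this patch.

    # Sketch only: percent-unquoting must happen on raw bytes, because the
    # escapes may encode a legacy charset (e.g. latin-1) rather than UTF-8.
    from urllib.parse import unquote_to_bytes

    def urlunquote_sketch(href, error_handling='strict'):
        raw = unquote_to_bytes(href)
        # 'strict' raises UnicodeDecodeError on non-UTF-8 escapes; 'replace'
        # substitutes U+FFFD so conversion can continue.
        return raw.decode('utf-8', error_handling)

    print(urlunquote_sketch('caf%C3%A9'))           # UTF-8 quoting -> 'café'
    print(urlunquote_sketch('caf%E9', 'replace'))   # latin-1 quoting -> 'caf\ufffd'
    try:
        urlunquote_sketch('caf%E9')                 # strict decoding fails
    except UnicodeDecodeError as err:
        print('invalid (non-utf-8) quoting:', err)

Since UnicodeDecodeError is a subclass of ValueError, the "except ValueError" added in split.py also catches the decode failure raised through urlnormalize() when the URL has such invalid quoting.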