HTML Input: Ignore unparseable URLs instead of crashing on them. Fixes #902372 (HTML convert crashes with "Invalid IPv6 URL" error)

This commit is contained in:
Kovid Goyal 2011-12-10 09:36:08 +05:30
parent 59179fdfe1
commit ff293d73ac
3 changed files with 15 additions and 3 deletions

View File

@@ -148,7 +148,11 @@ class HTMLFile(object):
url = match.group(i)
if url:
break
link = self.resolve(url)
try:
link = self.resolve(url)
except ValueError:
# Unparseable URL, ignore
continue
if link not in self.links:
self.links.append(link)

View File

@@ -178,7 +178,11 @@ class Serializer(object):
at the end.
'''
hrefs = self.oeb.manifest.hrefs
path, frag = urldefrag(urlnormalize(href))
try:
path, frag = urldefrag(urlnormalize(href))
except ValueError:
# Unparseable URL
return False
if path and base:
path = base.abshref(path)
if path and path not in hrefs:

View File

@@ -154,7 +154,11 @@ class Split(object):
def rewrite_links(self, url):
href, frag = urldefrag(url)
href = self.current_item.abshref(href)
try:
href = self.current_item.abshref(href)
except ValueError:
# Unparseable URL
return url
if href in self.map:
anchor_map = self.map[href]
nhref = anchor_map[frag if frag else None]