Fix #6085 (News download failure)

This commit is contained in:
Kovid Goyal 2010-07-03 19:36:11 -06:00
parent e019322f3c
commit 8376081885

View File

@@ -238,7 +238,7 @@ class RecursiveFetcher(object):
soup = BeautifulSoup(u'<a href="'+url+'" />') soup = BeautifulSoup(u'<a href="'+url+'" />')
self.log.debug('Downloading') self.log.debug('Downloading')
res = self.process_links(soup, url, 0, into_dir='') res = self.process_links(soup, url, 0, into_dir='')
self.log.debug('%s saved to %s'%( url, res)) self.log.debug(url, 'saved to', res)
return res return res
def is_link_ok(self, url): def is_link_ok(self, url):
@@ -281,7 +281,7 @@ class RecursiveFetcher(object):
try: try:
data = self.fetch_url(iurl) data = self.fetch_url(iurl)
except Exception: except Exception:
self.log.exception('Could not fetch stylesheet %s'% iurl) self.log.exception('Could not fetch stylesheet ', iurl)
continue continue
stylepath = os.path.join(diskpath, 'style'+str(c)+'.css') stylepath = os.path.join(diskpath, 'style'+str(c)+'.css')
with self.stylemap_lock: with self.stylemap_lock:
@@ -304,7 +304,7 @@ class RecursiveFetcher(object):
try: try:
data = self.fetch_url(iurl) data = self.fetch_url(iurl)
except Exception: except Exception:
self.log.exception('Could not fetch stylesheet %s'% iurl) self.log.exception('Could not fetch stylesheet ', iurl)
continue continue
c += 1 c += 1
stylepath = os.path.join(diskpath, 'style'+str(c)+'.css') stylepath = os.path.join(diskpath, 'style'+str(c)+'.css')
@@ -337,7 +337,7 @@ class RecursiveFetcher(object):
# Skip empty GIF files as PIL errors on them anyway # Skip empty GIF files as PIL errors on them anyway
continue continue
except Exception: except Exception:
self.log.exception('Could not fetch image %s'% iurl) self.log.exception('Could not fetch image ', iurl)
continue continue
c += 1 c += 1
fname = ascii_filename('img'+str(c)) fname = ascii_filename('img'+str(c))
@@ -423,7 +423,7 @@ class RecursiveFetcher(object):
newbaseurl = dsrc.newurl newbaseurl = dsrc.newurl
if len(dsrc) == 0 or \ if len(dsrc) == 0 or \
len(re.compile('<!--.*?-->', re.DOTALL).sub('', dsrc).strip()) == 0: len(re.compile('<!--.*?-->', re.DOTALL).sub('', dsrc).strip()) == 0:
raise ValueError('No content at URL %s'%iurl) raise ValueError('No content at URL %r'%iurl)
if callable(self.encoding): if callable(self.encoding):
dsrc = self.encoding(dsrc) dsrc = self.encoding(dsrc)
elif self.encoding is not None: elif self.encoding is not None: