mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Don't download comment only HTML pages
This commit is contained in:
parent
9d20d7f43e
commit
132ac4b850
@ -321,7 +321,8 @@ class RecursiveFetcher(object):
|
|||||||
self.current_dir = linkdiskpath
|
self.current_dir = linkdiskpath
|
||||||
f = self.fetch_url(iurl)
|
f = self.fetch_url(iurl)
|
||||||
dsrc = f.read()
|
dsrc = f.read()
|
||||||
if len(dsrc) == 0:
|
if len(dsrc) == 0 or \
|
||||||
|
len(re.compile('<!--.*?-->', re.DOTALL).sub('', dsrc).strip()) == 0:
|
||||||
raise ValueError('No content at URL %s'%iurl)
|
raise ValueError('No content at URL %s'%iurl)
|
||||||
if self.encoding is not None:
|
if self.encoding is not None:
|
||||||
dsrc = dsrc.decode(self.encoding, 'ignore')
|
dsrc = dsrc.decode(self.encoding, 'ignore')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user