HTML Input: Ignore links to text files. Fixes #791568 (collections of HTML with txt files fail)

This commit is contained in:
Kovid Goyal 2011-06-13 21:52:16 -06:00
parent 983cf1e52f
commit 20210f046b

View File

@ -455,13 +455,16 @@ class HTMLInput(InputFormatPlugin):
bhref = os.path.basename(link) bhref = os.path.basename(link)
id, href = self.oeb.manifest.generate(id='added', id, href = self.oeb.manifest.generate(id='added',
href=bhref) href=bhref)
guessed = self.guess_type(href)[0]
media_type = guessed or self.BINARY_MIME
if 'text' in media_type:
self.log.warn('Ignoring link to text file %r'%link_)
return None
self.oeb.log.debug('Added', link) self.oeb.log.debug('Added', link)
self.oeb.container = self.DirContainer(os.path.dirname(link), self.oeb.container = self.DirContainer(os.path.dirname(link),
self.oeb.log, ignore_opf=True) self.oeb.log, ignore_opf=True)
# Load into memory # Load into memory
guessed = self.guess_type(href)[0]
media_type = guessed or self.BINARY_MIME
item = self.oeb.manifest.add(id, href, media_type) item = self.oeb.manifest.add(id, href, media_type)
item.html_input_href = bhref item.html_input_href = bhref
if guessed in self.OEB_STYLES: if guessed in self.OEB_STYLES: