From 20210f046ba65cb593620b0bac212c08d0928ed3 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 13 Jun 2011 21:52:16 -0600 Subject: [PATCH] HTML Input: Ignore links to text files. Fixes #791568 (collections of html with txt fails) --- src/calibre/ebooks/html/input.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index 3d5f6c00ef..ce6c46c6cf 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -455,13 +455,16 @@ class HTMLInput(InputFormatPlugin): bhref = os.path.basename(link) id, href = self.oeb.manifest.generate(id='added', href=bhref) + guessed = self.guess_type(href)[0] + media_type = guessed or self.BINARY_MIME + if 'text' in media_type: + self.log.warn('Ignoring link to text file %r'%link_) + return None + self.oeb.log.debug('Added', link) self.oeb.container = self.DirContainer(os.path.dirname(link), self.oeb.log, ignore_opf=True) # Load into memory - guessed = self.guess_type(href)[0] - media_type = guessed or self.BINARY_MIME - item = self.oeb.manifest.add(id, href, media_type) item.html_input_href = bhref if guessed in self.OEB_STYLES: