From 04b80eb9eef7452d3ff70ceece2144e0b718f976 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Sat, 12 Feb 2011 17:34:29 -0500
Subject: [PATCH] TXTZ Input: Include images in manifest. They were not always
 being included by the HTML input plugin.

---
 src/calibre/ebooks/txt/input.py | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index 6d958b0e7c..9952845fdf 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -4,13 +4,15 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 
-import glob
+import mimetypes
 import os
+import shutil
 
-from calibre import _ent_pat, xml_entity_to_unicode
+from calibre import _ent_pat, walk, xml_entity_to_unicode
 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
 from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
 from calibre.ebooks.chardet import detect
+from calibre.ebooks.oeb.base import OEB_IMAGES
 from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
     separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
     preserve_spaces, detect_paragraph_type, detect_formatting_type, \
@@ -67,6 +69,8 @@ class TXTInput(InputFormatPlugin):
         txt = ''
         log.debug('Reading text from file...')
         length = 0
+        # [(u'path', mime),]
+        images = []
 
         # Extract content from zip archive.
         if file_ext == 'txtz':
@@ -75,10 +79,19 @@ class TXTInput(InputFormatPlugin):
                 zf = ZipFile(stream)
                 zf.extractall(tdir)
 
-                txts = glob.glob(os.path.join(tdir, '*.txt'))
-                for t in txts:
-                    with open(t, 'rb') as tf:
-                        txt += tf.read()
+                for x in walk(tdir):
+                    if not os.path.isfile(x):
+                        continue
+                    if os.path.splitext(x)[1].lower() == '.txt':
+                        with open(x, 'rb') as tf:
+                            txt += tf.read() + '\n\n'
+                    if mimetypes.guess_type(x)[0] in OEB_IMAGES:
+                        path = os.path.relpath(x, tdir)
+                        dir = os.path.join(os.getcwd(), os.path.dirname(path))
+                        if not os.path.exists(dir):
+                            os.makedirs(dir)
+                        shutil.copy(x, os.path.join(os.getcwd(), path))
+                        images.append((path, mimetypes.guess_type(x)[0]))
         else:
             txt = stream.read()
 
@@ -193,9 +206,13 @@ class TXTInput(InputFormatPlugin):
             htmlfile.write(html.encode('utf-8'))
         odi = options.debug_pipeline
         options.debug_pipeline = None
-        # Generate oeb from htl conversion.
+        # Generate oeb from html conversion.
         oeb = html_input.convert(open(htmlfile.name, 'rb'), options, 'html', log,
                 {})
+        # Add images from from txtz archive to oeb.
+        for image, mime in images:
+            id, href = oeb.manifest.generate(id='image', href=image)
+            oeb.manifest.add(id, href, mime)
         options.debug_pipeline = odi
         os.remove(htmlfile.name)