TXTZ Input: Include images in manifest. They were not always being included by the HTML input plugin.

This commit is contained in:
John Schember 2011-02-12 17:34:29 -05:00
parent 95892f204b
commit 04b80eb9ee

View File

@ -4,13 +4,15 @@ __license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>' __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import glob import mimetypes
import os import os
import shutil
from calibre import _ent_pat, xml_entity_to_unicode from calibre import _ent_pat, walk, xml_entity_to_unicode
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
from calibre.ebooks.chardet import detect from calibre.ebooks.chardet import detect
from calibre.ebooks.oeb.base import OEB_IMAGES
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \ from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
separate_paragraphs_single_line, separate_paragraphs_print_formatted, \ separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
preserve_spaces, detect_paragraph_type, detect_formatting_type, \ preserve_spaces, detect_paragraph_type, detect_formatting_type, \
@ -67,6 +69,8 @@ class TXTInput(InputFormatPlugin):
txt = '' txt = ''
log.debug('Reading text from file...') log.debug('Reading text from file...')
length = 0 length = 0
# [(u'path', mime),]
images = []
# Extract content from zip archive. # Extract content from zip archive.
if file_ext == 'txtz': if file_ext == 'txtz':
@ -75,10 +79,19 @@ class TXTInput(InputFormatPlugin):
zf = ZipFile(stream) zf = ZipFile(stream)
zf.extractall(tdir) zf.extractall(tdir)
txts = glob.glob(os.path.join(tdir, '*.txt')) for x in walk(tdir):
for t in txts: if not os.path.isfile(x):
with open(t, 'rb') as tf: continue
txt += tf.read() if os.path.splitext(x)[1].lower() == '.txt':
with open(x, 'rb') as tf:
txt += tf.read() + '\n\n'
if mimetypes.guess_type(x)[0] in OEB_IMAGES:
path = os.path.relpath(x, tdir)
dir = os.path.join(os.getcwd(), os.path.dirname(path))
if not os.path.exists(dir):
os.makedirs(dir)
shutil.copy(x, os.path.join(os.getcwd(), path))
images.append((path, mimetypes.guess_type(x)[0]))
else: else:
txt = stream.read() txt = stream.read()
@ -193,9 +206,13 @@ class TXTInput(InputFormatPlugin):
htmlfile.write(html.encode('utf-8')) htmlfile.write(html.encode('utf-8'))
odi = options.debug_pipeline odi = options.debug_pipeline
options.debug_pipeline = None options.debug_pipeline = None
# Generate oeb from htl conversion. # Generate oeb from html conversion.
oeb = html_input.convert(open(htmlfile.name, 'rb'), options, 'html', log, oeb = html_input.convert(open(htmlfile.name, 'rb'), options, 'html', log,
{}) {})
# Add images from the txtz archive to oeb.
for image, mime in images:
id, href = oeb.manifest.generate(id='image', href=image)
oeb.manifest.add(id, href, mime)
options.debug_pipeline = odi options.debug_pipeline = odi
os.remove(htmlfile.name) os.remove(htmlfile.name)