mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
TXTZ Input: Include images in manifest. They were not always being included by the HTML input plugin.
This commit is contained in:
parent
95892f204b
commit
04b80eb9ee
@ -4,13 +4,15 @@ __license__ = 'GPL 3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import glob
|
||||
import mimetypes
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from calibre import _ent_pat, xml_entity_to_unicode
|
||||
from calibre import _ent_pat, walk, xml_entity_to_unicode
|
||||
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
|
||||
from calibre.ebooks.chardet import detect
|
||||
from calibre.ebooks.oeb.base import OEB_IMAGES
|
||||
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
|
||||
separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
|
||||
preserve_spaces, detect_paragraph_type, detect_formatting_type, \
|
||||
@ -67,6 +69,8 @@ class TXTInput(InputFormatPlugin):
|
||||
txt = ''
|
||||
log.debug('Reading text from file...')
|
||||
length = 0
|
||||
# [(u'path', mime),]
|
||||
images = []
|
||||
|
||||
# Extract content from zip archive.
|
||||
if file_ext == 'txtz':
|
||||
@ -75,10 +79,19 @@ class TXTInput(InputFormatPlugin):
|
||||
zf = ZipFile(stream)
|
||||
zf.extractall(tdir)
|
||||
|
||||
txts = glob.glob(os.path.join(tdir, '*.txt'))
|
||||
for t in txts:
|
||||
with open(t, 'rb') as tf:
|
||||
txt += tf.read()
|
||||
for x in walk(tdir):
|
||||
if not os.path.isfile(x):
|
||||
continue
|
||||
if os.path.splitext(x)[1].lower() == '.txt':
|
||||
with open(x, 'rb') as tf:
|
||||
txt += tf.read() + '\n\n'
|
||||
if mimetypes.guess_type(x)[0] in OEB_IMAGES:
|
||||
path = os.path.relpath(x, tdir)
|
||||
dir = os.path.join(os.getcwd(), os.path.dirname(path))
|
||||
if not os.path.exists(dir):
|
||||
os.makedirs(dir)
|
||||
shutil.copy(x, os.path.join(os.getcwd(), path))
|
||||
images.append((path, mimetypes.guess_type(x)[0]))
|
||||
else:
|
||||
txt = stream.read()
|
||||
|
||||
@ -193,9 +206,13 @@ class TXTInput(InputFormatPlugin):
|
||||
htmlfile.write(html.encode('utf-8'))
|
||||
odi = options.debug_pipeline
|
||||
options.debug_pipeline = None
|
||||
# Generate oeb from htl conversion.
|
||||
# Generate oeb from html conversion.
|
||||
oeb = html_input.convert(open(htmlfile.name, 'rb'), options, 'html', log,
|
||||
{})
|
||||
# Add images from from txtz archive to oeb.
|
||||
for image, mime in images:
|
||||
id, href = oeb.manifest.generate(id='image', href=image)
|
||||
oeb.manifest.add(id, href, mime)
|
||||
options.debug_pipeline = odi
|
||||
os.remove(htmlfile.name)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user