TXTZ Input: Fix image processing

This commit is contained in:
Kovid Goyal 2011-02-13 17:46:31 -07:00
parent a5cdad2705
commit c2aa2b56bc

View File

@ -5,19 +5,16 @@ __copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os import os
import shutil
from calibre import _ent_pat, walk, xml_entity_to_unicode, guess_type from calibre import _ent_pat, walk, xml_entity_to_unicode
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
from calibre.ebooks.chardet import detect from calibre.ebooks.chardet import detect
from calibre.ebooks.oeb.base import OEB_IMAGES
from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \ from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
separate_paragraphs_single_line, separate_paragraphs_print_formatted, \ separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
preserve_spaces, detect_paragraph_type, detect_formatting_type, \ preserve_spaces, detect_paragraph_type, detect_formatting_type, \
normalize_line_endings, convert_textile, remove_indents, block_to_single_line, \ normalize_line_endings, convert_textile, remove_indents, block_to_single_line, \
separate_hard_scene_breaks separate_hard_scene_breaks
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
class TXTInput(InputFormatPlugin): class TXTInput(InputFormatPlugin):
@ -69,29 +66,16 @@ class TXTInput(InputFormatPlugin):
log.debug('Reading text from file...') log.debug('Reading text from file...')
length = 0 length = 0
# [(u'path', mime),] # [(u'path', mime),]
images = []
# Extract content from zip archive. # Extract content from zip archive.
if file_ext == 'txtz': if file_ext == 'txtz':
log.debug('De-compressing content to temporary directory...') zf = ZipFile(stream)
with TemporaryDirectory('_untxtz') as tdir: zf.extractall('.')
zf = ZipFile(stream)
zf.extractall(tdir)
for x in walk(tdir): for x in walk('.'):
if not os.path.isfile(x): if os.path.splitext(x)[1].lower() == '.txt':
continue with open(x, 'rb') as tf:
if os.path.splitext(x)[1].lower() == '.txt': txt += tf.read() + '\n\n'
with open(x, 'rb') as tf:
txt += tf.read() + '\n\n'
mt = guess_type(x)[0]
if mt in OEB_IMAGES:
path = os.path.relpath(x, tdir)
dir = os.path.join(os.getcwd(), os.path.dirname(path))
if not os.path.exists(dir):
os.makedirs(dir)
shutil.copy(x, os.path.join(os.getcwd(), path))
images.append((path, mt))
else: else:
txt = stream.read() txt = stream.read()
@ -194,7 +178,7 @@ class TXTInput(InputFormatPlugin):
setattr(options, opt.option.name, opt.recommended_value) setattr(options, opt.option.name, opt.recommended_value)
options.input_encoding = 'utf-8' options.input_encoding = 'utf-8'
base = os.getcwdu() base = os.getcwdu()
if hasattr(stream, 'name'): if file_ext != 'txtz' and hasattr(stream, 'name'):
base = os.path.dirname(stream.name) base = os.path.dirname(stream.name)
fname = os.path.join(base, 'index.html') fname = os.path.join(base, 'index.html')
c = 0 c = 0
@ -209,12 +193,6 @@ class TXTInput(InputFormatPlugin):
# Generate oeb from html conversion. # Generate oeb from html conversion.
oeb = html_input.convert(open(htmlfile.name, 'rb'), options, 'html', log, oeb = html_input.convert(open(htmlfile.name, 'rb'), options, 'html', log,
{}) {})
# Add images from txtz archive to oeb.
# Disabled as the conversion pipeline adds unmanifested items that are
# referred to in the content automatically
#for image, mime in images:
# id, href = oeb.manifest.generate(id='image', href=image)
# oeb.manifest.add(id, href, mime)
options.debug_pipeline = odi options.debug_pipeline = odi
os.remove(htmlfile.name) os.remove(htmlfile.name)