From c86de096980dda86c5743337d1a38787ee9105ed Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 16 Oct 2023 20:18:47 +0530 Subject: [PATCH] TXTZ Output: Fix cover not being properly identified in the generated TXTZ metadata. Fixes #2038848 [Ebook-viewer: book cover for a TXTZ file is not shown](https://bugs.launchpad.net/calibre/+bug/2038848) --- .../ebooks/conversion/plugins/txt_input.py | 29 ++++++++++++++----- .../ebooks/conversion/plugins/txt_output.py | 14 +++++++-- src/calibre/ebooks/metadata/extz.py | 6 ++++ 3 files changed, 40 insertions(+), 9 deletions(-) diff --git a/src/calibre/ebooks/conversion/plugins/txt_input.py b/src/calibre/ebooks/conversion/plugins/txt_input.py index 550e066524..0bac61121b 100644 --- a/src/calibre/ebooks/conversion/plugins/txt_input.py +++ b/src/calibre/ebooks/conversion/plugins/txt_input.py @@ -126,21 +126,23 @@ class TXTInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): - from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator from calibre.ebooks.chardet import detect + from calibre.ebooks.conversion.preprocess import Dehyphenator, DocAnalysis + from calibre.ebooks.txt.processor import ( + block_to_single_line, convert_basic, convert_markdown_with_metadata, + convert_textile, detect_formatting_type, detect_paragraph_type, + normalize_line_endings, preserve_spaces, remove_indents, + separate_hard_scene_breaks, separate_paragraphs_print_formatted, + separate_paragraphs_single_line, + ) from calibre.utils.zipfile import ZipFile - from calibre.ebooks.txt.processor import (convert_basic, - convert_markdown_with_metadata, separate_paragraphs_single_line, - separate_paragraphs_print_formatted, preserve_spaces, - detect_paragraph_type, detect_formatting_type, - normalize_line_endings, convert_textile, remove_indents, - block_to_single_line, separate_hard_scene_breaks) self.log = log txt = b'' log.debug('Reading text from file...') length = 0 base_dir = self.output_dir = os.getcwd() + cover_path = None # Extract content from zip archive. if file_ext == 'txtz': @@ -171,6 +173,10 @@ class TXTInput(InputFormatPlugin): options.formatting_type = txt_formatting if txt_formatting != 'plain': options.paragraph_type = 'off' + crelpath = root.find('cover-relpath-from-base') + if crelpath is not None and crelpath.text: + cover_path = os.path.abspath(crelpath.text) + if options.formatting_type == 'auto': if file_ext == 'textile': options.formatting_type = txt_formatting @@ -250,6 +256,7 @@ class TXTInput(InputFormatPlugin): txt = block_to_single_line(txt) elif options.paragraph_type == 'unformatted': from calibre.ebooks.conversion.utils import HeuristicProcessor + # unwrap lines based on punctuation docanalysis = DocAnalysis('txt', txt) length = docanalysis.line_length(.5) @@ -318,9 +325,17 @@ class TXTInput(InputFormatPlugin): if input_mi is None: from calibre.customize.ui import get_file_type_metadata input_mi = get_file_type_metadata(stream, file_ext) + from calibre import guess_type from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata meta_info_to_oeb_metadata(input_mi, oeb.metadata, log) self.html_postprocess_title = input_mi.title + if cover_path and os.path.exists(cover_path): + with open(os.path.join(os.getcwd(), cover_path), 'rb') as cf: + cdata = cf.read() + cover_name = os.path.basename(cover_path) + id, href = oeb.manifest.generate('cover', cover_name) + oeb.manifest.add(id, href, guess_type(cover_name)[0], data=cdata) + oeb.guide.add('cover', 'Cover', href) return oeb diff --git a/src/calibre/ebooks/conversion/plugins/txt_output.py b/src/calibre/ebooks/conversion/plugins/txt_output.py index 4051d2f30f..8b38997c65 100644 --- a/src/calibre/ebooks/conversion/plugins/txt_output.py +++ b/src/calibre/ebooks/conversion/plugins/txt_output.py @@ -137,6 +137,11 @@ class TXTZOutput(TXTOutput): shutil.copy(tf, os.path.join(tdir, txt_name)) # Images + try: + cover_href = oeb_book.guide[oeb_book.metadata.cover[0].term].href + except Exception: + cover_href = None + cover_relhref = None for item in oeb_book.manifest: if item.media_type in OEB_IMAGES: if hasattr(self.writer, 'images'): @@ -148,10 +153,11 @@ class TXTZOutput(TXTOutput): else: path = os.path.join(tdir, os.path.dirname(item.href)) href = os.path.basename(item.href) - if not os.path.exists(path): - os.makedirs(path) + os.makedirs(path, exist_ok=True) with open(os.path.join(path, href), 'wb') as imgf: imgf.write(item.data) + if item.href == cover_href: + cover_relhref = os.path.relpath(imgf.name, tdir).replace(os.sep, '/') # Metadata with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf: @@ -159,6 +165,10 @@ class TXTZOutput(TXTOutput): elem = root.makeelement('text-formatting') elem.text = opts.txt_output_formatting root.append(elem) + if cover_relhref: + elem = root.makeelement('cover-relpath-from-base') + elem.text = cover_relhref + root.append(elem) mdataf.write(xml2str(root, pretty_print=True)) txtz = ZipFile(output_path, 'w') diff --git a/src/calibre/ebooks/metadata/extz.py b/src/calibre/ebooks/metadata/extz.py index e06a101667..ebaa9b58fc 100644 --- a/src/calibre/ebooks/metadata/extz.py +++ b/src/calibre/ebooks/metadata/extz.py @@ -39,6 +39,12 @@ def get_metadata(stream, extract_cover=True): if val.rpartition('.')[2].lower() in {'jpeg', 'jpg', 'png'}: cover_href = val break + else: + # txtz files use a special element for cover + for cpath in opf.root.xpath('//cover-relpath-from-base'): + if cpath.text: + cover_href = cpath.text + break if cover_href: try: mi.cover_data = (os.path.splitext(cover_href)[1], zf.read(cover_href))