From 5725b1cd2c689cb718d83c102f73f1aed22e1c92 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 9 Mar 2023 11:40:06 +0530 Subject: [PATCH] Make find_pages useable with things other than directories --- src/calibre/ebooks/comic/input.py | 42 ++++++++++++------- .../ebooks/conversion/plugins/comic_input.py | 10 +++-- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/src/calibre/ebooks/comic/input.py b/src/calibre/ebooks/comic/input.py index a0424e9f92..878997aa6c 100644 --- a/src/calibre/ebooks/comic/input.py +++ b/src/calibre/ebooks/comic/input.py @@ -39,32 +39,41 @@ def extract_comic(path_to_comic_file): return tdir -def find_pages(dir, sort_on_mtime=False, verbose=False): +def generate_entries_from_dir(path): + from functools import partial + from calibre import walk + ans = {} + for x in walk(path): + x = os.path.abspath(x) + ans[x] = partial(os.path.getmtime, x) + return ans + + +def find_pages(dir_or_items, sort_on_mtime=False, verbose=False): ''' Find valid comic pages in a previously un-archived comic. - :param dir: Directory in which extracted comic lives + :param dir_or_items: Directory in which extracted comic lives or a dict of paths to function getting mtime :param sort_on_mtime: If True sort pages based on their last modified time. Otherwise, sort alphabetically. ''' extensions = {'jpeg', 'jpg', 'gif', 'png', 'webp'} + items = generate_entries_from_dir(dir_or_items) if isinstance(dir_or_items, str) else dir_or_items + sep_counts = set() pages = [] - for datum in os.walk(dir): - for name in datum[-1]: - path = os.path.abspath(os.path.join(datum[0], name)) - if '__MACOSX' in path: - continue - for ext in extensions: - if path.lower().endswith('.'+ext): - pages.append(path) - break - sep_counts = {x.replace(os.sep, '/').count('/') for x in pages} + for path in items: + if '__MACOSX' in path: + continue + ext = path.rpartition('.')[2].lower() + if ext in extensions: + sep_counts.add(path.replace(os.sep, '/').count('/')) + pages.append(path) # Use the full path to sort unless the files are in folders of different # levels, in which case simply use the filenames. basename = os.path.basename if len(sep_counts) > 1 else lambda x: x if sort_on_mtime: def key(x): - return os.stat(x).st_mtime + return items[x]() else: def key(x): return numeric_sort_key(basename(x)) @@ -72,7 +81,12 @@ def find_pages(dir, sort_on_mtime=False, verbose=False): pages.sort(key=key) if verbose: prints('Found comic pages...') - prints('\t'+'\n\t'.join([os.path.relpath(p, dir) for p in pages])) + try: + base = os.path.commonpath(pages) + except ValueError: + pass + else: + prints('\t'+'\n\t'.join([os.path.relpath(p, base) for p in pages])) return pages diff --git a/src/calibre/ebooks/conversion/plugins/comic_input.py b/src/calibre/ebooks/conversion/plugins/comic_input.py index c4c3ceed82..2956dd07d8 100644 --- a/src/calibre/ebooks/conversion/plugins/comic_input.py +++ b/src/calibre/ebooks/conversion/plugins/comic_input.py @@ -6,10 +6,13 @@ __docformat__ = 'restructuredtext en' Based on ideas from comiclrf created by FangornUK. ''' -import shutil, textwrap, codecs, os +import codecs +import os +import shutil +import textwrap -from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre import CurrentDir +from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation from calibre.ptempfile import PersistentTemporaryDirectory @@ -126,8 +129,7 @@ class ComicInput(InputFormatPlugin): return comics def get_pages(self, comic, tdir2): - from calibre.ebooks.comic.input import (extract_comic, process_pages, - find_pages) + from calibre.ebooks.comic.input import extract_comic, find_pages, process_pages tdir = extract_comic(comic) new_pages = find_pages(tdir, sort_on_mtime=self.opts.no_sort, verbose=self.opts.verbose)