From fc924d5fc643e8ece6ea23fbf6456e33c0429337 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 18 May 2018 13:21:52 +0530 Subject: [PATCH] calibredb add: Run the input plugins before reading metadata instead of after. Matches the behavior in the GUI. Means that metadata reading now happens in the server process instead of the client process when connecting remotely. Fixes #1771863 [Metadata is read as the first step when importing books from the CLI but as the last when importing from the GUI](https://bugs.launchpad.net/calibre/+bug/1771863) --- src/calibre/db/adding.py | 35 +++++++ src/calibre/db/cli/cmd_add.py | 176 ++++++++++++++++------------------ 2 files changed, 118 insertions(+), 93 deletions(-) diff --git a/src/calibre/db/adding.py b/src/calibre/db/adding.py index b500a5df04..f6990d877c 100644 --- a/src/calibre/db/adding.py +++ b/src/calibre/db/adding.py @@ -9,6 +9,8 @@ __copyright__ = '2013, Kovid Goyal ' import os, time, re from collections import defaultdict from future_builtins import map +from contextlib import contextmanager +from functools import partial from calibre import prints from calibre.constants import iswindows, isosx, filesystem_encoding @@ -105,6 +107,27 @@ def allow_path(path, ext, compiled_rules): return ans +import_ctx = None + + +@contextmanager +def run_import_plugins_before_metadata(tdir, group_id=0): + global import_ctx + import_ctx = {'tdir': tdir, 'group_id': group_id, 'format_map': {}} + yield import_ctx + import_ctx = None + + +def run_import_plugins(formats): + from calibre.ebooks.metadata.worker import run_import_plugins + import_ctx['group_id'] += 1 + ans = run_import_plugins(formats, import_ctx['group_id'], import_ctx['tdir']) + fm = import_ctx['format_map'] + for old_path, new_path in zip(formats, ans): + fm[new_path] = old_path + return ans + + def find_books_in_directory(dirpath, single_book_per_directory, compiled_rules=(), listdir_impl=listdir): dirpath = os.path.abspath(dirpath) if single_book_per_directory: @@ 
-196,6 +219,18 @@ def recursive_import(db, root, single_book_per_directory=True, return duplicates +def cdb_find_in_dir(dirpath, single_book_per_directory, compiled_rules): + return find_books_in_directory(dirpath, single_book_per_directory=single_book_per_directory, + compiled_rules=compiled_rules, listdir_impl=partial(listdir, sort_by_mtime=True)) + + +def cdb_recursive_find(root, single_book_per_directory=True, compiled_rules=()): + root = os.path.abspath(root) + for dirpath in os.walk(root): + for formats in cdb_find_in_dir(dirpath[0], single_book_per_directory, compiled_rules): + yield formats + + def add_catalog(cache, path, title, dbapi=None): from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.meta import get_metadata diff --git a/src/calibre/db/cli/cmd_add.py b/src/calibre/db/cli/cmd_add.py index e5edcaf403..d434396c94 100644 --- a/src/calibre/db/cli/cmd_add.py +++ b/src/calibre/db/cli/cmd_add.py @@ -6,14 +6,18 @@ from __future__ import absolute_import, division, print_function, unicode_litera import os import sys -from io import BytesIO +from contextlib import contextmanager from optparse import OptionGroup, OptionValueError from calibre import prints -from calibre.db.adding import compile_rule, recursive_import, import_book_directory, import_book_directory_multiple +from calibre.db.adding import ( + cdb_find_in_dir, cdb_recursive_find, compile_rule, create_format_map, + run_import_plugins, run_import_plugins_before_metadata +) from calibre.ebooks.metadata import MetaInformation, string_to_authors from calibre.ebooks.metadata.book.serialize import read_cover -from calibre.ebooks.metadata.meta import get_metadata +from calibre.ebooks.metadata.meta import get_metadata, metadata_from_formats +from calibre.ptempfile import TemporaryDirectory from calibre.srv.changes import books_added from calibre.utils.localization import canonicalize_lang @@ -21,12 +25,6 @@ readonly = False version = 0 # change this if you change signature of 
implementation() -def to_stream(data): - ans = BytesIO(data[1]) - ans.name = data[0] - return ans - - def empty(db, notify_changes, is_remote, args): mi = args[0] ids, duplicates = db.add_books([(mi, {})]) @@ -37,26 +35,64 @@ def empty(db, notify_changes, is_remote, args): def book(db, notify_changes, is_remote, args): - data, fmt, mi, add_duplicates = args - if is_remote: - data = to_stream(data) - ids, duplicates = db.add_books( - [(mi, {fmt: data})], add_duplicates=add_duplicates) + data, fname, fmt, add_duplicates, otitle, oauthors, oisbn, otags, oseries, oseries_index, ocover, oidentifiers, olanguages = args + with add_ctx(), TemporaryDirectory('add-single') as tdir, run_import_plugins_before_metadata(tdir): + if is_remote: + with lopen(os.path.join(tdir, fname), 'wb') as f: + f.write(data[1]) + path = f.name + else: + path = data + path = run_import_plugins([path])[0] + with lopen(path, 'rb') as stream: + mi = get_metadata(stream, stream_type=fmt, use_libprs_metadata=True) + if not mi.title: + mi.title = os.path.splitext(os.path.basename(path))[0] + if not mi.authors: + mi.authors = [_('Unknown')] + if oidentifiers: + ids = mi.get_identifiers() + ids.update(oidentifiers) + mi.set_identifiers(ids) + for x in ('title', 'authors', 'isbn', 'tags', 'series', 'languages'): + val = locals()['o' + x] + if val: + setattr(mi, x, val) + if oseries: + mi.series_index = oseries_index + if ocover: + mi.cover = None + mi.cover_data = ocover + + ids, duplicates = db.add_books( + [(mi, {fmt: path})], add_duplicates=add_duplicates, run_hooks=False) + if is_remote: notify_changes(books_added(ids)) db.dump_metadata() - return ids, bool(duplicates) + return ids, bool(duplicates), mi.title -def add_books(db, notify_changes, is_remote, args): - books, kwargs = args - if is_remote: - books = [(mi, {k:to_stream(v) for k, v in fmt_map.iteritems()}) for mi, fmt_map in books] - ids, duplicates = db.add_books(books, **kwargs) - if is_remote: - notify_changes(books_added(ids)) - 
db.dump_metadata() - return ids, [(mi.title, [getattr(x, 'name', '') for x in format_map.itervalues()]) for mi, format_map in duplicates] +def format_group(db, notify_changes, is_remote, args): + formats, add_duplicates = args + with add_ctx(), TemporaryDirectory('add-multiple') as tdir, run_import_plugins_before_metadata(tdir): + if is_remote: + paths = [] + for name, data in formats: + with lopen(os.path.join(tdir, name), 'wb') as f: + f.write(data) + paths.append(f.name) + else: + paths = list(formats) + paths = run_import_plugins(paths) + mi = metadata_from_formats(paths) + if mi.title is None: + return None, set(), False + ids, dups = db.add_books([(mi, create_format_map(paths))], add_duplicates=add_duplicates, run_hooks=False) + if is_remote: + notify_changes(books_added(ids)) + db.dump_metadata() + return mi.title, ids, bool(dups) def implementation(db, notify_changes, action, *args): @@ -65,20 +101,6 @@ def implementation(db, notify_changes, action, *args): return func(db, notify_changes, is_remote, args) -class DBProxy(object): - # Allows dbctx to be used with the directory adding API that expects a - # normal db object. 
Fortunately that API only calls one method, - # add_books() - - def __init__(self, dbctx): - self.new_api = self - self.dbctx = dbctx - - def add_books(self, books, **kwargs): - books = [(read_cover(mi), {k:self.dbctx.path(v) for k, v in fmt_map.iteritems()}) for mi, fmt_map in books] - return self.dbctx.run('add', 'add_books', books, kwargs) - - def do_add_empty( dbctx, title, authors, isbn, tags, series, series_index, cover, identifiers, languages @@ -104,13 +126,19 @@ def do_add_empty( prints(_('Added book ids: %s') % ','.join(map(str, ids))) +@contextmanager +def add_ctx(): + orig = sys.stdout + yield + sys.stdout = orig + + def do_add( dbctx, paths, one_book_per_directory, recurse, add_duplicates, otitle, oauthors, oisbn, otags, oseries, oseries_index, ocover, oidentifiers, olanguages, compiled_rules ): - orig = sys.stdout - try: + with add_ctx(): files, dirs = [], [] for path in paths: path = os.path.abspath(path) @@ -128,56 +156,25 @@ def do_add( fmt = fmt[1:] if fmt else None if not fmt: continue - with lopen(book, 'rb') as stream: - mi = get_metadata(stream, stream_type=fmt, use_libprs_metadata=True) - if not mi.title: - mi.title = os.path.splitext(os.path.basename(book))[0] - if not mi.authors: - mi.authors = [_('Unknown')] - if oidentifiers: - ids = mi.get_identifiers() - ids.update(oidentifiers) - mi.set_identifiers(ids) - for x in ('title', 'authors', 'isbn', 'tags', 'series', 'languages'): - val = locals()['o' + x] - if val: - setattr(mi, x, val) - if oseries: - mi.series_index = oseries_index - if ocover: - mi.cover = ocover - mi.cover_data = (None, None) - - ids, dups = dbctx.run( - 'add', 'book', dbctx.path(book), fmt, read_cover(mi), add_duplicates + ids, dups, book_title = dbctx.run( + 'add', 'book', dbctx.path(book), os.path.basename(book), fmt, add_duplicates, + otitle, oauthors, oisbn, otags, oseries, oseries_index, read_cover(ocover) if ocover else None, + oidentifiers, olanguages ) added_ids |= set(ids) if dups: - 
file_duplicates.append((mi.title, book)) + file_duplicates.append((book_title, book)) dir_dups = [] - dbproxy = DBProxy(dbctx) - + scanner = cdb_recursive_find if recurse else cdb_find_in_dir for dpath in dirs: - if recurse: - dups = recursive_import( - dbproxy, - dpath, - single_book_per_directory=one_book_per_directory, - added_ids=added_ids, - compiled_rules=compiled_rules, - add_duplicates=add_duplicates - ) or [] - else: - func = import_book_directory if one_book_per_directory else import_book_directory_multiple - dups = func( - dbproxy, - dpath, - added_ids=added_ids, - compiled_rules=compiled_rules, - add_duplicates=add_duplicates - ) or [] - dir_dups.extend(dups) + for formats in scanner(dpath, one_book_per_directory, compiled_rules): + book_title, ids, dups = dbctx.run( + 'add', 'format_group', tuple(map(dbctx.path, formats)), add_duplicates) + if book_title is not None: + added_ids |= set(ids) + if dups: + dir_dups.append((book_title, formats)) sys.stdout = sys.__stdout__ @@ -201,8 +198,6 @@ def do_add( if added_ids: prints(_('Added book ids: %s') % (', '.join(map(type(u''), added_ids)))) - finally: - sys.stdout = orig def option_parser(get_parser, args): @@ -291,14 +286,9 @@ the directory related options below. ) def filter_pat(option, opt, value, parser, action): + rule = {'match_type': 'glob', 'query': value, 'action': action} try: - getattr(parser.values, option.dest).append( - compile_rule({ - 'match_type': 'glob', - 'query': value, - 'action': action - }) - ) + getattr(parser.values, option.dest).append(compile_rule(rule)) except Exception: raise OptionValueError('%r is not a valid filename pattern' % value)