calibredb add: Run the input plugins before reading metadata instead of after.

Matches the behavior in the GUI. Means that metadata reading now happens
in the server process instead of the client process when connecting
remotely. Fixes #1771863 [Metadata is read as the first step when importing books from the CLI but as the last when importing from the GUI](https://bugs.launchpad.net/calibre/+bug/1771863)
This commit is contained in:
Kovid Goyal 2018-05-18 13:21:52 +05:30
parent b0fa6bdd2e
commit fc924d5fc6
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 118 additions and 93 deletions

View File

@ -9,6 +9,8 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import os, time, re import os, time, re
from collections import defaultdict from collections import defaultdict
from future_builtins import map from future_builtins import map
from contextlib import contextmanager
from functools import partial
from calibre import prints from calibre import prints
from calibre.constants import iswindows, isosx, filesystem_encoding from calibre.constants import iswindows, isosx, filesystem_encoding
@ -105,6 +107,27 @@ def allow_path(path, ext, compiled_rules):
return ans return ans
import_ctx = None
@contextmanager
def run_import_plugins_before_metadata(tdir, group_id=0):
    '''Context manager that sets up the module-global import_ctx used by
    run_import_plugins().

    :param tdir: temporary directory in which converted formats are placed
    :param group_id: starting group id; incremented once per call to
        run_import_plugins()
    :yield: the freshly created context dict with keys 'tdir', 'group_id'
        and 'format_map'
    '''
    global import_ctx
    import_ctx = {'tdir': tdir, 'group_id': group_id, 'format_map': {}}
    try:
        yield import_ctx
    finally:
        # try/finally: without it an exception in the with-body would leave
        # the stale context visible to later (unrelated) imports.
        import_ctx = None
def run_import_plugins(formats):
    # Delegate the actual conversion to the worker implementation, using the
    # context established by run_import_plugins_before_metadata().
    from calibre.ebooks.metadata.worker import run_import_plugins
    import_ctx['group_id'] += 1
    processed = run_import_plugins(
        formats, import_ctx['group_id'], import_ctx['tdir'])
    # Remember which original file each processed path came from, so callers
    # can map results back to their inputs.
    import_ctx['format_map'].update(
        (new_path, old_path) for old_path, new_path in zip(formats, processed))
    return processed
def find_books_in_directory(dirpath, single_book_per_directory, compiled_rules=(), listdir_impl=listdir): def find_books_in_directory(dirpath, single_book_per_directory, compiled_rules=(), listdir_impl=listdir):
dirpath = os.path.abspath(dirpath) dirpath = os.path.abspath(dirpath)
if single_book_per_directory: if single_book_per_directory:
@ -196,6 +219,18 @@ def recursive_import(db, root, single_book_per_directory=True,
return duplicates return duplicates
def cdb_find_in_dir(dirpath, single_book_per_directory, compiled_rules):
    # calibredb variant of find_books_in_directory: list directory entries
    # ordered by modification time so older files are seen first.
    mtime_listdir = partial(listdir, sort_by_mtime=True)
    return find_books_in_directory(
        dirpath,
        single_book_per_directory=single_book_per_directory,
        compiled_rules=compiled_rules,
        listdir_impl=mtime_listdir)
def cdb_recursive_find(root, single_book_per_directory=True, compiled_rules=()):
    # Walk the tree rooted at root, yielding each group of book formats found
    # by cdb_find_in_dir() in every directory.
    root = os.path.abspath(root)
    for current_dir, _dirnames, _filenames in os.walk(root):
        for formats in cdb_find_in_dir(
                current_dir, single_book_per_directory, compiled_rules):
            yield formats
def add_catalog(cache, path, title, dbapi=None): def add_catalog(cache, path, title, dbapi=None):
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata

View File

@ -6,14 +6,18 @@ from __future__ import absolute_import, division, print_function, unicode_litera
import os import os
import sys import sys
from io import BytesIO from contextlib import contextmanager
from optparse import OptionGroup, OptionValueError from optparse import OptionGroup, OptionValueError
from calibre import prints from calibre import prints
from calibre.db.adding import compile_rule, recursive_import, import_book_directory, import_book_directory_multiple from calibre.db.adding import (
cdb_find_in_dir, cdb_recursive_find, compile_rule, create_format_map,
run_import_plugins, run_import_plugins_before_metadata
)
from calibre.ebooks.metadata import MetaInformation, string_to_authors from calibre.ebooks.metadata import MetaInformation, string_to_authors
from calibre.ebooks.metadata.book.serialize import read_cover from calibre.ebooks.metadata.book.serialize import read_cover
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata, metadata_from_formats
from calibre.ptempfile import TemporaryDirectory
from calibre.srv.changes import books_added from calibre.srv.changes import books_added
from calibre.utils.localization import canonicalize_lang from calibre.utils.localization import canonicalize_lang
@ -21,12 +25,6 @@ readonly = False
version = 0 # change this if you change signature of implementation() version = 0 # change this if you change signature of implementation()
def to_stream(data):
    # Wrap a (name, bytes) pair in a file-like object carrying the name.
    name, raw = data
    stream = BytesIO(raw)
    stream.name = name
    return stream
def empty(db, notify_changes, is_remote, args): def empty(db, notify_changes, is_remote, args):
mi = args[0] mi = args[0]
ids, duplicates = db.add_books([(mi, {})]) ids, duplicates = db.add_books([(mi, {})])
@ -37,26 +35,64 @@ def empty(db, notify_changes, is_remote, args):
def book(db, notify_changes, is_remote, args): def book(db, notify_changes, is_remote, args):
data, fmt, mi, add_duplicates = args data, fname, fmt, add_duplicates, otitle, oauthors, oisbn, otags, oseries, oseries_index, ocover, oidentifiers, olanguages = args
if is_remote: with add_ctx(), TemporaryDirectory('add-single') as tdir, run_import_plugins_before_metadata(tdir):
data = to_stream(data) if is_remote:
ids, duplicates = db.add_books( with lopen(os.path.join(tdir, fname), 'wb') as f:
[(mi, {fmt: data})], add_duplicates=add_duplicates) f.write(data[1])
path = f.name
else:
path = data
path = run_import_plugins([path])[0]
with lopen(path, 'rb') as stream:
mi = get_metadata(stream, stream_type=fmt, use_libprs_metadata=True)
if not mi.title:
mi.title = os.path.splitext(os.path.basename(path))[0]
if not mi.authors:
mi.authors = [_('Unknown')]
if oidentifiers:
ids = mi.get_identifiers()
ids.update(oidentifiers)
mi.set_identifiers(ids)
for x in ('title', 'authors', 'isbn', 'tags', 'series', 'languages'):
val = locals()['o' + x]
if val:
setattr(mi, x, val)
if oseries:
mi.series_index = oseries_index
if ocover:
mi.cover = None
mi.cover_data = ocover
ids, duplicates = db.add_books(
[(mi, {fmt: path})], add_duplicates=add_duplicates, run_hooks=False)
if is_remote: if is_remote:
notify_changes(books_added(ids)) notify_changes(books_added(ids))
db.dump_metadata() db.dump_metadata()
return ids, bool(duplicates) return ids, bool(duplicates), mi.title
def add_books(db, notify_changes, is_remote, args): def format_group(db, notify_changes, is_remote, args):
books, kwargs = args formats, add_duplicates = args
if is_remote: with add_ctx(), TemporaryDirectory('add-multiple') as tdir, run_import_plugins_before_metadata(tdir):
books = [(mi, {k:to_stream(v) for k, v in fmt_map.iteritems()}) for mi, fmt_map in books] if is_remote:
ids, duplicates = db.add_books(books, **kwargs) paths = []
if is_remote: for name, data in formats:
notify_changes(books_added(ids)) with lopen(os.path.join(tdir, name), 'wb') as f:
db.dump_metadata() f.write(data)
return ids, [(mi.title, [getattr(x, 'name', '<stream>') for x in format_map.itervalues()]) for mi, format_map in duplicates] paths.append(f.name)
else:
paths = list(formats)
paths = run_import_plugins(paths)
mi = metadata_from_formats(paths)
if mi.title is None:
return None, set(), False
ids, dups = db.add_books([(mi, create_format_map(paths))], add_duplicates=add_duplicates, run_hooks=False)
if is_remote:
notify_changes(books_added(ids))
db.dump_metadata()
return mi.title, ids, bool(dups)
def implementation(db, notify_changes, action, *args): def implementation(db, notify_changes, action, *args):
@ -65,20 +101,6 @@ def implementation(db, notify_changes, action, *args):
return func(db, notify_changes, is_remote, args) return func(db, notify_changes, is_remote, args)
class DBProxy(object):
    # Minimal stand-in for a real db object so dbctx can be used with the
    # directory adding API. That API only ever calls new_api.add_books(),
    # which is therefore the single method implemented here.

    def __init__(self, dbctx):
        self.new_api = self
        self.dbctx = dbctx

    def add_books(self, books, **kwargs):
        # Serialize covers and convert local paths before shipping the
        # request over the dbctx boundary.
        prepared = [
            (read_cover(mi), {fmt: self.dbctx.path(p) for fmt, p in fmt_map.iteritems()})
            for mi, fmt_map in books
        ]
        return self.dbctx.run('add', 'add_books', prepared, kwargs)
def do_add_empty( def do_add_empty(
dbctx, title, authors, isbn, tags, series, series_index, cover, identifiers, dbctx, title, authors, isbn, tags, series, series_index, cover, identifiers,
languages languages
@ -104,13 +126,19 @@ def do_add_empty(
prints(_('Added book ids: %s') % ','.join(map(str, ids))) prints(_('Added book ids: %s') % ','.join(map(str, ids)))
@contextmanager
def add_ctx():
    '''Save sys.stdout and guarantee it is restored when the block exits.

    The add machinery redirects sys.stdout in places; this context manager
    makes sure the original stream always comes back.
    '''
    orig = sys.stdout
    try:
        yield
    finally:
        # try/finally: the previous version skipped restoration when the
        # body raised, leaving sys.stdout pointing at the wrong stream.
        sys.stdout = orig
def do_add( def do_add(
dbctx, paths, one_book_per_directory, recurse, add_duplicates, otitle, oauthors, dbctx, paths, one_book_per_directory, recurse, add_duplicates, otitle, oauthors,
oisbn, otags, oseries, oseries_index, ocover, oidentifiers, olanguages, oisbn, otags, oseries, oseries_index, ocover, oidentifiers, olanguages,
compiled_rules compiled_rules
): ):
orig = sys.stdout with add_ctx():
try:
files, dirs = [], [] files, dirs = [], []
for path in paths: for path in paths:
path = os.path.abspath(path) path = os.path.abspath(path)
@ -128,56 +156,25 @@ def do_add(
fmt = fmt[1:] if fmt else None fmt = fmt[1:] if fmt else None
if not fmt: if not fmt:
continue continue
with lopen(book, 'rb') as stream: ids, dups, book_title = dbctx.run(
mi = get_metadata(stream, stream_type=fmt, use_libprs_metadata=True) 'add', 'book', dbctx.path(book), os.path.basename(book), fmt, add_duplicates,
if not mi.title: otitle, oauthors, oisbn, otags, oseries, oseries_index, read_cover(ocover) if ocover else None,
mi.title = os.path.splitext(os.path.basename(book))[0] oidentifiers, olanguages
if not mi.authors:
mi.authors = [_('Unknown')]
if oidentifiers:
ids = mi.get_identifiers()
ids.update(oidentifiers)
mi.set_identifiers(ids)
for x in ('title', 'authors', 'isbn', 'tags', 'series', 'languages'):
val = locals()['o' + x]
if val:
setattr(mi, x, val)
if oseries:
mi.series_index = oseries_index
if ocover:
mi.cover = ocover
mi.cover_data = (None, None)
ids, dups = dbctx.run(
'add', 'book', dbctx.path(book), fmt, read_cover(mi), add_duplicates
) )
added_ids |= set(ids) added_ids |= set(ids)
if dups: if dups:
file_duplicates.append((mi.title, book)) file_duplicates.append((book_title, book))
dir_dups = [] dir_dups = []
dbproxy = DBProxy(dbctx) scanner = cdb_recursive_find if recurse else cdb_find_in_dir
for dpath in dirs: for dpath in dirs:
if recurse: for formats in scanner(dpath, one_book_per_directory, compiled_rules):
dups = recursive_import( book_title, ids, dups = dbctx.run(
dbproxy, 'add', 'format_group', tuple(map(dbctx.path, formats)), add_duplicates)
dpath, if book_title is not None:
single_book_per_directory=one_book_per_directory, added_ids |= set(ids)
added_ids=added_ids, if dups:
compiled_rules=compiled_rules, dir_dups.append((book_title, formats))
add_duplicates=add_duplicates
) or []
else:
func = import_book_directory if one_book_per_directory else import_book_directory_multiple
dups = func(
dbproxy,
dpath,
added_ids=added_ids,
compiled_rules=compiled_rules,
add_duplicates=add_duplicates
) or []
dir_dups.extend(dups)
sys.stdout = sys.__stdout__ sys.stdout = sys.__stdout__
@ -201,8 +198,6 @@ def do_add(
if added_ids: if added_ids:
prints(_('Added book ids: %s') % (', '.join(map(type(u''), added_ids)))) prints(_('Added book ids: %s') % (', '.join(map(type(u''), added_ids))))
finally:
sys.stdout = orig
def option_parser(get_parser, args): def option_parser(get_parser, args):
@ -291,14 +286,9 @@ the directory related options below.
) )
def filter_pat(option, opt, value, parser, action): def filter_pat(option, opt, value, parser, action):
rule = {'match_type': 'glob', 'query': value, 'action': action}
try: try:
getattr(parser.values, option.dest).append( getattr(parser.values, option.dest).append(compile_rule(rule))
compile_rule({
'match_type': 'glob',
'query': value,
'action': action
})
)
except Exception: except Exception:
raise OptionValueError('%r is not a valid filename pattern' % value) raise OptionValueError('%r is not a valid filename pattern' % value)