Mirror of https://github.com/kovidgoyal/calibre.git
calibredb add: New option --automerge to automatically merge duplicates
Have the option accept different merge algorithms. Also implement it for recursive adding.
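A quick usage sketch (not part of the diff; the file and folder names below are made up, and the -r/--recurse directory option is assumed from the existing calibredb add interface):

    calibredb add --automerge overwrite ./book.epub
    calibredb add --automerge ignore -r ./incoming-books/

Per the new help text, "ignore" discards incoming formats that a matching record already has, "overwrite" replaces the library copies with the incoming files, "new_record" puts such files into a new book record, and the default "disabled" leaves the existing --duplicates handling in charge.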
parent fd97af9b1d
commit 54dbba6f06
@@ -9,19 +9,18 @@ from contextlib import contextmanager
from optparse import OptionGroup, OptionValueError

from calibre import prints
from calibre.db.utils import find_identical_books
from calibre.db.copy_to_library import automerge_book
from calibre.db.adding import (
    cdb_find_in_dir, cdb_recursive_find, compile_rule, create_format_map,
    run_import_plugins, run_import_plugins_before_metadata
)
from calibre.db.utils import find_identical_books
from calibre.ebooks.metadata import MetaInformation, string_to_authors
from calibre.ebooks.metadata.book.serialize import read_cover, serialize_cover
from calibre.ebooks.metadata.meta import get_metadata, metadata_from_formats
from calibre.ptempfile import TemporaryDirectory
from calibre.srv.changes import books_added
from calibre.srv.changes import books_added, formats_added
from calibre.utils.localization import canonicalize_lang
from calibre.utils.config import tweaks
from calibre.utils.short_uuid import uuid4
from polyglot.builtins import unicode_type

readonly = False
@@ -37,8 +36,83 @@ def empty(db, notify_changes, is_remote, args):
    return ids, bool(duplicates)


def cached_identical_book_data(db, request_id):
    key = db.library_id, request_id
    if getattr(cached_identical_book_data, 'key', None) != key:
        cached_identical_book_data.key = key
        cached_identical_book_data.ans = db.data_for_find_identical_books()
    return cached_identical_book_data.ans


def do_adding(db, request_id, notify_changes, is_remote, mi, format_map, add_duplicates, oautomerge):
    identical_book_list, added_ids, updated_ids = set(), set(), set()
    duplicates = []
    identical_books_data = None

    def add_format(book_id, fmt):
        db.add_format(book_id, fmt, format_map[fmt], replace=True, run_hooks=False)
        updated_ids.add(book_id)

    def add_book():
        nonlocal added_ids
        added_ids_, duplicates_ = db.add_books(
            [(mi, format_map)], add_duplicates=True, run_hooks=False)
        added_ids |= set(added_ids_)
        duplicates.extend(duplicates_)

    if oautomerge != 'disabled' or not add_duplicates:
        identical_books_data = cached_identical_book_data(db, request_id)
        identical_book_list = find_identical_books(mi, identical_books_data)

    if oautomerge != 'disabled':
        if identical_book_list:
            needs_add = False
            duplicated_formats = set()
            for book_id in identical_book_list:
                book_formats = {q.upper() for q in db.formats(book_id)}
                input_formats = {q.upper():q for q in format_map}
                common_formats = book_formats & set(input_formats)
                if not common_formats:
                    for x in input_formats:
                        add_format(book_id, input_formats[x])
                else:
                    new_formats = set(input_formats) - book_formats
                    if new_formats:
                        for x in new_formats:
                            add_format(book_id, input_formats[x])
                    if oautomerge == 'overwrite':
                        for x in common_formats:
                            add_format(book_id, input_formats[x])
                    elif oautomerge == 'ignore':
                        for x in common_formats:
                            duplicated_formats.add(input_formats[x])
                    elif oautomerge == 'new_record':
                        needs_add = True
            if needs_add:
                add_book()
            if duplicated_formats:
                duplicates.append((mi, {x: format_map[x] for x in duplicated_formats}))
        else:
            add_book()
    else:
        if identical_book_list:
            duplicates.append((mi, format_map))
        else:
            add_book()
    if added_ids and identical_books_data is not None:
        for book_id in added_ids:
            db.update_data_for_find_identical_books(book_id, identical_books_data)

    if is_remote:
        notify_changes(books_added(added_ids))
        if updated_ids:
            notify_changes(formats_added({book_id: tuple(format_map) for book_id in updated_ids}))
    db.dump_metadata()
    return added_ids, updated_ids, duplicates


def book(db, notify_changes, is_remote, args):
    data, fname, fmt, add_duplicates, otitle, oauthors, oisbn, otags, oseries, oseries_index, ocover, oidentifiers, olanguages, oautomerge = args
    data, fname, fmt, add_duplicates, otitle, oauthors, oisbn, otags, oseries, oseries_index, ocover, oidentifiers, olanguages, oautomerge, request_id = args
    with add_ctx(), TemporaryDirectory('add-single') as tdir, run_import_plugins_before_metadata(tdir):
        if is_remote:
            with lopen(os.path.join(tdir, fname), 'wb') as f:
@@ -70,29 +144,18 @@ def book(db, notify_changes, is_remote, args):
        mi.cover_data = ocover

        identical_book_list, added_ids, updated_ids = set(), set(), set()
        if oautomerge:
            identical_books_data = identical_books_data = db.data_for_find_identical_books()
            identical_book_list = find_identical_books(mi, identical_books_data)
            add_duplicates=True
        if len(identical_book_list) > 0:
            for book_id in identical_book_list:
                db.add_format(book_id, fmt, path, replace='overwrite', run_hooks=False)
            updated_ids=identical_book_list
            duplicates=False
        else:
            added_ids, duplicates = db.add_books(
                [(mi, {fmt: path})], add_duplicates=add_duplicates, run_hooks=False)
        duplicates = []
        identical_books_data = None
        added_ids, updated_ids, duplicates = do_adding(
            db, request_id, notify_changes, is_remote, mi, {fmt: path}, add_duplicates, oautomerge)

        if is_remote:
            notify_changes(books_added(added_ids))
            notify_changes(books_added(updated_ids))
        db.dump_metadata()
        return added_ids, updated_ids, bool(duplicates), mi.title


def format_group(db, notify_changes, is_remote, args):
    formats, add_duplicates, cover_data = args
    formats, add_duplicates, oautomerge, request_id, cover_data = args
    with add_ctx(), TemporaryDirectory('add-multiple') as tdir, run_import_plugins_before_metadata(tdir):
        updated_ids = {}
        if is_remote:
            paths = []
            for name, data in formats:
@@ -104,14 +167,13 @@ def format_group(db, notify_changes, is_remote, args):
        paths = run_import_plugins(paths)
        mi = metadata_from_formats(paths)
        if mi.title is None:
            return None, set(), False
            return None, set(), set(), False
        if cover_data and not mi.cover_data or not mi.cover_data[1]:
            mi.cover_data = 'jpeg', cover_data
        ids, dups = db.add_books([(mi, create_format_map(paths))], add_duplicates=add_duplicates, run_hooks=False)
        if is_remote:
            notify_changes(books_added(ids))
        db.dump_metadata()
        return mi.title, ids, bool(dups)
        format_map = create_format_map(paths)
        added_ids, updated_ids, duplicates = do_adding(
            db, request_id, notify_changes, is_remote, mi, format_map, add_duplicates, oautomerge)
        return mi.title, set(added_ids), set(updated_ids), bool(duplicates)


def implementation(db, notify_changes, action, *args):
@@ -157,6 +219,7 @@ def do_add(
    oisbn, otags, oseries, oseries_index, ocover, oidentifiers, olanguages,
    compiled_rules, oautomerge
):
    request_id = uuid4()
    with add_ctx():
        files, dirs = [], []
        for path in paths:
@@ -178,7 +241,7 @@ def do_add(
            aids, mids, dups, book_title = dbctx.run(
                'add', 'book', dbctx.path(book), os.path.basename(book), fmt, add_duplicates,
                otitle, oauthors, oisbn, otags, oseries, oseries_index, serialize_cover(ocover) if ocover else None,
                oidentifiers, olanguages, oautomerge
                oidentifiers, olanguages, oautomerge, request_id
            )
            added_ids |= set(aids)
            merged_ids |= set(mids)
@@ -204,10 +267,11 @@ def do_add(
                    except EnvironmentError:
                        pass

                book_title, ids, dups = dbctx.run(
                    'add', 'format_group', tuple(map(dbctx.path, formats)), add_duplicates, cover_data)
                book_title, ids, mids, dups = dbctx.run(
                    'add', 'format_group', tuple(map(dbctx.path, formats)), add_duplicates, oautomerge, request_id, cover_data)
                if book_title is not None:
                    added_ids |= set(ids)
                    merged_ids |= set(mids)
                    if dups:
                        dir_dups.append((book_title, formats))

@@ -234,7 +298,7 @@ def do_add(
    if added_ids:
        prints(_('Added book ids: %s') % (', '.join(map(unicode_type, added_ids))))
    if merged_ids:
        prints(_('Updated book ids: %s') % (', '.join(map(unicode_type, merged_ids))))
        prints(_('Merged book ids: %s') % (', '.join(map(unicode_type, merged_ids))))


def option_parser(get_parser, args):
@@ -254,16 +318,21 @@ the directory related options below.
        action='store_true',
        default=False,
        help=_(
            'Add books to database even if they already exist. Comparison is done based on book titles.'
        )
            'Add books to database even if they already exist. Comparison is done based on book titles and authors.'
            ' Note that the {} option takes precedence.'
        ).format('--automerge')
    )
    parser.add_option(
        '-m',
        '--automerge',
        action='store_true',
        default=False,
        type='choice',
        choices=('disabled', 'ignore', 'overwrite', 'new_record'),
        default='disabled',
        help=_(
            'Add or upgrade existing book(s) to database. Comparison is done based on book titles, autor and language.\nSearch the library for the specified book and decide: \n * To update its format and language on the library if the book is newer than the existing one in the library.\n * To add to the library if the format and language does not exist.\n * To discard action if none of the above.'
            'If books with similar titles and authors are found, merge the incoming formats (files) automatically into'
            ' existing book records. A value of "ignore" means duplicate formats are discarded. A value of'
            ' "overwrite" means duplicate formats in the library are overwritten with the newly added files.'
            ' A value of "new_record" means duplicate formats are placed into a new book record.'
        )
    )
    parser.add_option(
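For orientation, here is a condensed, hypothetical restatement of the per-matching-book behaviour implemented by the new do_adding() helper above (the function below is illustrative only and not part of calibre; format names are upper-case strings, as in the diff):

    def automerge_actions(oautomerge, book_formats, incoming_formats):
        # Summarise what happens to one matching library record.
        common = book_formats & incoming_formats
        actions = ['add formats the record lacks: %s' % sorted(incoming_formats - book_formats)]
        if common:
            if oautomerge == 'overwrite':
                actions.append('overwrite existing copies of: %s' % sorted(common))
            elif oautomerge == 'ignore':
                actions.append('keep existing copies, report %s as duplicates' % sorted(common))
            elif oautomerge == 'new_record':
                actions.append('also create a new book record for the incoming files')
        return actions

For example, automerge_actions('ignore', {'EPUB', 'MOBI'}, {'EPUB', 'AZW3'}) yields an AZW3 merge plus the EPUB reported as a duplicate, mirroring the 'ignore' branch in do_adding().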
@@ -149,7 +149,7 @@
      <item row="8" column="0">
       <widget class="QCheckBox" name="opt_add_formats_to_existing">
        <property name="toolTip">
         <string>Auto-merge: If books with similar titles and authors found, merge the incoming formats automatically into
         <string>Auto-merge: If books with similar titles and authors are found, merge the incoming formats automatically into
existing book records. The box to the right controls what happens when an existing record already has
the incoming format. Note that this option also affects the Copy to library action.

@@ -163,7 +163,7 @@ Title match ignores leading indefinite articles ("the", "a",
      <item row="8" column="1">
       <widget class="QComboBox" name="opt_automerge">
        <property name="toolTip">
         <string>Auto-merge: If books with similar titles and authors found, merge the incoming formats (files) automatically into
         <string>Auto-merge: If books with similar titles and authors are found, merge the incoming formats (files) automatically into
existing book records. This box controls what happens when an existing record already has
the incoming format: