When checking added books for duplicates, also check the language field,
so that books with the same title/authors but different languages are
not considered duplicates.
This commit is contained in:
Juan Pedro Paredes 2017-11-22 01:25:30 +01:00 committed by Kovid Goyal
parent 32a1efe0e9
commit 6451f6244d
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 26 additions and 7 deletions

View File

@ -1931,12 +1931,13 @@ class Cache(object):
author_map = defaultdict(set) author_map = defaultdict(set)
for aid, author in at.id_map.iteritems(): for aid, author in at.id_map.iteritems():
author_map[icu_lower(author)].add(aid) author_map[icu_lower(author)].add(aid)
return (author_map, at.col_book_map.copy(), self.fields['title'].table.book_col_map.copy()) return (author_map, at.col_book_map.copy(), self.fields['title'].table.book_col_map.copy(), self.fields['languages'].book_value_map.copy())
@read_api @read_api
def update_data_for_find_identical_books(self, book_id, data): def update_data_for_find_identical_books(self, book_id, data):
author_map, author_book_map, title_map = data author_map, author_book_map, title_map, lang_map = data
title_map[book_id] = self._field_for('title', book_id) title_map[book_id] = self._field_for('title', book_id)
lang_map[book_id] = self._field_for('languages', book_id)
at = self.fields['authors'].table at = self.fields['authors'].table
for aid in at.book_col_map.get(book_id, ()): for aid in at.book_col_map.get(book_id, ()):
author_map[icu_lower(at.id_map[aid])].add(aid) author_map[icu_lower(at.id_map[aid])].add(aid)

View File

@ -15,6 +15,8 @@ from threading import Lock
from calibre import as_unicode, prints from calibre import as_unicode, prints
from calibre.constants import cache_dir, get_windows_number_formats, iswindows from calibre.constants import cache_dir, get_windows_number_formats, iswindows
from calibre.utils.localization import canonicalize_lang
def force_to_bool(val): def force_to_bool(val):
if isinstance(val, (str, unicode)): if isinstance(val, (str, unicode)):
@ -59,7 +61,7 @@ def fuzzy_title(title):
def find_identical_books(mi, data): def find_identical_books(mi, data):
author_map, aid_map, title_map = data author_map, aid_map, title_map, lang_map = data
found_books = None found_books = None
for a in mi.authors: for a in mi.authors:
author_ids = author_map.get(icu_lower(a)) author_ids = author_map.get(icu_lower(a))
@ -79,7 +81,23 @@ def find_identical_books(mi, data):
title = title_map.get(book_id, '') title = title_map.get(book_id, '')
if fuzzy_title(title) == titleq: if fuzzy_title(title) == titleq:
ans.add(book_id) ans.add(book_id)
return ans
if ans is None:
return set()
alg = set()
langq = canonicalize_lang(mi.language)
if langq is None:
return ans
for book_id in ans:
lang_list = lang_map.get(book_id, '')
if lang_list is None:
return ans
for lang in lang_list:
lang=canonicalize_lang(lang)
if lang == langq:
alg.add(book_id)
return alg
Entry = namedtuple('Entry', 'path size timestamp thumbnail_size') Entry = namedtuple('Entry', 'path size timestamp thumbnail_size')

View File

@ -351,7 +351,7 @@ class DuplicatesQuestion(QDialog): # {{{
QDialog.__init__(self, parent) QDialog.__init__(self, parent)
l = QVBoxLayout() l = QVBoxLayout()
self.setLayout(l) self.setLayout(l)
self.la = la = QLabel(_('Books with the same title and author as the following already exist in the library %s.' self.la = la = QLabel(_('Books with the same, language, title and author as the following already exist in the library %s.'
' Select which books you want copied anyway.') % ' Select which books you want copied anyway.') %
os.path.basename(loc)) os.path.basename(loc))
la.setWordWrap(True) la.setWordWrap(True)

View File

@ -111,7 +111,7 @@
<item row="3" column="0" colspan="2"> <item row="3" column="0" colspan="2">
<widget class="QCheckBox" name="opt_check_for_dupes_on_ctl"> <widget class="QCheckBox" name="opt_check_for_dupes_on_ctl">
<property name="text"> <property name="text">
<string>When using the &quot;Copy to library&quot; action check for &amp;duplicates with the same title and author</string> <string>When using the &quot;Copy to library&quot; action check for &amp;duplicates with the same language, title, and author</string>
</property> </property>
</widget> </widget>
</item> </item>
@ -165,7 +165,7 @@ Title match ignores leading indefinite articles (&quot;the&quot;, &quot;a&quot;,
<property name="toolTip"> <property name="toolTip">
<string>Auto-merge: If books with similar titles and authors found, merge the incoming formats automatically into <string>Auto-merge: If books with similar titles and authors found, merge the incoming formats automatically into
existing book records. This box controls what happens when an existing record already has existing book records. This box controls what happens when an existing record already has
the incoming format: the incoming format:
Ignore duplicate incoming files - means that existing files in your calibre library will not be replaced Ignore duplicate incoming files - means that existing files in your calibre library will not be replaced
Overwrite existing duplicate files - means that existing files in your calibre library will be replaced Overwrite existing duplicate files - means that existing files in your calibre library will be replaced