Clean up lang duplicates patch and add some tests

This commit is contained in:
Kovid Goyal 2017-11-29 17:05:39 +05:30
parent 6451f6244d
commit 043ea2d905
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
5 changed files with 23 additions and 18 deletions

View File

@ -35,6 +35,7 @@ from calibre.ptempfile import (base_dir, PersistentTemporaryFile,
from calibre.utils.config import prefs, tweaks
from calibre.utils.date import now as nowf, utcnow, UNDEFINED_DATE
from calibre.utils.icu import sort_key
from calibre.utils.localization import canonicalize_lang
def api(f):
@ -1952,6 +1953,7 @@ class Cache(object):
title (title is fuzzy matched). See also :meth:`data_for_find_identical_books`. '''
from calibre.db.utils import fuzzy_title
identical_book_ids = set()
langq = tuple(filter(lambda x: x and x != 'und', map(canonicalize_lang, mi.languages or ())))
if mi.authors:
try:
quathors = mi.authors[:20] # Too many authors causes parsing of the search expression to fail
@ -1980,7 +1982,9 @@ class Cache(object):
fbook_title = fuzzy_title(fbook_title)
mbook_title = fuzzy_title(mi.title)
if fbook_title == mbook_title:
identical_book_ids.add(book_id)
bl = self._field_for('languages', book_id)
if not langq or not bl or bl == langq:
identical_book_ids.add(book_id)
return identical_book_ids
@read_api

View File

@ -684,11 +684,17 @@ class ReadingTest(BaseTest):
from calibre.db.utils import find_identical_books
# 'find_identical_books': [(,), (Metadata('unknown'),), (Metadata('xxxx'),)],
cache = self.init_cache(self.library_path)
cache.set_field('languages', {1: ('fra', 'deu')})
data = cache.data_for_find_identical_books()
lm = cache.get_metadata(1)
lm2 = cache.get_metadata(1)
lm2.languages = ['eng']
for mi, books in (
(Metadata('title one', ['author one']), {2}),
(Metadata(_('Unknown')), {3}),
(Metadata('title two', ['author one']), {1}),
(lm, {1}),
(lm2, set()),
):
self.assertEqual(books, cache.find_identical_books(mi))
self.assertEqual(books, find_identical_books(mi, data))

View File

@ -34,6 +34,7 @@ def force_to_bool(val):
val = None
return val
_fuzzy_title_patterns = None
@ -82,22 +83,15 @@ def find_identical_books(mi, data):
if fuzzy_title(title) == titleq:
ans.add(book_id)
if ans is None:
return set()
alg = set()
langq = canonicalize_lang(mi.language)
if langq is None:
langq = tuple(filter(lambda x: x and x != 'und', map(canonicalize_lang, mi.languages or ())))
if not langq:
return ans
for book_id in ans:
lang_list = lang_map.get(book_id, '')
if lang_list is None:
return ans
for lang in lang_list:
lang=canonicalize_lang(lang)
if lang == langq:
alg.add(book_id)
return alg
def lang_matches(book_id):
book_langq = lang_map.get(book_id)
return not book_langq or langq == book_langq
return {book_id for book_id in ans if lang_matches(book_id)}
Entry = namedtuple('Entry', 'path size timestamp thumbnail_size')
@ -383,6 +377,7 @@ class ThumbnailCache(object):
if hasattr(self, 'total_size'):
self._apply_size()
number_separators = None

View File

@ -351,7 +351,7 @@ class DuplicatesQuestion(QDialog): # {{{
QDialog.__init__(self, parent)
l = QVBoxLayout()
self.setLayout(l)
self.la = la = QLabel(_('Books with the same, language, title and author as the following already exist in the library %s.'
self.la = la = QLabel(_('Books with the same, title, author and language as the following already exist in the library %s.'
' Select which books you want copied anyway.') %
os.path.basename(loc))
la.setWordWrap(True)

View File

@ -111,7 +111,7 @@
<item row="3" column="0" colspan="2">
<widget class="QCheckBox" name="opt_check_for_dupes_on_ctl">
<property name="text">
<string>When using the &quot;Copy to library&quot; action check for &amp;duplicates with the same language, title, and author</string>
<string>When using the &quot;Copy to library&quot; action check for &amp;duplicates with the same title, author and language</string>
</property>
</widget>
</item>