From 1d3ad38187708ca0c6efefce2d04b82820f19522 Mon Sep 17 00:00:00 2001 From: mikiher Date: Sat, 30 Sep 2023 18:08:03 +0000 Subject: [PATCH 01/13] [cleanup] refactor OpenLib sort into getOpenLibResult --- server/finders/BookFinder.js | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/server/finders/BookFinder.js b/server/finders/BookFinder.js index 96735cc9..debac709 100644 --- a/server/finders/BookFinder.js +++ b/server/finders/BookFinder.js @@ -136,6 +136,10 @@ class BookFinder { if (!booksFiltered.length && books.length) { if (this.verbose) Logger.debug(`Search has ${books.length} matches, but no close title matches`) } + booksFiltered.sort((a, b) => { + return a.totalDistance - b.totalDistance + }) + return booksFiltered } @@ -282,12 +286,6 @@ class BookFinder { } } - if (provider === 'openlibrary') { - books.sort((a, b) => { - return a.totalDistance - b.totalDistance - }) - } - return books } From 46b0b3a6efb7f31ac7d67ee5fff6dcbd2ff28542 Mon Sep 17 00:00:00 2001 From: mikiher Date: Sun, 1 Oct 2023 08:42:47 +0000 Subject: [PATCH 02/13] [cleanup] Refactor candidates logic to separate class --- server/finders/BookFinder.js | 113 ++++++++++++++++++++--------------- 1 file changed, 66 insertions(+), 47 deletions(-) diff --git a/server/finders/BookFinder.js b/server/finders/BookFinder.js index debac709..b30510f2 100644 --- a/server/finders/BookFinder.js +++ b/server/finders/BookFinder.js @@ -183,35 +183,67 @@ class BookFinder { return books } - addTitleCandidate(title, candidates) { - // Main variant - const cleanTitle = this.cleanTitleForCompares(title).trim() - if (!cleanTitle) return - candidates.add(cleanTitle) + static TitleCandidates = class { - let candidate = cleanTitle + constructor(bookFinder, cleanAuthor) { + this.bookFinder = bookFinder + this.candidates = new Set() + this.cleanAuthor = cleanAuthor + } - // Remove subtitle - candidate = candidate.replace(/([,:;_]| by ).*/g, "").trim() - if (candidate) - candidates.add(candidate) 
+ add(title) { + const titleTransformers = [ + [/([,:;_]| by ).*/g, ''], // Remove subtitle + [/^\d+ | \d+$/g, ''], // Remove preceding/trailing numbers + [/(^| )\d+k(bps)?( |$)/, ' '], // Remove bitrate + [/ (2nd|3rd|\d+th)\s+ed(\.|ition)?/g, ''] // Remove edition + ] - // Remove preceding/trailing numbers - candidate = candidate.replace(/^\d+ | \d+$/g, "").trim() - if (candidate) - candidates.add(candidate) + // Main variant + const cleanTitle = this.bookFinder.cleanTitleForCompares(title).trim() + if (!cleanTitle) return + this.candidates.add(cleanTitle) - // Remove bitrate - candidate = candidate.replace(/(^| )\d+k(bps)?( |$)/, " ").trim() - if (candidate) - candidates.add(candidate) + let candidate = cleanTitle - // Remove edition - candidate = candidate.replace(/ (2nd|3rd|\d+th)\s+ed(\.|ition)?/, "").trim() - if (candidate) - candidates.add(candidate) + for (const transformer of titleTransformers) { + candidate = candidate.replace(transformer[0], transformer[1]).trim() + if (candidate) { + this.candidates.add(candidate) + } + } + } + + get size() { + return this.candidates.size + } + + getCandidates() { + var candidates = [...this.candidates] + candidates.sort((a, b) => { + // Candidates that include the author are likely low quality + const includesAuthorDiff = !b.includes(this.cleanAuthor) - !a.includes(this.cleanAuthor) + if (includesAuthorDiff) return includesAuthorDiff + // Candidates that include only digits are also likely low quality + const onlyDigits = /^\d+$/ + const includesOnlyDigitsDiff = !onlyDigits.test(b) - !onlyDigits.test(a) + if (includesOnlyDigitsDiff) return includesOnlyDigitsDiff + // Start with longer candidaets, as they are likely more specific + const lengthDiff = b.length - a.length + if (lengthDiff) return lengthDiff + return b.localeCompare(a) + }) + Logger.debug(`[${this.constructor.name}] Found ${candidates.length} fuzzy title candidates`) + Logger.debug(candidates) + return candidates + } + + delete(title) { + return 
this.candidates.delete(title) + } } + /** * Search for books including fuzzy searches * @@ -240,46 +272,33 @@ class BookFinder { title = title.trim().toLowerCase() author = author.trim().toLowerCase() + const cleanAuthor = this.cleanAuthorForCompares(author) + // Now run up to maxFuzzySearches fuzzy searches - let candidates = new Set() - let cleanedAuthor = this.cleanAuthorForCompares(author) - this.addTitleCandidate(title, candidates) + let titleCandidates = new BookFinder.TitleCandidates(this, cleanAuthor) + titleCandidates.add(title) // remove parentheses and their contents, and replace with a separator const cleanTitle = title.replace(/\[.*?\]|\(.*?\)|{.*?}/g, " - ") // Split title into hypen-separated parts const titleParts = cleanTitle.split(/ - | -|- /) for (const titlePart of titleParts) { - this.addTitleCandidate(titlePart, candidates) + titleCandidates.add(titlePart) } // We already searched for original title - if (author == cleanedAuthor) candidates.delete(title) - if (candidates.size > 0) { - candidates = [...candidates] - candidates.sort((a, b) => { - // Candidates that include the author are likely low quality - const includesAuthorDiff = !b.includes(cleanedAuthor) - !a.includes(cleanedAuthor) - if (includesAuthorDiff) return includesAuthorDiff - // Candidates that include only digits are also likely low quality - const onlyDigits = /^\d+$/ - const includesOnlyDigitsDiff = !onlyDigits.test(b) - !onlyDigits.test(a) - if (includesOnlyDigitsDiff) return includesOnlyDigitsDiff - // Start with longer candidaets, as they are likely more specific - const lengthDiff = b.length - a.length - if (lengthDiff) return lengthDiff - return b.localeCompare(a) - }) - Logger.debug(`[BookFinder] Found ${candidates.length} fuzzy title candidates`, candidates) - for (const candidate of candidates) { + if (author == cleanAuthor) titleCandidates.delete(title) + if (titleCandidates.size > 0) { + titleCandidates = titleCandidates.getCandidates() + for (const titleCandidate 
of titleCandidates) { if (++numFuzzySearches > maxFuzzySearches) return books - books = await this.runSearch(candidate, cleanedAuthor, provider, asin, maxTitleDistance, maxAuthorDistance) + books = await this.runSearch(titleCandidate, cleanAuthor, provider, asin, maxTitleDistance, maxAuthorDistance) if (books.length) break } if (!books.length) { // Now try searching without the author - for (const candidate of candidates) { + for (const titleCandidate of titleCandidates) { if (++numFuzzySearches > maxFuzzySearches) return books - books = await this.runSearch(candidate, '', provider, asin, maxTitleDistance, maxAuthorDistance) + books = await this.runSearch(titleCandidate, '', provider, asin, maxTitleDistance, maxAuthorDistance) if (books.length) break } } From 5d7c197c893d10277f59c753e2d324837185a78f Mon Sep 17 00:00:00 2001 From: mikiher Date: Tue, 3 Oct 2023 19:43:37 +0000 Subject: [PATCH 03/13] [fix] Add back toLowerCase to cleanAuthor/Title (required by other uses) --- server/finders/BookFinder.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/finders/BookFinder.js b/server/finders/BookFinder.js index b30510f2..aa66fb92 100644 --- a/server/finders/BookFinder.js +++ b/server/finders/BookFinder.js @@ -59,12 +59,12 @@ class BookFinder { // Remove single quotes (i.e. 
"Ender's Game" becomes "Enders Game") cleaned = cleaned.replace(/'/g, '') - return this.replaceAccentedChars(cleaned) + return this.replaceAccentedChars(cleaned).toLowerCase() } cleanAuthorForCompares(author) { if (!author) return '' - return this.replaceAccentedChars(author) + return this.replaceAccentedChars(author).toLowerCase() } filterSearchResults(books, title, author, maxTitleDistance, maxAuthorDistance) { From 10f5bc8cbeeacd3c47f7115f387dd7d5817982e7 Mon Sep 17 00:00:00 2001 From: mikiher Date: Wed, 4 Oct 2023 05:26:16 +0000 Subject: [PATCH 04/13] [cleanup] Make original title/author check with more readable --- server/finders/BookFinder.js | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/server/finders/BookFinder.js b/server/finders/BookFinder.js index aa66fb92..6ca238ee 100644 --- a/server/finders/BookFinder.js +++ b/server/finders/BookFinder.js @@ -276,7 +276,6 @@ class BookFinder { // Now run up to maxFuzzySearches fuzzy searches let titleCandidates = new BookFinder.TitleCandidates(this, cleanAuthor) - titleCandidates.add(title) // remove parentheses and their contents, and replace with a separator const cleanTitle = title.replace(/\[.*?\]|\(.*?\)|{.*?}/g, " - ") @@ -285,16 +284,15 @@ class BookFinder { for (const titlePart of titleParts) { titleCandidates.add(titlePart) } - // We already searched for original title - if (author == cleanAuthor) titleCandidates.delete(title) if (titleCandidates.size > 0) { titleCandidates = titleCandidates.getCandidates() for (const titleCandidate of titleCandidates) { + if (titleCandidate == title && cleanAuthor == author) continue // We already tried this if (++numFuzzySearches > maxFuzzySearches) return books books = await this.runSearch(titleCandidate, cleanAuthor, provider, asin, maxTitleDistance, maxAuthorDistance) if (books.length) break } - if (!books.length) { + if (!books.length && cleanAuthor) { // Now try searching without the author for (const titleCandidate of titleCandidates) { if 
(++numFuzzySearches > maxFuzzySearches) return books From 752bfffb1109e8fadf87775ecacf588365608b03 Mon Sep 17 00:00:00 2001 From: mikiher Date: Wed, 4 Oct 2023 14:53:12 +0000 Subject: [PATCH 05/13] [enhancement] Only add title candidate before and after all transforms --- server/finders/BookFinder.js | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/server/finders/BookFinder.js b/server/finders/BookFinder.js index 6ca238ee..1fe86718 100644 --- a/server/finders/BookFinder.js +++ b/server/finders/BookFinder.js @@ -206,12 +206,11 @@ class BookFinder { let candidate = cleanTitle - for (const transformer of titleTransformers) { + for (const transformer of titleTransformers) candidate = candidate.replace(transformer[0], transformer[1]).trim() - if (candidate) { - this.candidates.add(candidate) - } - } + + if (candidate) + this.candidates.add(candidate) } get size() { From 8979586404a1ca4a46b0eff3d1cc23582ffbfbb5 Mon Sep 17 00:00:00 2001 From: mikiher Date: Thu, 5 Oct 2023 10:28:55 +0000 Subject: [PATCH 06/13] [enhancement] Improve candidate sorting --- server/finders/BookFinder.js | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/server/finders/BookFinder.js b/server/finders/BookFinder.js index 1fe86718..2bd1c571 100644 --- a/server/finders/BookFinder.js +++ b/server/finders/BookFinder.js @@ -189,9 +189,11 @@ class BookFinder { this.bookFinder = bookFinder this.candidates = new Set() this.cleanAuthor = cleanAuthor + this.priorities = {} + this.positions = {} } - add(title) { + add(title, position = 0) { const titleTransformers = [ [/([,:;_]| by ).*/g, ''], // Remove subtitle [/^\d+ | \d+$/g, ''], // Remove preceding/trailing numbers @@ -203,14 +205,22 @@ class BookFinder { const cleanTitle = this.bookFinder.cleanTitleForCompares(title).trim() if (!cleanTitle) return this.candidates.add(cleanTitle) + this.priorities[cleanTitle] = 0 + this.positions[cleanTitle] = position let candidate = cleanTitle for
(const transformer of titleTransformers) candidate = candidate.replace(transformer[0], transformer[1]).trim() - if (candidate) - this.candidates.add(candidate) + if (candidate != cleanTitle) { + if (candidate) { + this.candidates.add(candidate) + this.priorities[candidate] = 0 + this.positions[candidate] = position + } + this.priorities[cleanTitle] = 1 + } } get size() { @@ -227,6 +237,12 @@ class BookFinder { const onlyDigits = /^\d+$/ const includesOnlyDigitsDiff = !onlyDigits.test(b) - !onlyDigits.test(a) if (includesOnlyDigitsDiff) return includesOnlyDigitsDiff + // transformed candidates receive higher priority + const priorityDiff = this.priorities[a] - this.priorities[b] + if (priorityDiff) return priorityDiff + // if same priorirty, prefer candidates that are closer to the beginning (e.g. titles before subtitles) + const positionDiff = this.positions[a] - this.positions[b] + if (positionDiff) return positionDiff // Start with longer candidaets, as they are likely more specific const lengthDiff = b.length - a.length if (lengthDiff) return lengthDiff @@ -280,8 +296,8 @@ class BookFinder { const cleanTitle = title.replace(/\[.*?\]|\(.*?\)|{.*?}/g, " - ") // Split title into hypen-separated parts const titleParts = cleanTitle.split(/ - | -|- /) - for (const titlePart of titleParts) { - titleCandidates.add(titlePart) + for (const [position, titlePart] of titleParts.entries()) { + titleCandidates.add(titlePart, position) } if (titleCandidates.size > 0) { titleCandidates = titleCandidates.getCandidates() From 9eff471afaa87572bfcb312af64d756511fde2a3 Mon Sep 17 00:00:00 2001 From: mikiher Date: Thu, 5 Oct 2023 11:39:29 +0000 Subject: [PATCH 07/13] [enhancement] AuthorCandidates, author validation --- server/finders/BookFinder.js | 100 +++++++++++++++++++++++++++++------ 1 file changed, 84 insertions(+), 16 deletions(-) diff --git a/server/finders/BookFinder.js b/server/finders/BookFinder.js index 2bd1c571..b29417cb 100644 --- a/server/finders/BookFinder.js +++ 
b/server/finders/BookFinder.js @@ -194,6 +194,12 @@ class BookFinder { } add(title, position = 0) { + // if title contains the author, remove it + if (this.cleanAuthor) { + const authorRe = new RegExp(`(^| | by |)${this.cleanAuthor}(?= |$)`, "g") + title = this.bookFinder.cleanAuthorForCompares(title).replace(authorRe, '').trim() + } + const titleTransformers = [ [/([,:;_]| by ).*/g, ''], // Remove subtitle [/^\d+ | \d+$/g, ''], // Remove preceding/trailing numbers @@ -258,6 +264,73 @@ class BookFinder { } } + static AuthorCandidates = class { + constructor(bookFinder, cleanAuthor) { + this.bookFinder = bookFinder + this.candidates = new Set() + this.cleanAuthor = cleanAuthor + if (cleanAuthor) this.candidates.add(cleanAuthor) + } + + validateAuthor(name, region = '', maxLevenshtein = 3) { + return this.bookFinder.audnexus.authorASINsRequest(name, region).then((asins) => { + for (const asin of asins) { + let cleanName = this.bookFinder.cleanAuthorForCompares(asin.name) + if (!cleanName) continue + if (cleanName.includes(name)) return name + if (name.includes(cleanName)) return cleanName + if (levenshteinDistance(cleanName, name) <= maxLevenshtein) return cleanName + } + return '' + }) + } + + add(author) { + const authorTransformers = [] + + // Main variant + const cleanAuthor = this.bookFinder.cleanAuthorForCompares(author).trim() + if (!cleanAuthor) return false + this.candidates.add(cleanAuthor) + + let candidate = cleanAuthor + + for (const transformer of authorTransformers) { + candidate = candidate.replace(transformer[0], transformer[1]).trim() + if (candidate) { + this.candidates.add(candidate) + } + } + + return true + } + + get size() { + return this.candidates.size + } + + async getCandidates() { + var filteredCandidates = [] + var promises = [] + for (const candidate of this.candidates) { + promises.push(this.validateAuthor(candidate)) + } + const results = [...new Set(await Promise.all(promises))] + filteredCandidates = results.filter(author => author) 
+ // if no valid candidates were found, add back the original clean author + if (!filteredCandidates.length && this.cleanAuthor) filteredCandidates.push(this.cleanAuthor) + // always add an empty author candidate + filteredCandidates.push('') + Logger.debug(`[${this.constructor.name}] Found ${filteredCandidates.length} fuzzy author candidates`) + Logger.debug(filteredCandidates) + return filteredCandidates + } + + delete(author) { + return this.candidates.delete(author) + } + } + /** * Search for books including fuzzy searches @@ -290,30 +363,25 @@ class BookFinder { const cleanAuthor = this.cleanAuthorForCompares(author) // Now run up to maxFuzzySearches fuzzy searches - let titleCandidates = new BookFinder.TitleCandidates(this, cleanAuthor) + let authorCandidates = new BookFinder.AuthorCandidates(this, cleanAuthor) // remove parentheses and their contents, and replace with a separator const cleanTitle = title.replace(/\[.*?\]|\(.*?\)|{.*?}/g, " - ") // Split title into hypen-separated parts const titleParts = cleanTitle.split(/ - | -|- /) - for (const [position, titlePart] of titleParts.entries()) { - titleCandidates.add(titlePart, position) - } - if (titleCandidates.size > 0) { + for (const titlePart of titleParts) + authorCandidates.add(titlePart) + authorCandidates = await authorCandidates.getCandidates() + for (const authorCandidate of authorCandidates) { + let titleCandidates = new BookFinder.TitleCandidates(this, authorCandidate) + for (const [position, titlePart] of titleParts.entries()) + titleCandidates.add(titlePart, position) titleCandidates = titleCandidates.getCandidates() for (const titleCandidate of titleCandidates) { - if (titleCandidate == title && cleanAuthor == author) continue // We already tried this + if (titleCandidate == title && authorCandidate == author) continue // We already tried this if (++numFuzzySearches > maxFuzzySearches) return books - books = await this.runSearch(titleCandidate, cleanAuthor, provider, asin, maxTitleDistance, 
maxAuthorDistance) - if (books.length) break - } - if (!books.length && cleanAuthor) { - // Now try searching without the author - for (const titleCandidate of titleCandidates) { - if (++numFuzzySearches > maxFuzzySearches) return books - books = await this.runSearch(titleCandidate, '', provider, asin, maxTitleDistance, maxAuthorDistance) - if (books.length) break - } + books = await this.runSearch(titleCandidate, authorCandidate, provider, asin, maxTitleDistance, maxAuthorDistance) + if (books.length) return books } } } From b2acdadcea6fa52636d816166beac24cb370e127 Mon Sep 17 00:00:00 2001 From: mikiher Date: Thu, 5 Oct 2023 12:22:02 +0000 Subject: [PATCH 08/13] [enhancement] Added a couple title transformers --- server/finders/BookFinder.js | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/server/finders/BookFinder.js b/server/finders/BookFinder.js index b29417cb..8876e2bd 100644 --- a/server/finders/BookFinder.js +++ b/server/finders/BookFinder.js @@ -202,9 +202,11 @@ class BookFinder { const titleTransformers = [ [/([,:;_]| by ).*/g, ''], // Remove subtitle - [/^\d+ | \d+$/g, ''], // Remove preceding/trailing numbers [/(^| )\d+k(bps)?( |$)/, ' '], // Remove bitrate - [/ (2nd|3rd|\d+th)\s+ed(\.|ition)?/g, ''] // Remove edition + [/ (2nd|3rd|\d+th)\s+ed(\.|ition)?/g, ''], // Remove edition + [/(^| |\.)(m4b|m4a|mp3)( |$)/g, ''], // Remove file-type + [/ a novel.*$/g, ''], // Remove "a novel" + [/^\d+ | \d+$/g, ''], // Remove preceding/trailing numbers ] // Main variant From f3555a12ceff25d328b7dd1637668874e181946e Mon Sep 17 00:00:00 2001 From: mikiher Date: Thu, 5 Oct 2023 14:50:16 +0000 Subject: [PATCH 09/13] [enhancement] Handle initials in author normalization --- server/finders/BookFinder.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/server/finders/BookFinder.js b/server/finders/BookFinder.js index 8876e2bd..70031fa3 100644 --- a/server/finders/BookFinder.js +++ b/server/finders/BookFinder.js @@ -64,7 +64,12 
@@ class BookFinder { cleanAuthorForCompares(author) { if (!author) return '' - return this.replaceAccentedChars(author).toLowerCase() + let cleanAuthor = this.replaceAccentedChars(author).toLowerCase() + // separate initials + cleanAuthor = cleanAuthor.replace(/([a-z])\.([a-z])/g, '$1. $2') + // remove middle initials + cleanAuthor = cleanAuthor.replace(/(?<=\w\w)(\s+[a-z]\.?)+(?=\s+\w\w)/g, '') + return cleanAuthor } filterSearchResults(books, title, author, maxTitleDistance, maxAuthorDistance) { From bf9f3895db17f2172cda4e32caab559eda9c05a1 Mon Sep 17 00:00:00 2001 From: mikiher Date: Thu, 5 Oct 2023 17:53:54 +0000 Subject: [PATCH 10/13] [enhancement] Treat underscores as title part separators --- server/finders/BookFinder.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/finders/BookFinder.js b/server/finders/BookFinder.js index 70031fa3..e3e87f4a 100644 --- a/server/finders/BookFinder.js +++ b/server/finders/BookFinder.js @@ -372,8 +372,8 @@ class BookFinder { // Now run up to maxFuzzySearches fuzzy searches let authorCandidates = new BookFinder.AuthorCandidates(this, cleanAuthor) - // remove parentheses and their contents, and replace with a separator - const cleanTitle = title.replace(/\[.*?\]|\(.*?\)|{.*?}/g, " - ") + // remove underscores and parentheses with their contents, and replace with a separator + const cleanTitle = title.replace(/\[.*?\]|\(.*?\)|{.*?}|_/g, " - ") // Split title into hypen-separated parts const titleParts = cleanTitle.split(/ - | -|- /) for (const titlePart of titleParts) From b0b7a0a61817671b15e2687a32399aea6f0bdb51 Mon Sep 17 00:00:00 2001 From: mikiher Date: Thu, 5 Oct 2023 18:27:52 +0000 Subject: [PATCH 11/13] [enhancement] Reduce spurious matches in validateAuthor --- server/finders/BookFinder.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/finders/BookFinder.js b/server/finders/BookFinder.js index e3e87f4a..d3192142 100644 --- a/server/finders/BookFinder.js +++ 
b/server/finders/BookFinder.js @@ -279,9 +279,10 @@ class BookFinder { if (cleanAuthor) this.candidates.add(cleanAuthor) } - validateAuthor(name, region = '', maxLevenshtein = 3) { + validateAuthor(name, region = '', maxLevenshtein = 2) { return this.bookFinder.audnexus.authorASINsRequest(name, region).then((asins) => { - for (const asin of asins) { + for (const [i, asin] of asins.entries()) { + if (i > 10) break let cleanName = this.bookFinder.cleanAuthorForCompares(asin.name) if (!cleanName) continue if (cleanName.includes(name)) return name From f44b7ed1d0f8ba538e194632f98660893d9206a6 Mon Sep 17 00:00:00 2001 From: mikiher Date: Thu, 5 Oct 2023 18:41:18 +0000 Subject: [PATCH 12/13] [enhancement] If no valid authors, use clean author field --- server/finders/BookFinder.js | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/server/finders/BookFinder.js b/server/finders/BookFinder.js index d3192142..8c420333 100644 --- a/server/finders/BookFinder.js +++ b/server/finders/BookFinder.js @@ -317,6 +317,14 @@ class BookFinder { return this.candidates.size } + get agressivelyCleanAuthor() { + if (this.cleanAuthor) { + const agressivelyCleanAuthor = this.cleanAuthor.replace(/[,/-].*$/, '').trim() + return agressivelyCleanAuthor ? 
agressivelyCleanAuthor : this.cleanAuthor + } + return '' + } + async getCandidates() { var filteredCandidates = [] var promises = [] @@ -325,9 +333,9 @@ class BookFinder { } const results = [...new Set(await Promise.all(promises))] filteredCandidates = results.filter(author => author) - // if no valid candidates were found, add back the original clean author - if (!filteredCandidates.length && this.cleanAuthor) filteredCandidates.push(this.cleanAuthor) - // always add an empty author candidate + // If no valid candidates were found, add back an aggresively cleaned author version + if (!filteredCandidates.length && this.cleanAuthor) filteredCandidates.push(this.agressivelyCleanAuthor) + // Always add an empty author candidate filteredCandidates.push('') Logger.debug(`[${this.constructor.name}] Found ${filteredCandidates.length} fuzzy author candidates`) Logger.debug(filteredCandidates) @@ -364,7 +372,7 @@ class BookFinder { books = await this.runSearch(title, author, provider, asin, maxTitleDistance, maxAuthorDistance) if (!books.length && maxFuzzySearches > 0) { - // normalize title and author + // Normalize title and author title = title.trim().toLowerCase() author = author.trim().toLowerCase() @@ -373,7 +381,7 @@ class BookFinder { // Now run up to maxFuzzySearches fuzzy searches let authorCandidates = new BookFinder.AuthorCandidates(this, cleanAuthor) - // remove underscores and parentheses with their contents, and replace with a separator + // Remove underscores and parentheses with their contents, and replace with a separator const cleanTitle = title.replace(/\[.*?\]|\(.*?\)|{.*?}|_/g, " - ") // Split title into hypen-separated parts const titleParts = cleanTitle.split(/ - | -|- /) From f8f555b4b6ce1ef64dea6913a42d37fabc0f105f Mon Sep 17 00:00:00 2001 From: mikiher Date: Sat, 7 Oct 2023 21:28:25 +0000 Subject: [PATCH 13/13] Remove some unused code in AuthorCandidates.add --- server/finders/BookFinder.js | 16 +--------------- 1 file changed, 1 insertion(+), 15 
deletions(-) diff --git a/server/finders/BookFinder.js b/server/finders/BookFinder.js index 54ac63a4..a0b64f55 100644 --- a/server/finders/BookFinder.js +++ b/server/finders/BookFinder.js @@ -294,23 +294,9 @@ class BookFinder { } add(author) { - const authorTransformers = [] - - // Main variant const cleanAuthor = this.bookFinder.cleanAuthorForCompares(author).trim() - if (!cleanAuthor) return false + if (!cleanAuthor) return this.candidates.add(cleanAuthor) - - let candidate = cleanAuthor - - for (const transformer of authorTransformers) { - candidate = candidate.replace(transformer[0], transformer[1]).trim() - if (candidate) { - this.candidates.add(candidate) - } - } - - return true } get size() {