mirror of
				https://github.com/advplyr/audiobookshelf.git
				synced 2025-11-03 19:07:00 -05:00 
			
		
		
		
	Fuzzy Matching V1
This commit is contained in:
		
							parent
							
								
									fea28351f9
								
							
						
					
					
						commit
						ac746f199b
					
				@ -60,13 +60,13 @@ class BookFinder {
 | 
			
		||||
    // Remove single quotes (i.e. "Ender's Game" becomes "Enders Game")
 | 
			
		||||
    cleaned = cleaned.replace(/'/g, '')
 | 
			
		||||
    cleaned = this.replaceAccentedChars(cleaned)
 | 
			
		||||
    return cleaned.toLowerCase()
 | 
			
		||||
    return cleaned
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  cleanAuthorForCompares(author) {
 | 
			
		||||
    if (!author) return ''
 | 
			
		||||
    var cleaned = this.replaceAccentedChars(author)
 | 
			
		||||
    return cleaned.toLowerCase()
 | 
			
		||||
    return cleaned
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  filterSearchResults(books, title, author, maxTitleDistance, maxAuthorDistance) {
 | 
			
		||||
@ -181,12 +181,113 @@ class BookFinder {
 | 
			
		||||
    return books
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  addTitleCandidate(title, candidates) {
 | 
			
		||||
    // Main variant
 | 
			
		||||
    const cleanTitle = this.cleanTitleForCompares(title).trim()
 | 
			
		||||
    if (!cleanTitle) return
 | 
			
		||||
    candidates.add(cleanTitle)
 | 
			
		||||
 | 
			
		||||
    let candidate = cleanTitle
 | 
			
		||||
 | 
			
		||||
    // Remove subtitle
 | 
			
		||||
    candidate = candidate.replace(/([,:;_]| by ).*/g, "").trim()
 | 
			
		||||
    if (candidate)
 | 
			
		||||
      candidates.add(candidate)
 | 
			
		||||
 | 
			
		||||
    // Remove preceding/trailing numbers
 | 
			
		||||
    candidate = candidate.replace(/^\d+ | \d+$/g, "").trim()
 | 
			
		||||
    if (candidate)
 | 
			
		||||
      candidates.add(candidate)
 | 
			
		||||
 | 
			
		||||
    // Remove bitrate
 | 
			
		||||
    candidate = candidate.replace(/(^| )\d+k(bps)?( |$)/, " ").trim()
 | 
			
		||||
    if (candidate)
 | 
			
		||||
      candidates.add(candidate)
 | 
			
		||||
 | 
			
		||||
    // Remove edition
 | 
			
		||||
    candidate = candidate.replace(/ (2nd|3rd|\d+th)\s+ed(\.|ition)?/, "").trim()
 | 
			
		||||
    if (candidate)
 | 
			
		||||
      candidates.add(candidate)
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  async search(provider, title, author, isbn, asin, options = {}) {
 | 
			
		||||
    var books = []
 | 
			
		||||
    var maxTitleDistance = !isNaN(options.titleDistance) ? Number(options.titleDistance) : 4
 | 
			
		||||
    var maxAuthorDistance = !isNaN(options.authorDistance) ? Number(options.authorDistance) : 4
 | 
			
		||||
    const maxTitleDistance = !isNaN(options.titleDistance) ? Number(options.titleDistance) : 4
 | 
			
		||||
    const maxAuthorDistance = !isNaN(options.authorDistance) ? Number(options.authorDistance) : 4
 | 
			
		||||
    const maxFuzzySearches = 5
 | 
			
		||||
    var numFuzzySearches = 0
 | 
			
		||||
 | 
			
		||||
    if (!title)
 | 
			
		||||
      return books
 | 
			
		||||
 | 
			
		||||
    books = await this.runSearch(title, author, provider, asin, maxTitleDistance, maxAuthorDistance)
 | 
			
		||||
 | 
			
		||||
    if (!books.length && maxFuzzySearches > 0) {
 | 
			
		||||
      // normalize title and author
 | 
			
		||||
      title = title.trim().toLowerCase()
 | 
			
		||||
      author = author.trim().toLowerCase()
 | 
			
		||||
 | 
			
		||||
      // Now run up to maxFuzzySearches fuzzy searches
 | 
			
		||||
      var candidates = new Set()
 | 
			
		||||
      var cleanedAuthor = this.cleanAuthorForCompares(author)
 | 
			
		||||
      this.addTitleCandidate(title, candidates)
 | 
			
		||||
 | 
			
		||||
      // remove parentheses and their contents, and replace with a separator
 | 
			
		||||
      const cleanTitle = title.replace(/\[.*?\]|\(.*?\)|{.*?}/g, " - ")
 | 
			
		||||
      // Split title into hypen-separated parts
 | 
			
		||||
      const titleParts = cleanTitle.split(/ - | -|- /)
 | 
			
		||||
      for (const titlePart of titleParts) {
 | 
			
		||||
        this.addTitleCandidate(titlePart, candidates)
 | 
			
		||||
      }
 | 
			
		||||
      // We already searched for original title
 | 
			
		||||
      if (author == cleanedAuthor) candidates.delete(title)
 | 
			
		||||
      if (candidates.size > 0) {
 | 
			
		||||
        candidates = [...candidates]
 | 
			
		||||
        candidates.sort((a, b) => {
 | 
			
		||||
          // Candidates that include the author are likely low quality
 | 
			
		||||
          const includesAuthorDiff = !b.includes(cleanedAuthor) - !a.includes(cleanedAuthor)
 | 
			
		||||
          if (includesAuthorDiff) return includesAuthorDiff
 | 
			
		||||
          // Candidates that include only digits are also likely low quality
 | 
			
		||||
          const onlyDigits = /^\d+$/
 | 
			
		||||
          const includesOnlyDigitsDiff = !onlyDigits.test(b) - !onlyDigits.test(a)
 | 
			
		||||
          if (includesOnlyDigitsDiff) return includesOnlyDigitsDiff
 | 
			
		||||
          // Start with longer candidaets, as they are likely more specific
 | 
			
		||||
          const lengthDiff = b.length - a.length
 | 
			
		||||
          if (lengthDiff) return lengthDiff
 | 
			
		||||
          return b.localeCompare(a)
 | 
			
		||||
        })
 | 
			
		||||
        Logger.debug(`[BookFinder] Found ${candidates.length} fuzzy title candidates`)
 | 
			
		||||
        Logger.debug(candidates)
 | 
			
		||||
        for (const candidate of candidates) {
 | 
			
		||||
          if (++numFuzzySearches > maxFuzzySearches) return books
 | 
			
		||||
          books = await this.runSearch(candidate, cleanedAuthor, provider, asin, maxTitleDistance, maxAuthorDistance)
 | 
			
		||||
          if (books.length) break
 | 
			
		||||
        }
 | 
			
		||||
        if (!books.length) {
 | 
			
		||||
          // Now try searching without the author
 | 
			
		||||
          for (const candidate of candidates) {
 | 
			
		||||
            if (++numFuzzySearches > maxFuzzySearches) return books
 | 
			
		||||
            books = await this.runSearch(candidate, '', provider, asin, maxTitleDistance, maxAuthorDistance)
 | 
			
		||||
            if (books.length) break
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (provider === 'openlibrary') {
 | 
			
		||||
      books.sort((a, b) => {
 | 
			
		||||
        return a.totalDistance - b.totalDistance
 | 
			
		||||
      })
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return books
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  async runSearch(title, author, provider, asin, maxTitleDistance, maxAuthorDistance) {
 | 
			
		||||
    Logger.debug(`Book Search: title: "${title}", author: "${author || ''}", provider: ${provider}`)
 | 
			
		||||
 | 
			
		||||
    var books = []
 | 
			
		||||
 | 
			
		||||
    if (provider === 'google') {
 | 
			
		||||
      books = await this.getGoogleBooksResults(title, author)
 | 
			
		||||
    } else if (provider.startsWith('audible')) {
 | 
			
		||||
@ -203,23 +304,6 @@ class BookFinder {
 | 
			
		||||
    else {
 | 
			
		||||
      books = await this.getGoogleBooksResults(title, author)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (!books.length && !options.currentlyTryingCleaned) {
 | 
			
		||||
      var cleanedTitle = this.cleanTitleForCompares(title)
 | 
			
		||||
      var cleanedAuthor = this.cleanAuthorForCompares(author)
 | 
			
		||||
      if (cleanedTitle == title && cleanedAuthor == author) return books
 | 
			
		||||
 | 
			
		||||
      Logger.debug(`Book Search, no matches.. checking cleaned title and author`)
 | 
			
		||||
      options.currentlyTryingCleaned = true
 | 
			
		||||
      return this.search(provider, cleanedTitle, cleanedAuthor, isbn, asin, options)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (provider === 'openlibrary') {
 | 
			
		||||
      books.sort((a, b) => {
 | 
			
		||||
        return a.totalDistance - b.totalDistance
 | 
			
		||||
      })
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return books
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user