No More Sort Prefixes (#3895)

This commit is contained in:
Joe Milazzo 2025-07-05 17:18:11 -05:00 committed by GitHub
parent 9eadf956fb
commit 08c52b4281
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 4095 additions and 3 deletions

View File

@ -0,0 +1,178 @@
using API.Helpers;
using Xunit;
namespace API.Tests.Helpers;
public class BookSortTitlePrefixHelperTests
{
[Theory]
[InlineData("The Avengers", "Avengers")]
[InlineData("A Game of Thrones", "Game of Thrones")]
[InlineData("An American Tragedy", "American Tragedy")]
public void TestEnglishPrefixes(string inputString, string expected)
{
Assert.Equal(expected, BookSortTitlePrefixHelper.GetSortTitle(inputString));
}
[Theory]
[InlineData("El Quijote", "Quijote")]
[InlineData("La Casa de Papel", "Casa de Papel")]
[InlineData("Los Miserables", "Miserables")]
[InlineData("Las Vegas", "Vegas")]
[InlineData("Un Mundo Feliz", "Mundo Feliz")]
[InlineData("Una Historia", "Historia")]
public void TestSpanishPrefixes(string inputString, string expected)
{
Assert.Equal(expected, BookSortTitlePrefixHelper.GetSortTitle(inputString));
}
[Theory]
[InlineData("Le Petit Prince", "Petit Prince")]
[InlineData("La Belle et la Bête", "Belle et la Bête")]
[InlineData("Les Misérables", "Misérables")]
[InlineData("Un Amour de Swann", "Amour de Swann")]
[InlineData("Une Vie", "Vie")]
[InlineData("Des Souris et des Hommes", "Souris et des Hommes")]
public void TestFrenchPrefixes(string inputString, string expected)
{
Assert.Equal(expected, BookSortTitlePrefixHelper.GetSortTitle(inputString));
}
[Theory]
[InlineData("Der Herr der Ringe", "Herr der Ringe")]
[InlineData("Die Verwandlung", "Verwandlung")]
[InlineData("Das Kapital", "Kapital")]
[InlineData("Ein Sommernachtstraum", "Sommernachtstraum")]
[InlineData("Eine Geschichte", "Geschichte")]
public void TestGermanPrefixes(string inputString, string expected)
{
Assert.Equal(expected, BookSortTitlePrefixHelper.GetSortTitle(inputString));
}
[Theory]
[InlineData("Il Nome della Rosa", "Nome della Rosa")]
[InlineData("La Divina Commedia", "Divina Commedia")]
[InlineData("Lo Hobbit", "Hobbit")]
[InlineData("Gli Ultimi", "Ultimi")]
[InlineData("Le Città Invisibili", "Città Invisibili")]
[InlineData("Un Giorno", "Giorno")]
[InlineData("Una Notte", "Notte")]
public void TestItalianPrefixes(string inputString, string expected)
{
Assert.Equal(expected, BookSortTitlePrefixHelper.GetSortTitle(inputString));
}
[Theory]
[InlineData("O Alquimista", "Alquimista")]
[InlineData("A Moreninha", "Moreninha")]
[InlineData("Os Lusíadas", "Lusíadas")]
[InlineData("As Meninas", "Meninas")]
[InlineData("Um Defeito de Cor", "Defeito de Cor")]
[InlineData("Uma História", "História")]
public void TestPortuguesePrefixes(string inputString, string expected)
{
Assert.Equal(expected, BookSortTitlePrefixHelper.GetSortTitle(inputString));
}
[Theory]
[InlineData("", "")] // Empty string returns empty
[InlineData("Book", "Book")] // Single word, no change
[InlineData("Avengers", "Avengers")] // No prefix, no change
public void TestNoPrefixCases(string inputString, string expected)
{
Assert.Equal(expected, BookSortTitlePrefixHelper.GetSortTitle(inputString));
}
[Theory]
[InlineData("The", "The")] // Just a prefix word alone
[InlineData("A", "A")] // Just single letter prefix alone
[InlineData("Le", "Le")] // French prefix alone
public void TestPrefixWordAlone(string inputString, string expected)
{
Assert.Equal(expected, BookSortTitlePrefixHelper.GetSortTitle(inputString));
}
[Theory]
[InlineData("THE AVENGERS", "AVENGERS")] // All caps
[InlineData("the avengers", "avengers")] // All lowercase
[InlineData("The AVENGERS", "AVENGERS")] // Mixed case
[InlineData("tHe AvEnGeRs", "AvEnGeRs")] // Random case
public void TestCaseInsensitivity(string inputString, string expected)
{
Assert.Equal(expected, BookSortTitlePrefixHelper.GetSortTitle(inputString));
}
[Theory]
[InlineData("Then Came You", "Then Came You")] // "The" + "n" = not a prefix
[InlineData("And Then There Were None", "And Then There Were None")] // "An" + "d" = not a prefix
[InlineData("Elsewhere", "Elsewhere")] // "El" + "sewhere" = not a prefix (no space)
[InlineData("Lesson Plans", "Lesson Plans")] // "Les" + "son" = not a prefix (no space)
[InlineData("Theory of Everything", "Theory of Everything")] // "The" + "ory" = not a prefix
public void TestFalsePositivePrefixes(string inputString, string expected)
{
Assert.Equal(expected, BookSortTitlePrefixHelper.GetSortTitle(inputString));
}
[Theory]
[InlineData("The ", "The ")] // Prefix with only space after - returns original
[InlineData("La ", "La ")] // Same for other languages
[InlineData("El ", "El ")] // Same for Spanish
public void TestPrefixWithOnlySpaceAfter(string inputString, string expected)
{
Assert.Equal(expected, BookSortTitlePrefixHelper.GetSortTitle(inputString));
}
[Theory]
[InlineData("The Multiple Spaces", " Multiple Spaces")] // Doesn't trim extra spaces from remainder
[InlineData("Le Petit Prince", " Petit Prince")] // Leading space preserved in remainder
public void TestSpaceHandling(string inputString, string expected)
{
Assert.Equal(expected, BookSortTitlePrefixHelper.GetSortTitle(inputString));
}
[Theory]
[InlineData("The The Matrix", "The Matrix")] // Removes first "The", leaves second
[InlineData("A A Clockwork Orange", "A Clockwork Orange")] // Removes first "A", leaves second
[InlineData("El El Cid", "El Cid")] // Spanish version
public void TestRepeatedPrefixes(string inputString, string expected)
{
Assert.Equal(expected, BookSortTitlePrefixHelper.GetSortTitle(inputString));
}
[Theory]
[InlineData("L'Étranger", "L'Étranger")] // French contraction - no space, no change
[InlineData("D'Artagnan", "D'Artagnan")] // Contraction - no space, no change
[InlineData("The-Matrix", "The-Matrix")] // Hyphen instead of space - no change
[InlineData("The.Avengers", "The.Avengers")] // Period instead of space - no change
public void TestNonSpaceSeparators(string inputString, string expected)
{
Assert.Equal(expected, BookSortTitlePrefixHelper.GetSortTitle(inputString));
}
[Theory]
[InlineData("三国演义", "三国演义")] // Chinese - no processing due to CJK detection
[InlineData("한국어", "한국어")] // Korean - not in CJK range, would be processed normally
public void TestCjkLanguages(string inputString, string expected)
{
// NOTE: These don't do anything, I am waiting for user input on if these are needed
Assert.Equal(expected, BookSortTitlePrefixHelper.GetSortTitle(inputString));
}
[Theory]
[InlineData("नमस्ते दुनिया", "नमस्ते दुनिया")] // Hindi - not CJK, processed normally
[InlineData("مرحبا بالعالم", "مرحبا بالعالم")] // Arabic - not CJK, processed normally
[InlineData("שלום עולם", "שלום עולם")] // Hebrew - not CJK, processed normally
public void TestNonLatinNonCjkScripts(string inputString, string expected)
{
Assert.Equal(expected, BookSortTitlePrefixHelper.GetSortTitle(inputString));
}
[Theory]
[InlineData("в мире", "мире")] // Russian "в" (in) - should be removed
[InlineData("на столе", "столе")] // Russian "на" (on) - should be removed
[InlineData("с друзьями", "друзьями")] // Russian "с" (with) - should be removed
public void TestRussianPrefixes(string inputString, string expected)
{
Assert.Equal(expected, BookSortTitlePrefixHelper.GetSortTitle(inputString));
}
}

View File

@ -972,4 +972,27 @@ public class ScannerServiceTests : AbstractDbTest
Assert.Contains(postLib.Series, x => x.Name == "Immoral Guild");
Assert.Contains(postLib.Series, x => x.Name == "Futoku No Guild");
}
[Fact]
public async Task ScanLibrary_SortName_NoPrefix()
{
const string testcase = "Series with Prefix - Book.json";
var library = await _scannerHelper.GenerateScannerData(testcase);
library.RemovePrefixForSortName = true;
UnitOfWork.LibraryRepository.Update(library);
await UnitOfWork.CommitAsync();
var scanner = _scannerHelper.CreateServices();
await scanner.ScanLibrary(library.Id);
var postLib = await UnitOfWork.LibraryRepository.GetLibraryForIdAsync(library.Id, LibraryIncludes.Series);
Assert.NotNull(postLib);
Assert.Equal(1, postLib.Series.Count);
Assert.Equal("The Avengers", postLib.Series.First().Name);
Assert.Equal("Avengers", postLib.Series.First().SortName);
}
}

View File

@ -0,0 +1,3 @@
[
"The Avengers/The Avengers vol 1.pdf"
]

View File

@ -624,6 +624,8 @@ public class LibraryController : BaseApiController
library.AllowScrobbling = dto.AllowScrobbling;
library.AllowMetadataMatching = dto.AllowMetadataMatching;
library.EnableMetadata = dto.EnableMetadata;
library.RemovePrefixForSortName = dto.RemovePrefixForSortName;
library.LibraryFileTypes = dto.FileGroupTypes
.Select(t => new LibraryFileTypeGroup() {FileTypeGroup = t, LibraryId = library.Id})
.Distinct()

View File

@ -70,4 +70,8 @@ public sealed record LibraryDto
/// Allow Kavita to read metadata (ComicInfo.xml, Epub, PDF)
/// </summary>
public bool EnableMetadata { get; set; } = true;
/// <summary>
/// Should Kavita remove sort articles "The" for the sort name
/// </summary>
public bool RemovePrefixForSortName { get; set; } = false;
}

View File

@ -30,6 +30,8 @@ public sealed record UpdateLibraryDto
public bool AllowMetadataMatching { get; init; }
[Required]
public bool EnableMetadata { get; init; }
[Required]
public bool RemovePrefixForSortName { get; init; }
/// <summary>
/// What types of files to allow the scanner to pickup
/// </summary>

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,29 @@
using Microsoft.EntityFrameworkCore.Migrations;
#nullable disable
namespace API.Data.Migrations
{
/// <inheritdoc />
public partial class LibraryRemoveSortPrefix : Migration
{
/// <inheritdoc />
protected override void Up(MigrationBuilder migrationBuilder)
{
migrationBuilder.AddColumn<bool>(
name: "RemovePrefixForSortName",
table: "Library",
type: "INTEGER",
nullable: false,
defaultValue: false);
}
/// <inheritdoc />
protected override void Down(MigrationBuilder migrationBuilder)
{
migrationBuilder.DropColumn(
name: "RemovePrefixForSortName",
table: "Library");
}
}
}

View File

@ -1341,6 +1341,9 @@ namespace API.Data.Migrations
b.Property<string>("PrimaryColor")
.HasColumnType("TEXT");
b.Property<bool>("RemovePrefixForSortName")
.HasColumnType("INTEGER");
b.Property<string>("SecondaryColor")
.HasColumnType("TEXT");

View File

@ -52,6 +52,10 @@ public class Library : IEntityDate, IHasCoverImage
/// Should Kavita read metadata files from the library
/// </summary>
public bool EnableMetadata { get; set; } = true;
/// <summary>
/// Should Kavita remove sort articles "The" for the sort name
/// </summary>
public bool RemovePrefixForSortName { get; set; } = false;
public DateTime Created { get; set; }

View File

@ -0,0 +1,101 @@
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
namespace API.Helpers;
/// <summary>
/// Responsible for parsing book titles "The man on the street" and removing the prefix -> "man on the street".
/// </summary>
/// <remarks>This code is performance sensitive</remarks>
public static class BookSortTitlePrefixHelper
{
private static readonly Dictionary<string, byte> PrefixLookup;
private static readonly Dictionary<char, List<string>> PrefixesByFirstChar;
static BookSortTitlePrefixHelper()
{
var prefixes = new[]
{
// English
"the", "a", "an",
// Spanish
"el", "la", "los", "las", "un", "una", "unos", "unas",
// French
"le", "la", "les", "un", "une", "des",
// German
"der", "die", "das", "den", "dem", "ein", "eine", "einen", "einer",
// Italian
"il", "lo", "la", "gli", "le", "un", "uno", "una",
// Portuguese
"o", "a", "os", "as", "um", "uma", "uns", "umas",
// Russian (transliterated common ones)
"в", "на", "с", "к", "от", "для",
};
// Build lookup structures
PrefixLookup = new Dictionary<string, byte>(prefixes.Length, StringComparer.OrdinalIgnoreCase);
PrefixesByFirstChar = new Dictionary<char, List<string>>();
foreach (var prefix in prefixes)
{
PrefixLookup[prefix] = 1;
var firstChar = char.ToLowerInvariant(prefix[0]);
if (!PrefixesByFirstChar.TryGetValue(firstChar, out var list))
{
list = [];
PrefixesByFirstChar[firstChar] = list;
}
list.Add(prefix);
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ReadOnlySpan<char> GetSortTitle(ReadOnlySpan<char> title)
{
if (title.IsEmpty) return title;
// Fast detection of script type by first character
var firstChar = title[0];
// CJK Unicode ranges - no processing needed for most cases
if ((firstChar >= 0x4E00 && firstChar <= 0x9FFF) || // CJK Unified
(firstChar >= 0x3040 && firstChar <= 0x309F) || // Hiragana
(firstChar >= 0x30A0 && firstChar <= 0x30FF)) // Katakana
{
return title;
}
var firstSpaceIndex = title.IndexOf(' ');
if (firstSpaceIndex <= 0) return title;
var potentialPrefix = title.Slice(0, firstSpaceIndex);
// Fast path: check if first character could match any prefix
firstChar = char.ToLowerInvariant(potentialPrefix[0]);
if (!PrefixesByFirstChar.ContainsKey(firstChar))
return title;
// Only do the expensive lookup if first character matches
if (PrefixLookup.ContainsKey(potentialPrefix.ToString()))
{
var remainder = title.Slice(firstSpaceIndex + 1);
return remainder.IsEmpty ? title : remainder;
}
return title;
}
/// <summary>
/// Removes the sort prefix
/// </summary>
/// <param name="title"></param>
/// <returns></returns>
public static string GetSortTitle(string title)
{
var result = GetSortTitle(title.AsSpan());
return result.ToString();
}
}

View File

@ -126,13 +126,17 @@ public class ProcessSeries : IProcessSeries
series.Format = firstParsedInfo.Format;
}
var removePrefix = library.RemovePrefixForSortName;
var sortName = removePrefix ? BookSortTitlePrefixHelper.GetSortTitle(series.Name) : series.Name;
if (string.IsNullOrEmpty(series.SortName))
{
series.SortName = series.Name;
series.SortName = sortName;
}
if (!series.SortNameLocked)
{
series.SortName = series.Name;
series.SortName = sortName;
if (!string.IsNullOrEmpty(firstParsedInfo.SeriesSort))
{
series.SortName = firstParsedInfo.SeriesSort;

View File

@ -32,6 +32,7 @@ export interface Library {
allowScrobbling: boolean;
allowMetadataMatching: boolean;
enableMetadata: boolean;
removePrefixForSortName: boolean;
collapseSeriesRelationships: boolean;
libraryFileTypes: Array<FileTypeGroup>;
excludePatterns: Array<string>;

View File

@ -127,6 +127,16 @@
</app-setting-item>
</div>
<div class="row g-0 mt-4 mb-4">
<app-setting-switch [title]="t('remove-prefix-for-sortname-label')" [subtitle]="t('remove-prefix-for-sortname-tooltip')">
<ng-template #switch>
<div class="form-check form-switch float-end">
<input type="checkbox" id="remove-prefix-for-sortname" role="switch" formControlName="removePrefixForSortName" class="form-check-input">
</div>
</ng-template>
</app-setting-switch>
</div>
<div class="row g-0 mt-4 mb-4">
<app-setting-switch [title]="t('enable-metadata-label')" [subtitle]="t('enable-metadata-tooltip')">
<ng-template #switch>

View File

@ -115,6 +115,7 @@ export class LibrarySettingsModalComponent implements OnInit {
allowMetadataMatching: new FormControl<boolean>(true, { nonNullable: true, validators: [] }),
collapseSeriesRelationships: new FormControl<boolean>(false, { nonNullable: true, validators: [] }),
enableMetadata: new FormControl<boolean>(true, { nonNullable: true, validators: [] }), // required validator doesn't check value, just if true
removePrefixForSortName: new FormControl<boolean>(false, { nonNullable: true, validators: [] }),
});
selectedFolders: string[] = [];
@ -273,7 +274,8 @@ export class LibrarySettingsModalComponent implements OnInit {
this.libraryForm.get('allowScrobbling')?.setValue(this.IsKavitaPlusEligible ? this.library.allowScrobbling : false);
this.libraryForm.get('allowMetadataMatching')?.setValue(this.IsMetadataDownloadEligible ? this.library.allowMetadataMatching : false);
this.libraryForm.get('excludePatterns')?.setValue(this.excludePatterns ? this.library.excludePatterns : false);
this.libraryForm.get('enableMetadata')?.setValue(this.library.enableMetadata, true);
this.libraryForm.get('enableMetadata')?.setValue(this.library.enableMetadata);
this.libraryForm.get('removePrefixForSortName')?.setValue(this.library.removePrefixForSortName);
this.selectedFolders = this.library.folders;
this.madeChanges = false;

View File

@ -1131,6 +1131,8 @@
"include-in-search-tooltip": "Should series and any derived information (genres, people, files) from the library be included in search results.",
"enable-metadata-label": "Enable Metadata (ComicInfo/Epub/PDF)",
"enable-metadata-tooltip": "Allow Kavita to read metadata files which override filename parsing.",
"remove-prefix-for-sortname-label": "Remove common prefixes for Sort Name",
"remove-prefix-for-sortname-tooltip": "Kavita will remove common prefixes like 'The', 'A', 'An' from titles for sort name. Does not override set metadata.",
"force-scan": "Force Scan",
"force-scan-tooltip": "This will force a scan on the library, treating like a fresh scan",
"reset": "{{common.reset}}",