mirror of
https://github.com/Kareadita/Kavita.git
synced 2025-07-09 03:04:19 -04:00
102 lines
3.3 KiB
C#
102 lines
3.3 KiB
C#
using System;
|
||
using System.Collections.Generic;
|
||
using System.Runtime.CompilerServices;
|
||
|
||
namespace API.Helpers;
|
||
|
||
/// <summary>
/// Responsible for parsing book titles "The man on the street" and removing the prefix -> "man on the street".
/// </summary>
/// <remarks>
/// This code is performance sensitive. The span-based overload performs no heap allocations, and the
/// string-based overload only allocates when a prefix was actually removed.
/// </remarks>
public static class BookSortTitlePrefixHelper
{
    /// <summary>
    /// Known prefixes, de-duplicated and bucketed by their first character (lower-cased with the invariant culture).
    /// A title's first word is only ever compared against the bucket of its own first character.
    /// </summary>
    private static readonly Dictionary<char, string[]> PrefixesByFirstChar;

    static BookSortTitlePrefixHelper()
    {
        string[] prefixes =
        [
            // English
            "the", "a", "an",
            // Spanish
            "el", "la", "los", "las", "un", "una", "unos", "unas",
            // French
            "le", "la", "les", "un", "une", "des",
            // German
            "der", "die", "das", "den", "dem", "ein", "eine", "einen", "einer",
            // Italian
            "il", "lo", "la", "gli", "le", "un", "uno", "una",
            // Portuguese
            "o", "a", "os", "as", "um", "uma", "uns", "umas",
            // Russian (common prepositions, written in Cyrillic)
            "в", "на", "с", "к", "от", "для",
        ];

        // Several languages share the same word ("la", "un", "a", ...); keep one copy of each
        // so the per-title scan never compares against the same prefix twice.
        var seen = new HashSet<string>(prefixes.Length, StringComparer.OrdinalIgnoreCase);
        var buckets = new Dictionary<char, List<string>>();

        foreach (var prefix in prefixes)
        {
            if (!seen.Add(prefix)) continue;

            var key = char.ToLowerInvariant(prefix[0]);
            if (!buckets.TryGetValue(key, out var bucket))
            {
                bucket = [];
                buckets[key] = bucket;
            }

            bucket.Add(prefix);
        }

        // Freeze the buckets into arrays: they are read-only from here on.
        PrefixesByFirstChar = new Dictionary<char, string[]>(buckets.Count);
        foreach (var (key, bucket) in buckets)
        {
            PrefixesByFirstChar[key] = bucket.ToArray();
        }
    }

    /// <summary>
    /// Returns the title with a leading known prefix (first word followed by a single space) removed.
    /// </summary>
    /// <param name="title">The title to process.</param>
    /// <returns>
    /// A slice of <paramref name="title"/> starting after the first space when the first word is a known prefix
    /// (compared with <see cref="StringComparison.OrdinalIgnoreCase"/>); otherwise <paramref name="title"/> unchanged.
    /// The title is also returned unchanged when it is empty, starts with a CJK/Hiragana/Katakana character,
    /// contains no space after its first character, or when nothing follows the prefix's space.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ReadOnlySpan<char> GetSortTitle(ReadOnlySpan<char> title)
    {
        if (title.IsEmpty) return title;

        var firstChar = title[0];

        // Fast detection of script type by first character: these titles are returned untouched.
        if (firstChar is (>= '\u4E00' and <= '\u9FFF')   // CJK Unified
            or (>= '\u3040' and <= '\u309F')             // Hiragana
            or (>= '\u30A0' and <= '\u30FF'))            // Katakana
        {
            return title;
        }

        // A space at index 0 (or no space at all) means there is no leading word to strip.
        var firstSpaceIndex = title.IndexOf(' ');
        if (firstSpaceIndex <= 0) return title;

        // Fast path: bail out unless some known prefix starts with the same character.
        if (!PrefixesByFirstChar.TryGetValue(char.ToLowerInvariant(firstChar), out var candidates))
        {
            return title;
        }

        var potentialPrefix = title[..firstSpaceIndex];

        // Compare directly on the span so no temporary string is created for the lookup.
        foreach (var candidate in candidates)
        {
            if (!potentialPrefix.Equals(candidate.AsSpan(), StringComparison.OrdinalIgnoreCase)) continue;

            // Only the single separating space is consumed; any further whitespace stays in the result.
            var remainder = title[(firstSpaceIndex + 1)..];
            return remainder.IsEmpty ? title : remainder;
        }

        return title;
    }

    /// <summary>
    /// Removes the sort prefix
    /// </summary>
    /// <param name="title">The title to process. A null value is treated like an empty string.</param>
    /// <returns>
    /// The title without its leading prefix. When no prefix is removed the original string instance is
    /// returned; for null or empty input an empty string is returned.
    /// </returns>
    public static string GetSortTitle(string title)
    {
        if (string.IsNullOrEmpty(title)) return string.Empty;

        var result = GetSortTitle(title.AsSpan());

        // A removed prefix always shortens the title, so equal lengths mean nothing changed
        // and the input can be handed back without copying it.
        return result.Length == title.Length ? title : result.ToString();
    }
}
|