mirror of
				https://github.com/Kareadita/Kavita.git
				synced 2025-11-04 03:27:05 -05:00 
			
		
		
		
	
		
			
				
	
	
		
			102 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
			
		
		
	
	
			102 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
using System;
 | 
						||
using System.Collections.Generic;
 | 
						||
using System.Runtime.CompilerServices;
 | 
						||
 | 
						||
namespace API.Helpers;
 | 
						||
 | 
						||
/// <summary>
/// Responsible for parsing book titles "The man on the street" and removing the prefix -> "man on the street".
/// </summary>
/// <remarks>
/// This code is performance sensitive. The span overload performs its lookup without allocating:
/// the first word of the title is compared in place against a small bucket of known prefixes that
/// share its first character.
/// </remarks>
public static class BookSortTitlePrefixHelper
{
    /// <summary>
    /// Known prefixes grouped by their first character (lower-cased with the invariant culture).
    /// Each bucket holds every distinct prefix starting with that character, so a lookup is one
    /// dictionary probe followed by a handful of ordinal, case-insensitive span comparisons.
    /// </summary>
    private static readonly Dictionary<char, List<string>> PrefixesByFirstChar;

    static BookSortTitlePrefixHelper()
    {
        var prefixes = new[]
        {
            // English
            "the", "a", "an",
            // Spanish
            "el", "la", "los", "las", "un", "una", "unos", "unas",
            // French
            "le", "la", "les", "un", "une", "des",
            // German
            "der", "die", "das", "den", "dem", "ein", "eine", "einen", "einer",
            // Italian
            "il", "lo", "la", "gli", "le", "un", "uno", "una",
            // Portuguese
            "o", "a", "os", "as", "um", "uma", "uns", "umas",
            // Russian (common prepositions, written in Cyrillic)
            "в", "на", "с", "к", "от", "для",
        };

        // Several languages share the same word ("la", "un", "a", ...); track what has already been
        // registered so each bucket contains a prefix only once.
        var seen = new HashSet<string>(prefixes.Length, StringComparer.OrdinalIgnoreCase);
        PrefixesByFirstChar = new Dictionary<char, List<string>>();

        foreach (var prefix in prefixes)
        {
            if (!seen.Add(prefix)) continue;

            var firstChar = char.ToLowerInvariant(prefix[0]);
            if (!PrefixesByFirstChar.TryGetValue(firstChar, out var list))
            {
                list = new List<string>();
                PrefixesByFirstChar[firstChar] = list;
            }
            list.Add(prefix);
        }
    }

    /// <summary>
    /// Returns the part of <paramref name="title"/> that follows a leading known prefix and the single
    /// space after it. The match is ordinal and case-insensitive and applies to the first word only.
    /// </summary>
    /// <param name="title">The title to inspect.</param>
    /// <returns>
    /// A slice of <paramref name="title"/> starting after the prefix and its trailing space, or
    /// <paramref name="title"/> unchanged when it is empty, starts with a CJK/Hiragana/Katakana character,
    /// contains no space after its first character, does not start with a known prefix, or would be
    /// empty once the prefix is removed.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ReadOnlySpan<char> GetSortTitle(ReadOnlySpan<char> title)
    {
        if (title.IsEmpty) return title;

        // Fast detection of script type by first character
        var firstChar = title[0];

        // CJK Unicode ranges - no processing needed for most cases
        if ((firstChar >= 0x4E00 && firstChar <= 0x9FFF) ||   // CJK Unified
            (firstChar >= 0x3040 && firstChar <= 0x309F) ||   // Hiragana
            (firstChar >= 0x30A0 && firstChar <= 0x30FF))     // Katakana
        {
            return title;
        }

        // A prefix must be followed by a space; an index of 0 means the title starts with a space
        // and therefore has no first word to test.
        var firstSpaceIndex = title.IndexOf(' ');
        if (firstSpaceIndex <= 0) return title;

        var potentialPrefix = title.Slice(0, firstSpaceIndex);

        // Fast path: bail out unless some known prefix starts with the same character
        if (!PrefixesByFirstChar.TryGetValue(char.ToLowerInvariant(potentialPrefix[0]), out var candidates))
            return title;

        // Compare the span in place against the few candidates in the bucket instead of
        // materialising a string for a dictionary lookup.
        foreach (var candidate in candidates)
        {
            if (!potentialPrefix.Equals(candidate.AsSpan(), StringComparison.OrdinalIgnoreCase)) continue;

            var remainder = title.Slice(firstSpaceIndex + 1);
            // Keep the original when nothing follows the prefix (e.g. "The ")
            return remainder.IsEmpty ? title : remainder;
        }

        return title;
    }

    /// <summary>
    /// Removes the sort prefix
    /// </summary>
    /// <param name="title">The title to inspect. A null title is treated as empty.</param>
    /// <returns>
    /// The title without its leading prefix. When no prefix is removed the original string instance is
    /// returned; a null <paramref name="title"/> yields an empty string.
    /// </returns>
    public static string GetSortTitle(string title)
    {
        var result = GetSortTitle(title.AsSpan());

        // The result is always the whole title or a suffix of it, so equal length means nothing
        // was stripped and the original instance can be reused instead of copying it.
        if (title is not null && result.Length == title.Length) return title;

        return result.ToString();
    }
}
 |