using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;

namespace API.Helpers;

/// <summary>
/// Responsible for parsing book titles "The man on the street" and removing the prefix -> "man on the street".
/// </summary>
/// <remarks>
/// This code is performance sensitive: the span overload performs no heap allocation, and the
/// string overload only allocates when a prefix is actually removed.
/// </remarks>
public static class BookSortTitlePrefixHelper
{
    /// <summary>
    /// Known prefixes grouped by their invariant-lower-cased first character. A single lookup both
    /// rejects titles that cannot start with a prefix and yields the short list of candidates to compare.
    /// </summary>
    private static readonly Dictionary<char, List<string>> PrefixesByFirstChar;

    static BookSortTitlePrefixHelper()
    {
        var prefixes = new[]
        {
            // English
            "the", "a", "an",

            // Spanish
            "el", "la", "los", "las", "un", "una", "unos", "unas",

            // French
            "le", "la", "les", "un", "une", "des",

            // German
            "der", "die", "das", "den", "dem", "ein", "eine", "einen", "einer",

            // Italian
            "il", "lo", "la", "gli", "le", "un", "uno", "una",

            // Portuguese
            "o", "a", "os", "as", "um", "uma", "uns", "umas",

            // Russian (common prepositions, written in Cyrillic)
            "в", "на", "с", "к", "от", "для",
        };

        // Several languages share prefixes ("la", "un", "a", ...); track what has already been
        // registered so each bucket holds every prefix exactly once.
        var seen = new HashSet<string>(prefixes.Length, StringComparer.OrdinalIgnoreCase);
        PrefixesByFirstChar = new Dictionary<char, List<string>>();

        foreach (var prefix in prefixes)
        {
            if (!seen.Add(prefix)) continue;

            var firstChar = char.ToLowerInvariant(prefix[0]);
            if (!PrefixesByFirstChar.TryGetValue(firstChar, out var list))
            {
                list = [];
                PrefixesByFirstChar[firstChar] = list;
            }

            list.Add(prefix);
        }
    }

    /// <summary>
    /// Removes a leading sort prefix (first word followed by a space) from <paramref name="title"/>.
    /// </summary>
    /// <param name="title">The title to inspect.</param>
    /// <returns>
    /// A slice of <paramref name="title"/> starting after the prefix and its trailing space, or
    /// <paramref name="title"/> unchanged when it is empty, starts with a CJK/Hiragana/Katakana
    /// character, has no space after its first word, starts with a space, does not start with a
    /// known prefix, or would be empty once the prefix is removed.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ReadOnlySpan<char> GetSortTitle(ReadOnlySpan<char> title)
    {
        if (title.IsEmpty) return title;

        // Fast detection of script type by first character
        var firstChar = title[0];

        // CJK Unicode ranges - returned as-is, no prefix processing
        if ((firstChar >= 0x4E00 && firstChar <= 0x9FFF) || // CJK Unified
            (firstChar >= 0x3040 && firstChar <= 0x309F) || // Hiragana
            (firstChar >= 0x30A0 && firstChar <= 0x30FF))   // Katakana
        {
            return title;
        }

        // No space (single word) or a leading space: nothing to strip
        var firstSpaceIndex = title.IndexOf(' ');
        if (firstSpaceIndex <= 0) return title;

        // Fast path: bail out unless some prefix starts with this character
        if (!PrefixesByFirstChar.TryGetValue(char.ToLowerInvariant(firstChar), out var candidates))
        {
            return title;
        }

        // Compare span-to-string so no temporary string is allocated for the lookup
        var potentialPrefix = title.Slice(0, firstSpaceIndex);
        foreach (var candidate in candidates)
        {
            if (!potentialPrefix.Equals(candidate, StringComparison.OrdinalIgnoreCase)) continue;

            // Never reduce a title to nothing ("The " stays "The ")
            var remainder = title.Slice(firstSpaceIndex + 1);
            return remainder.IsEmpty ? title : remainder;
        }

        return title;
    }

    /// <summary>
    /// Removes the sort prefix
    /// </summary>
    /// <param name="title">The title to inspect. A null or empty title yields an empty string.</param>
    /// <returns>
    /// The title without its leading prefix; the original string instance is returned when no
    /// prefix was removed.
    /// </returns>
    public static string GetSortTitle(string title)
    {
        if (string.IsNullOrEmpty(title)) return string.Empty;

        var result = GetSortTitle(title.AsSpan());

        // The result is either the whole title or a strict suffix of it, so equal length means
        // "unchanged" and the existing instance can be reused instead of copying it.
        return result.Length == title.Length ? title : result.ToString();
    }
}