mirror of
				https://github.com/jellyfin/jellyfin.git
				synced 2025-11-04 03:27:21 -05:00 
			
		
		
		
	
		
			
				
	
	
		
			375 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
			
		
		
	
	
			375 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
using System;
 | 
						|
 | 
						|
namespace NLangDetect.Core.Extensions
 | 
						|
{
 | 
						|
  public static class CharExtensions
 | 
						|
  {
 | 
						|
    /// <summary>Smallest value accepted by <c>IsValidCodePoint</c> (inclusive).</summary>
    private const int MIN_CODE_POINT = 0x0;

    /// <summary>Largest value accepted by <c>IsValidCodePoint</c> (inclusive).</summary>
    private const int MAX_CODE_POINT = 0x10FFFF;
 | 
						|
 | 
						|
    /// <summary>
    /// First code point of every range in the lookup table, listed in ascending order.
    /// Entry <c>i</c> pairs with entry <c>i</c> of <c>_unicodeBlocks</c>; ranges marked
    /// "unassigned" below pair with a <c>null</c> entry there.
    /// </summary>
    private static readonly int[] _unicodeBlockStarts = new int[]
      {
        0x000000, // Basic Latin
        0x000080, // Latin-1 Supplement
        0x000100, // Latin Extended-A
        0x000180, // Latin Extended-B
        0x000250, // IPA Extensions
        0x0002B0, // Spacing Modifier Letters
        0x000300, // Combining Diacritical Marks
        0x000370, // Greek and Coptic
        0x000400, // Cyrillic
        0x000500, // Cyrillic Supplementary
        0x000530, // Armenian
        0x000590, // Hebrew
        0x000600, // Arabic
        0x000700, // Syriac
        0x000750, // unassigned
        0x000780, // Thaana
        0x0007C0, // unassigned
        0x000900, // Devanagari
        0x000980, // Bengali
        0x000A00, // Gurmukhi
        0x000A80, // Gujarati
        0x000B00, // Oriya
        0x000B80, // Tamil
        0x000C00, // Telugu
        0x000C80, // Kannada
        0x000D00, // Malayalam
        0x000D80, // Sinhala
        0x000E00, // Thai
        0x000E80, // Lao
        0x000F00, // Tibetan
        0x001000, // Myanmar
        0x0010A0, // Georgian
        0x001100, // Hangul Jamo
        0x001200, // Ethiopic
        0x001380, // unassigned
        0x0013A0, // Cherokee
        0x001400, // Unified Canadian Aboriginal Syllabics
        0x001680, // Ogham
        0x0016A0, // Runic
        0x001700, // Tagalog
        0x001720, // Hanunoo
        0x001740, // Buhid
        0x001760, // Tagbanwa
        0x001780, // Khmer
        0x001800, // Mongolian
        0x0018B0, // unassigned
        0x001900, // Limbu
        0x001950, // Tai Le
        0x001980, // unassigned
        0x0019E0, // Khmer Symbols
        0x001A00, // unassigned
        0x001D00, // Phonetic Extensions
        0x001D80, // unassigned
        0x001E00, // Latin Extended Additional
        0x001F00, // Greek Extended
        0x002000, // General Punctuation
        0x002070, // Superscripts and Subscripts
        0x0020A0, // Currency Symbols
        0x0020D0, // Combining Diacritical Marks for Symbols
        0x002100, // Letterlike Symbols
        0x002150, // Number Forms
        0x002190, // Arrows
        0x002200, // Mathematical Operators
        0x002300, // Miscellaneous Technical
        0x002400, // Control Pictures
        0x002440, // Optical Character Recognition
        0x002460, // Enclosed Alphanumerics
        0x002500, // Box Drawing
        0x002580, // Block Elements
        0x0025A0, // Geometric Shapes
        0x002600, // Miscellaneous Symbols
        0x002700, // Dingbats
        0x0027C0, // Miscellaneous Mathematical Symbols-A
        0x0027F0, // Supplemental Arrows-A
        0x002800, // Braille Patterns
        0x002900, // Supplemental Arrows-B
        0x002980, // Miscellaneous Mathematical Symbols-B
        0x002A00, // Supplemental Mathematical Operators
        0x002B00, // Miscellaneous Symbols and Arrows
        0x002C00, // unassigned
        0x002E80, // CJK Radicals Supplement
        0x002F00, // Kangxi Radicals
        0x002FE0, // unassigned
        0x002FF0, // Ideographic Description Characters
        0x003000, // CJK Symbols and Punctuation
        0x003040, // Hiragana
        0x0030A0, // Katakana
        0x003100, // Bopomofo
        0x003130, // Hangul Compatibility Jamo
        0x003190, // Kanbun
        0x0031A0, // Bopomofo Extended
        0x0031C0, // unassigned
        0x0031F0, // Katakana Phonetic Extensions
        0x003200, // Enclosed CJK Letters and Months
        0x003300, // CJK Compatibility
        0x003400, // CJK Unified Ideographs Extension A
        0x004DC0, // Yijing Hexagram Symbols
        0x004E00, // CJK Unified Ideographs
        0x00A000, // Yi Syllables
        0x00A490, // Yi Radicals
        0x00A4D0, // unassigned
        0x00AC00, // Hangul Syllables
        0x00D7B0, // unassigned
        0x00D800, // High Surrogates
        0x00DB80, // High Private Use Surrogates
        0x00DC00, // Low Surrogates
        0x00E000, // Private Use
        0x00F900, // CJK Compatibility Ideographs
        0x00FB00, // Alphabetic Presentation Forms
        0x00FB50, // Arabic Presentation Forms-A
        0x00FE00, // Variation Selectors
        0x00FE10, // unassigned
        0x00FE20, // Combining Half Marks
        0x00FE30, // CJK Compatibility Forms
        0x00FE50, // Small Form Variants
        0x00FE70, // Arabic Presentation Forms-B
        0x00FF00, // Halfwidth and Fullwidth Forms
        0x00FFF0, // Specials
        0x010000, // Linear B Syllabary
        0x010080, // Linear B Ideograms
        0x010100, // Aegean Numbers
        0x010140, // unassigned
        0x010300, // Old Italic
        0x010330, // Gothic
        0x010350, // unassigned
        0x010380, // Ugaritic
        0x0103A0, // unassigned
        0x010400, // Deseret
        0x010450, // Shavian
        0x010480, // Osmanya
        0x0104B0, // unassigned
        0x010800, // Cypriot Syllabary
        0x010840, // unassigned
        0x01D000, // Byzantine Musical Symbols
        0x01D100, // Musical Symbols
        0x01D200, // unassigned
        0x01D300, // Tai Xuan Jing Symbols
        0x01D360, // unassigned
        0x01D400, // Mathematical Alphanumeric Symbols
        0x01D800, // unassigned
        0x020000, // CJK Unified Ideographs Extension B
        0x02A6E0, // unassigned
        0x02F800, // CJK Compatibility Ideographs Supplement
        0x02FA20, // unassigned
        0x0E0000, // Tags
        0x0E0080, // unassigned
        0x0E0100, // Variation Selectors Supplement
        0x0E01F0, // unassigned
        0x0F0000, // Supplementary Private Use Area-A
        0x100000, // Supplementary Private Use Area-B
      };
 | 
						|
 | 
						|
    /// <summary>
    /// Block value for each range, in the same order as <c>_unicodeBlockStarts</c>.
    /// The trailing comment on each entry is the start code point it pairs with.
    /// A <c>null</c> entry is a range that table labels "unassigned".
    /// </summary>
    private static readonly UnicodeBlock?[] _unicodeBlocks = new UnicodeBlock?[]
      {
        UnicodeBlock.BasicLatin, // U+0000
        UnicodeBlock.Latin1Supplement, // U+0080
        UnicodeBlock.LatinExtendedA, // U+0100
        UnicodeBlock.LatinExtendedB, // U+0180
        UnicodeBlock.IpaExtensions, // U+0250
        UnicodeBlock.SpacingModifierLetters, // U+02B0
        UnicodeBlock.CombiningDiacriticalMarks, // U+0300
        UnicodeBlock.Greek, // U+0370
        UnicodeBlock.Cyrillic, // U+0400
        UnicodeBlock.CyrillicSupplementary, // U+0500
        UnicodeBlock.Armenian, // U+0530
        UnicodeBlock.Hebrew, // U+0590
        UnicodeBlock.Arabic, // U+0600
        UnicodeBlock.Syriac, // U+0700
        null, // U+0750
        UnicodeBlock.Thaana, // U+0780
        null, // U+07C0
        UnicodeBlock.Devanagari, // U+0900
        UnicodeBlock.Bengali, // U+0980
        UnicodeBlock.Gurmukhi, // U+0A00
        UnicodeBlock.Gujarati, // U+0A80
        UnicodeBlock.Oriya, // U+0B00
        UnicodeBlock.Tamil, // U+0B80
        UnicodeBlock.Telugu, // U+0C00
        UnicodeBlock.Kannada, // U+0C80
        UnicodeBlock.Malayalam, // U+0D00
        UnicodeBlock.Sinhala, // U+0D80
        UnicodeBlock.Thai, // U+0E00
        UnicodeBlock.Lao, // U+0E80
        UnicodeBlock.Tibetan, // U+0F00
        UnicodeBlock.Myanmar, // U+1000
        UnicodeBlock.Georgian, // U+10A0
        UnicodeBlock.HangulJamo, // U+1100
        UnicodeBlock.Ethiopic, // U+1200
        null, // U+1380
        UnicodeBlock.Cherokee, // U+13A0
        UnicodeBlock.UnifiedCanadianAboriginalSyllabics, // U+1400
        UnicodeBlock.Ogham, // U+1680
        UnicodeBlock.Runic, // U+16A0
        UnicodeBlock.Tagalog, // U+1700
        UnicodeBlock.Hanunoo, // U+1720
        UnicodeBlock.Buhid, // U+1740
        UnicodeBlock.Tagbanwa, // U+1760
        UnicodeBlock.Khmer, // U+1780
        UnicodeBlock.Mongolian, // U+1800
        null, // U+18B0
        UnicodeBlock.Limbu, // U+1900
        UnicodeBlock.TaiLe, // U+1950
        null, // U+1980
        UnicodeBlock.KhmerSymbols, // U+19E0
        null, // U+1A00
        UnicodeBlock.PhoneticExtensions, // U+1D00
        null, // U+1D80
        UnicodeBlock.LatinExtendedAdditional, // U+1E00
        UnicodeBlock.GreekExtended, // U+1F00
        UnicodeBlock.GeneralPunctuation, // U+2000
        UnicodeBlock.SuperscriptsAndSubscripts, // U+2070
        UnicodeBlock.CurrencySymbols, // U+20A0
        UnicodeBlock.CombiningMarksForSymbols, // U+20D0
        UnicodeBlock.LetterlikeSymbols, // U+2100
        UnicodeBlock.NumberForms, // U+2150
        UnicodeBlock.Arrows, // U+2190
        UnicodeBlock.MathematicalOperators, // U+2200
        UnicodeBlock.MiscellaneousTechnical, // U+2300
        UnicodeBlock.ControlPictures, // U+2400
        UnicodeBlock.OpticalCharacterRecognition, // U+2440
        UnicodeBlock.EnclosedAlphanumerics, // U+2460
        UnicodeBlock.BoxDrawing, // U+2500
        UnicodeBlock.BlockElements, // U+2580
        UnicodeBlock.GeometricShapes, // U+25A0
        UnicodeBlock.MiscellaneousSymbols, // U+2600
        UnicodeBlock.Dingbats, // U+2700
        UnicodeBlock.MiscellaneousMathematicalSymbolsA, // U+27C0
        UnicodeBlock.SupplementalArrowsA, // U+27F0
        UnicodeBlock.BraillePatterns, // U+2800
        UnicodeBlock.SupplementalArrowsB, // U+2900
        UnicodeBlock.MiscellaneousMathematicalSymbolsB, // U+2980
        UnicodeBlock.SupplementalMathematicalOperators, // U+2A00
        UnicodeBlock.MiscellaneousSymbolsAndArrows, // U+2B00
        null, // U+2C00
        UnicodeBlock.CjkRadicalsSupplement, // U+2E80
        UnicodeBlock.KangxiRadicals, // U+2F00
        null, // U+2FE0
        UnicodeBlock.IdeographicDescriptionCharacters, // U+2FF0
        UnicodeBlock.CjkSymbolsAndPunctuation, // U+3000
        UnicodeBlock.Hiragana, // U+3040
        UnicodeBlock.Katakana, // U+30A0
        UnicodeBlock.Bopomofo, // U+3100
        UnicodeBlock.HangulCompatibilityJamo, // U+3130
        UnicodeBlock.Kanbun, // U+3190
        UnicodeBlock.BopomofoExtended, // U+31A0
        null, // U+31C0
        UnicodeBlock.KatakanaPhoneticExtensions, // U+31F0
        UnicodeBlock.EnclosedCjkLettersAndMonths, // U+3200
        UnicodeBlock.CjkCompatibility, // U+3300
        UnicodeBlock.CjkUnifiedIdeographsExtensionA, // U+3400
        UnicodeBlock.YijingHexagramSymbols, // U+4DC0
        UnicodeBlock.CjkUnifiedIdeographs, // U+4E00
        UnicodeBlock.YiSyllables, // U+A000
        UnicodeBlock.YiRadicals, // U+A490
        null, // U+A4D0
        UnicodeBlock.HangulSyllables, // U+AC00
        null, // U+D7B0
        UnicodeBlock.HighSurrogates, // U+D800
        UnicodeBlock.HighPrivateUseSurrogates, // U+DB80
        UnicodeBlock.LowSurrogates, // U+DC00
        UnicodeBlock.PrivateUseArea, // U+E000
        UnicodeBlock.CjkCompatibilityIdeographs, // U+F900
        UnicodeBlock.AlphabeticPresentationForms, // U+FB00
        UnicodeBlock.ArabicPresentationFormsA, // U+FB50
        UnicodeBlock.VariationSelectors, // U+FE00
        null, // U+FE10
        UnicodeBlock.CombiningHalfMarks, // U+FE20
        UnicodeBlock.CjkCompatibilityForms, // U+FE30
        UnicodeBlock.SmallFormVariants, // U+FE50
        UnicodeBlock.ArabicPresentationFormsB, // U+FE70
        UnicodeBlock.HalfwidthAndFullwidthForms, // U+FF00
        UnicodeBlock.Specials, // U+FFF0
        UnicodeBlock.LinearBSyllabary, // U+10000
        UnicodeBlock.LinearBIdeograms, // U+10080
        UnicodeBlock.AegeanNumbers, // U+10100
        null, // U+10140
        UnicodeBlock.OldItalic, // U+10300
        UnicodeBlock.Gothic, // U+10330
        null, // U+10350
        UnicodeBlock.Ugaritic, // U+10380
        null, // U+103A0
        UnicodeBlock.Deseret, // U+10400
        UnicodeBlock.Shavian, // U+10450
        UnicodeBlock.Osmanya, // U+10480
        null, // U+104B0
        UnicodeBlock.CypriotSyllabary, // U+10800
        null, // U+10840
        UnicodeBlock.ByzantineMusicalSymbols, // U+1D000
        UnicodeBlock.MusicalSymbols, // U+1D100
        null, // U+1D200
        UnicodeBlock.TaiXuanJingSymbols, // U+1D300
        null, // U+1D360
        UnicodeBlock.MathematicalAlphanumericSymbols, // U+1D400
        null, // U+1D800
        UnicodeBlock.CjkUnifiedIdeographsExtensionB, // U+20000
        null, // U+2A6E0
        UnicodeBlock.CjkCompatibilityIdeographsSupplement, // U+2F800
        null, // U+2FA20
        UnicodeBlock.Tags, // U+E0000
        null, // U+E0080
        UnicodeBlock.VariationSelectorsSupplement, // U+E0100
        null, // U+E01F0
        UnicodeBlock.SupplementaryPrivateUseAreaA, // U+F0000
        UnicodeBlock.SupplementaryPrivateUseAreaB, // U+100000
      };
 | 
						|
 | 
						|
    #region Public methods
 | 
						|
 | 
						|
    /// <summary>
    /// Finds the table entry whose range contains the numeric value of <paramref name="ch"/>.
    /// </summary>
    /// <param name="ch">The character to look up; its integer value is used as the search key.</param>
    /// <returns>
    /// The element of <c>_unicodeBlocks</c> at the index of the greatest start in
    /// <c>_unicodeBlockStarts</c> that does not exceed the key. That element may be <c>null</c>.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// The key is rejected by <c>IsValidCodePoint</c>.
    /// </exception>
    /// <remarks>
    /// Taken from JDK source: http://grepcode.com/file/repository.grepcode.com/java/root/jdk/openjdk/6-b14/java/lang/Character.java#Character.UnicodeBlock.0LATIN_EXTENDED_ADDITIONAL
    /// </remarks>
    public static UnicodeBlock? GetUnicodeBlock(this char ch)
    {
      int codePoint = ch;

      if (IsValidCodePoint(codePoint) == false)
      {
        throw new ArgumentException("Argument is not a valid code point.", "ch");
      }

      // Binary search over the half-open index window [low, high).
      // Invariant: codePoint >= _unicodeBlockStarts[low], so the answer is never left of low.
      int low = 0;
      int high = _unicodeBlockStarts.Length;

      while (high - low > 1)
      {
        int mid = (low + high) / 2;

        if (codePoint < _unicodeBlockStarts[mid])
        {
          high = mid;
        }
        else
        {
          low = mid;
        }
      }

      // The window has shrunk to a single index: the last start that is <= codePoint.
      return _unicodeBlocks[low];
    }
 | 
						|
 | 
						|
    #endregion
 | 
						|
 | 
						|
    #region Private helper methods
 | 
						|
 | 
						|
    /// <summary>
    /// Reports whether <paramref name="codePoint"/> lies between <c>MIN_CODE_POINT</c> and
    /// <c>MAX_CODE_POINT</c>, both bounds included.
    /// </summary>
    /// <param name="codePoint">The value to test.</param>
    /// <returns><c>true</c> when the value is inside the range; otherwise <c>false</c>.</returns>
    private static bool IsValidCodePoint(int codePoint)
    {
      if (codePoint < MIN_CODE_POINT)
      {
        return false;
      }

      return codePoint <= MAX_CODE_POINT;
    }
 | 
						|
 | 
						|
    #endregion
 | 
						|
  }
 | 
						|
}
 |