mirror of
				https://github.com/jellyfin/jellyfin.git
				synced 2025-11-04 03:27:21 -05:00 
			
		
		
		
	
		
			
				
	
	
		
			119 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
			
		
		
	
	
			119 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
using System.Collections.Generic;
 | 
						|
using System.Text.RegularExpressions;
 | 
						|
 | 
						|
namespace NLangDetect.Core.Utils
 | 
						|
{
 | 
						|
    /// <summary>
    /// N-gram statistics for a single language: the occurrence count of every
    /// n-gram (<see cref="freq"/>) and the total number of occurrences per
    /// n-gram length (<see cref="n_words"/>).
    /// </summary>
    public class LangProfile
    {
        private const int MinimumFreq = 2;
        private const int LessFreqRatio = 100000;

        // Built once for the whole type: constructing a Regex with RegexOptions.Compiled
        // is expensive, so it must not happen on every OmitLessFreq() call.
        // Matches a key that is a single ASCII letter.
        private static readonly Regex SingleRomanLetterRegex = new Regex("^[A-Za-z]$", RegexOptions.Compiled);

        // Matches a key that contains at least one ASCII letter.
        private static readonly Regex ContainsRomanLetterRegex = new Regex(".*[A-Za-z].*", RegexOptions.Compiled);

        /// <summary>
        /// Gets or sets the profile name. While it is null, <see cref="Add"/> and
        /// <see cref="OmitLessFreq"/> do nothing.
        /// </summary>
        public string name { get; set; }

        /// <summary>
        /// Gets or sets the occurrence count of each n-gram, keyed by the n-gram text.
        /// </summary>
        public Dictionary<string, int> freq { get; set; }

        /// <summary>
        /// Gets or sets the occurrence totals per n-gram length; index <c>i</c> holds
        /// the total for n-grams of length <c>i + 1</c>.
        /// </summary>
        public int[] n_words { get; set; }

        #region Constructor(s)

        /// <summary>
        /// Creates a profile without a name, with an empty frequency table and
        /// <c>NGram.GramsCount</c> zeroed length counters.
        /// </summary>
        public LangProfile()
        {
            freq = new Dictionary<string, int>();
            n_words = new int[NGram.GramsCount];
        }

        /// <summary>
        /// Creates an empty profile with the given name.
        /// </summary>
        /// <param name="name">The profile name.</param>
        public LangProfile(string name)
            : this()
        {
            this.name = name;
        }

        #endregion

        #region Public methods

        /// <summary>
        /// Records one occurrence of <paramref name="gram"/>.
        /// The call is silently ignored when the profile has no name, when
        /// <paramref name="gram"/> is null, or when its length is outside
        /// <c>1..NGram.GramsCount</c>.
        /// </summary>
        /// <param name="gram">The n-gram to count.</param>
        public void Add(string gram)
        {
            if (name == null || gram == null)
            {
                return; // Illegal
            }

            int len = gram.Length;
            if (len < 1 || len > NGram.GramsCount)
            {
                return; // Illegal
            }

            n_words[len - 1]++;

            // TryGetValue leaves 'current' at 0 for an unseen gram, so a single
            // lookup plus one write covers both the insert and the increment case.
            int current;
            freq.TryGetValue(gram, out current);
            freq[gram] = current + 1;
        }

        /// <summary>
        /// Prunes the frequency table in two passes, keeping <see cref="n_words"/>
        /// in step with every removal.
        /// <para>
        /// Pass 1 removes every n-gram whose count is not greater than the threshold
        /// <c>max(n_words[0] / LessFreqRatio, MinimumFreq)</c>.
        /// </para>
        /// <para>
        /// Pass 2 runs only when the surviving single-ASCII-letter n-grams account
        /// for less than a third of <c>n_words[0]</c>; it then removes every remaining
        /// n-gram that contains an ASCII letter.
        /// </para>
        /// Does nothing when the profile has no name.
        /// </summary>
        public void OmitLessFreq()
        {
            if (name == null)
            {
                return; // Illegal
            }

            int threshold = n_words[0] / LessFreqRatio;
            if (threshold < MinimumFreq)
            {
                threshold = MinimumFreq;
            }

            int roman = 0;
            List<string> keysToRemove = new List<string>();

            // The dictionary cannot be modified while it is enumerated, so the
            // keys to drop are collected first and removed afterwards.
            foreach (KeyValuePair<string, int> entry in freq)
            {
                if (entry.Value <= threshold)
                {
                    n_words[entry.Key.Length - 1] -= entry.Value;
                    keysToRemove.Add(entry.Key);
                }
                else if (SingleRomanLetterRegex.IsMatch(entry.Key))
                {
                    roman += entry.Value;
                }
            }

            RemoveKeys(keysToRemove);

            // roman check (uses n_words[0] as adjusted by the first pass)
            if (roman < n_words[0] / 3)
            {
                keysToRemove = new List<string>();

                foreach (KeyValuePair<string, int> entry in freq)
                {
                    if (ContainsRomanLetterRegex.IsMatch(entry.Key))
                    {
                        n_words[entry.Key.Length - 1] -= entry.Value;
                        keysToRemove.Add(entry.Key);
                    }
                }

                RemoveKeys(keysToRemove);
            }
        }

        #endregion

        // Removes each of the given keys from the frequency table.
        private void RemoveKeys(List<string> keysToRemove)
        {
            foreach (string keyToRemove in keysToRemove)
            {
                freq.Remove(keyToRemove);
            }
        }
    }
 | 
						|
}
 |