mirror of
				https://github.com/jellyfin/jellyfin.git
				synced 2025-10-30 18:22:48 -04:00 
			
		
		
		
	
		
			
				
	
	
		
			119 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
			
		
		
	
	
			119 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
| using System.Collections.Generic;
 | |
| using System.Text.RegularExpressions;
 | |
| 
 | |
namespace NLangDetect.Core.Utils
{
    /// <summary>
    /// Holds n-gram occurrence counts collected under a profile name.
    /// </summary>
    /// <remarks>
    /// The lower-case member names (<c>name</c>, <c>freq</c>, <c>n_words</c>) are part of the
    /// public surface of this type and are kept as they are.
    /// </remarks>
    public class LangProfile
    {
        // Lower bound applied to the pruning threshold in OmitLessFreq.
        private const int MinimumFreq = 2;

        // Divisor applied to n_words[0] to derive the pruning threshold in OmitLessFreq.
        private const int LessFreqRatio = 100000;

        // Built once per process instead of once per OmitLessFreq call: constructing a
        // RegexOptions.Compiled regex is expensive, and IsMatch on a shared instance is thread-safe.
        private static readonly Regex SingleRomanLetter = new Regex("^[A-Za-z]$", RegexOptions.Compiled);

        private static readonly Regex ContainsRomanLetter = new Regex(".*[A-Za-z].*", RegexOptions.Compiled);

        /// <summary>
        /// Gets or sets the profile name. While it is <c>null</c>, <see cref="Add"/> and
        /// <see cref="OmitLessFreq"/> do nothing.
        /// </summary>
        public string name { get; set; }

        /// <summary>
        /// Gets or sets the occurrence count recorded for each n-gram.
        /// </summary>
        public Dictionary<string, int> freq { get; set; }

        /// <summary>
        /// Gets or sets the running totals per n-gram length; index <c>i</c> holds the total for
        /// n-grams of length <c>i + 1</c>.
        /// </summary>
        public int[] n_words { get; set; }

        #region Constructor(s)

        /// <summary>
        /// Initializes a new instance of the <see cref="LangProfile"/> class with no name, an empty
        /// frequency table and zeroed per-length totals.
        /// </summary>
        public LangProfile()
        {
            freq = new Dictionary<string, int>();
            n_words = new int[NGram.GramsCount];
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="LangProfile"/> class with the given name,
        /// an empty frequency table and zeroed per-length totals.
        /// </summary>
        /// <param name="name">The profile name.</param>
        public LangProfile(string name)
        {
            this.name = name;
            freq = new Dictionary<string, int>();
            n_words = new int[NGram.GramsCount];
        }

        #endregion

        #region Public methods

        /// <summary>
        /// Records one occurrence of <paramref name="gram"/>.
        /// </summary>
        /// <param name="gram">
        /// The n-gram to count. The call is ignored when the profile has no name, when the value is
        /// <c>null</c>, or when its length is outside <c>1..NGram.GramsCount</c>.
        /// </param>
        public void Add(string gram)
        {
            if (name == null || gram == null)
            {
                return; // Illegal
            }

            int len = gram.Length;
            if (len < 1 || len > NGram.GramsCount)
            {
                return; // Illegal
            }

            n_words[len - 1]++;

            // TryGetValue leaves 'current' at 0 for an unseen gram, so a single lookup plus a
            // single write handles both the "new" and the "existing" case.
            int current;
            freq.TryGetValue(gram, out current);
            freq[gram] = current + 1;
        }

        /// <summary>
        /// Removes rarely seen n-grams and, when single Roman letters make up only a small share of
        /// the profile, every n-gram that contains a Roman letter.
        /// </summary>
        /// <remarks>
        /// Does nothing when the profile has no name. Each removed entry's count is subtracted from
        /// the matching slot of <see cref="n_words"/>.
        /// </remarks>
        public void OmitLessFreq()
        {
            if (name == null)
            {
                return; // Illegal
            }

            int threshold = n_words[0] / LessFreqRatio;
            if (threshold < MinimumFreq)
            {
                threshold = MinimumFreq;
            }

            int roman = 0;
            List<string> keysToRemove = new List<string>();

            // Pass 1: collect entries at or below the threshold; among the survivors, sum the
            // counts of keys that are a single Roman letter.
            foreach (KeyValuePair<string, int> entry in freq)
            {
                if (entry.Value <= threshold)
                {
                    n_words[entry.Key.Length - 1] -= entry.Value;
                    keysToRemove.Add(entry.Key);
                }
                else if (SingleRomanLetter.IsMatch(entry.Key))
                {
                    roman += entry.Value;
                }
            }

            // Removal is deferred because a dictionary must not be modified while it is enumerated.
            foreach (string keyToRemove in keysToRemove)
            {
                freq.Remove(keyToRemove);
            }

            // roman check: compared against n_words[0] as already reduced by pass 1.
            if (roman >= n_words[0] / 3)
            {
                return;
            }

            keysToRemove.Clear();

            // Pass 2: drop every remaining n-gram that contains a Roman letter.
            foreach (KeyValuePair<string, int> entry in freq)
            {
                if (ContainsRomanLetter.IsMatch(entry.Key))
                {
                    n_words[entry.Key.Length - 1] -= entry.Value;
                    keysToRemove.Add(entry.Key);
                }
            }

            foreach (string keyToRemove in keysToRemove)
            {
                freq.Remove(keyToRemove);
            }
        }

        #endregion
    }
}
 |