mirror of
				https://github.com/LibreTranslate/LibreTranslate.git
				synced 2025-11-03 19:17:17 -05:00 
			
		
		
		
	Add lexilang for language detection on short texts
This commit is contained in:
		
							parent
							
								
									c9592a236a
								
							
						
					
					
						commit
						6ff5bba000
					
				@ -1,6 +1,7 @@
 | 
				
			|||||||
from functools import lru_cache
 | 
					from functools import lru_cache
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import linguars
 | 
					import linguars
 | 
				
			||||||
 | 
					from lexilang.detector import detect as lldetect
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Language:
 | 
					class Language:
 | 
				
			||||||
@ -26,9 +27,15 @@ def load_detector(langcodes = ()):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class Detector:
 | 
					class Detector:
 | 
				
			||||||
  def __init__(self, langcodes = ()):
 | 
					  def __init__(self, langcodes = ()):
 | 
				
			||||||
 | 
					    self.langcodes = langcodes
 | 
				
			||||||
    self.detector = load_detector(langcodes)
 | 
					    self.detector = load_detector(langcodes)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def detect(self, text):
 | 
					  def detect(self, text):
 | 
				
			||||||
 | 
					    if len(text) < 18:
 | 
				
			||||||
 | 
					      code, conf = lldetect(text, self.langcodes)
 | 
				
			||||||
 | 
					      if conf > 0:
 | 
				
			||||||
 | 
					        return [Language(code, round(conf * 100))]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    top_3_choices = self.detector.confidence(text)[:3]
 | 
					    top_3_choices = self.detector.confidence(text)[:3]
 | 
				
			||||||
    if top_3_choices[0][1] == 0:
 | 
					    if top_3_choices[0][1] == 0:
 | 
				
			||||||
      return [Language("en", 0)]
 | 
					      return [Language("en", 0)]
 | 
				
			||||||
 | 
				
			|||||||
@ -18,7 +18,7 @@ def load_languages():
 | 
				
			|||||||
@lru_cache(maxsize=None)
 | 
					@lru_cache(maxsize=None)
 | 
				
			||||||
def load_lang_codes():
 | 
					def load_lang_codes():
 | 
				
			||||||
    languages = load_languages()
 | 
					    languages = load_languages()
 | 
				
			||||||
    return (l.code for l in languages)
 | 
					    return tuple(l.code for l in languages)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def detect_languages(text):
 | 
					def detect_languages(text):
 | 
				
			||||||
    # detect batch processing
 | 
					    # detect batch processing
 | 
				
			||||||
 | 
				
			|||||||
@ -43,6 +43,7 @@ dependencies = [
 | 
				
			|||||||
    "waitress ==2.1.2",
 | 
					    "waitress ==2.1.2",
 | 
				
			||||||
    "expiringdict ==1.2.2",
 | 
					    "expiringdict ==1.2.2",
 | 
				
			||||||
    "linguars==0.4.0",
 | 
					    "linguars==0.4.0",
 | 
				
			||||||
 | 
					    "lexilang==1.0.1",
 | 
				
			||||||
    "morfessor ==2.0.6",
 | 
					    "morfessor ==2.0.6",
 | 
				
			||||||
    "appdirs ==1.4.4",
 | 
					    "appdirs ==1.4.4",
 | 
				
			||||||
    "APScheduler ==3.9.1",
 | 
					    "APScheduler ==3.9.1",
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user