Mirror of https://github.com/immich-app/immich.git

* basic refactor and styling
* removed batching
* module entrypoint
* removed unused imports
* model superclass, model cache now in app state
* fixed cache dir and enforced abstract method

Co-authored-by: Alex Tran <alex.tran1502@gmail.com>

from pathlib import Path

from PIL.Image import Image
from sentence_transformers import SentenceTransformer

from ..schemas import ModelType
from .base import InferenceModel


class CLIPSTEncoder(InferenceModel):
    """CLIP model backed by sentence-transformers; maps images and text into
    a shared embedding space so they can be compared for similarity search."""

    _model_type = ModelType.CLIP

    def __init__(
        self,
        model_name: str,
        cache_dir: Path | None = None,
        **model_kwargs,
    ):
        super().__init__(model_name, cache_dir)
        self.model = SentenceTransformer(
            self.model_name,
            cache_folder=self.cache_dir.as_posix(),
            **model_kwargs,
        )

    def predict(self, image_or_text: Image | str) -> list[float]:
        # encode() accepts either a PIL image or a string for CLIP checkpoints
        return self.model.encode(image_or_text).tolist()


# stubs to allow different behavior between the two in the future
# and handle loading different image and text clip models
class CLIPSTVisionEncoder(CLIPSTEncoder):
    _model_type = ModelType.CLIP_VISION


class CLIPSTTextEncoder(CLIPSTEncoder):
    _model_type = ModelType.CLIP_TEXT
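
For reference, here is a minimal standalone sketch of what predict() does under the hood, calling sentence-transformers directly instead of going through immich's InferenceModel wrapper. The checkpoint name "clip-ViT-B-32" and the image path are illustrative assumptions, not taken from this file:

# Standalone usage sketch. Assumptions: sentence-transformers and Pillow are
# installed; "clip-ViT-B-32" and "photo.jpg" are illustrative placeholders.
from PIL import Image
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("clip-ViT-B-32")

# CLIP checkpoints accept either a PIL image or a string; both land in the
# same embedding space, which is what makes text-to-image search possible.
image_embedding = model.encode(Image.open("photo.jpg"))
text_embedding = model.encode("a photo of a dog on the beach")

# Cosine similarity between the two embeddings (higher = closer match).
print(float(util.cos_sim(image_embedding, text_embedding)))

This is the same call chain as CLIPSTEncoder.predict above, which simply wraps encode() and converts the resulting vector to a plain Python list.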