mirror of
				https://github.com/immich-app/immich.git
				synced 2025-10-25 15:52:33 -04:00 
			
		
		
		
	feat(ml): add more search models (#11468)
* update export code
* add uuid glob, sort model names
* add new models to ml, sort names
* add new models to server, sort by dims and name
* typo in name
* update export dependencies
* onnx save function
* format
This commit is contained in:
		
							parent
							
								
									2423bb36c4
								
							
						
					
					
						commit
						41580696c7
					
				| @ -2,53 +2,64 @@ from app.config import clean_name | ||||
| from app.schemas import ModelSource | ||||
| 
 | ||||
| _OPENCLIP_MODELS = { | ||||
|     "RN50__openai", | ||||
|     "RN50__yfcc15m", | ||||
|     "RN50__cc12m", | ||||
|     "RN101__openai", | ||||
|     "RN101__yfcc15m", | ||||
|     "RN50x4__openai", | ||||
|     "RN50__cc12m", | ||||
|     "RN50__openai", | ||||
|     "RN50__yfcc15m", | ||||
|     "RN50x16__openai", | ||||
|     "RN50x4__openai", | ||||
|     "RN50x64__openai", | ||||
|     "ViT-B-32__openai", | ||||
|     "ViT-B-16-SigLIP-256__webli", | ||||
|     "ViT-B-16-SigLIP-384__webli", | ||||
|     "ViT-B-16-SigLIP-512__webli", | ||||
|     "ViT-B-16-SigLIP-i18n-256__webli", | ||||
|     "ViT-B-16-SigLIP__webli", | ||||
|     "ViT-B-16-plus-240__laion400m_e31", | ||||
|     "ViT-B-16-plus-240__laion400m_e32", | ||||
|     "ViT-B-16__laion400m_e31", | ||||
|     "ViT-B-16__laion400m_e32", | ||||
|     "ViT-B-16__openai", | ||||
|     "ViT-B-32__laion2b-s34b-b79k", | ||||
|     "ViT-B-32__laion2b_e16", | ||||
|     "ViT-B-32__laion400m_e31", | ||||
|     "ViT-B-32__laion400m_e32", | ||||
|     "ViT-B-32__laion2b-s34b-b79k", | ||||
|     "ViT-B-16__openai", | ||||
|     "ViT-B-16__laion400m_e31", | ||||
|     "ViT-B-16__laion400m_e32", | ||||
|     "ViT-B-16-plus-240__laion400m_e31", | ||||
|     "ViT-B-16-plus-240__laion400m_e32", | ||||
|     "ViT-L-14__openai", | ||||
|     "ViT-B-32__openai", | ||||
|     "ViT-H-14-378-quickgelu__dfn5b", | ||||
|     "ViT-H-14-quickgelu__dfn5b", | ||||
|     "ViT-H-14__laion2b-s32b-b79k", | ||||
|     "ViT-L-14-336__openai", | ||||
|     "ViT-L-14-quickgelu__dfn2b", | ||||
|     "ViT-L-14__laion2b-s32b-b82k", | ||||
|     "ViT-L-14__laion400m_e31", | ||||
|     "ViT-L-14__laion400m_e32", | ||||
|     "ViT-L-14__laion2b-s32b-b82k", | ||||
|     "ViT-L-14-336__openai", | ||||
|     "ViT-H-14__laion2b-s32b-b79k", | ||||
|     "ViT-L-14__openai", | ||||
|     "ViT-L-16-SigLIP-256__webli", | ||||
|     "ViT-L-16-SigLIP-384__webli", | ||||
|     "ViT-SO400M-14-SigLIP-384__webli", | ||||
|     "ViT-g-14__laion2b-s12b-b42k", | ||||
|     "ViT-L-14-quickgelu__dfn2b", | ||||
|     "ViT-H-14-quickgelu__dfn5b", | ||||
|     "ViT-H-14-378-quickgelu__dfn5b", | ||||
|     "XLM-Roberta-Base-ViT-B-32__laion5b_s13b_b90k", | ||||
|     "XLM-Roberta-Large-ViT-H-14__frozen_laion5b_s13b_b90k", | ||||
|     "nllb-clip-base-siglip__mrl", | ||||
|     "nllb-clip-base-siglip__v1", | ||||
|     "nllb-clip-large-siglip__mrl", | ||||
|     "nllb-clip-large-siglip__v1", | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| _MCLIP_MODELS = { | ||||
|     "LABSE-Vit-L-14", | ||||
|     "XLM-Roberta-Large-Vit-B-32", | ||||
|     "XLM-Roberta-Large-Vit-B-16Plus", | ||||
|     "XLM-Roberta-Large-Vit-B-32", | ||||
|     "XLM-Roberta-Large-Vit-L-14", | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| _INSIGHTFACE_MODELS = { | ||||
|     "antelopev2", | ||||
|     "buffalo_l", | ||||
|     "buffalo_m", | ||||
|     "buffalo_s", | ||||
|     "buffalo_m", | ||||
|     "buffalo_l", | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -2,7 +2,7 @@ name: base | ||||
| channels: | ||||
|   - conda-forge | ||||
|   - nvidia | ||||
|   - pytorch-nightly | ||||
|   - pytorch | ||||
| platforms: | ||||
|   - linux-64 | ||||
| dependencies: | ||||
| @ -13,7 +13,7 @@ dependencies: | ||||
|   - orjson==3.* | ||||
|   - pip | ||||
|   - python==3.11.* | ||||
|   - pytorch | ||||
|   - pytorch>=2.3 | ||||
|   - rich==13.* | ||||
|   - safetensors==0.* | ||||
|   - setuptools==68.* | ||||
| @ -21,5 +21,5 @@ dependencies: | ||||
|   - transformers==4.* | ||||
|   - pip: | ||||
|     - multilingual-clip | ||||
|     - onnx-simplifier | ||||
|     - onnxsim | ||||
| category: main | ||||
|  | ||||
| @ -1,3 +1,4 @@ | ||||
| import os | ||||
| import tempfile | ||||
| import warnings | ||||
| from pathlib import Path | ||||
| @ -8,7 +9,6 @@ from transformers import AutoTokenizer | ||||
| 
 | ||||
| from .openclip import OpenCLIPModelConfig | ||||
| from .openclip import to_onnx as openclip_to_onnx | ||||
| from .optimize import optimize | ||||
| from .util import get_model_path | ||||
| 
 | ||||
| _MCLIP_TO_OPENCLIP = { | ||||
| @ -23,18 +23,20 @@ def to_onnx( | ||||
|     model_name: str, | ||||
|     output_dir_visual: Path | str, | ||||
|     output_dir_textual: Path | str, | ||||
| ) -> None: | ||||
| ) -> tuple[Path, Path]: | ||||
|     textual_path = get_model_path(output_dir_textual) | ||||
|     with tempfile.TemporaryDirectory() as tmpdir: | ||||
|         model = MultilingualCLIP.from_pretrained(model_name, cache_dir=tmpdir) | ||||
|         model = MultilingualCLIP.from_pretrained(model_name, cache_dir=os.environ.get("CACHE_DIR", tmpdir)) | ||||
|         AutoTokenizer.from_pretrained(model_name).save_pretrained(output_dir_textual) | ||||
| 
 | ||||
|         model.eval() | ||||
|         for param in model.parameters(): | ||||
|             param.requires_grad_(False) | ||||
| 
 | ||||
|         export_text_encoder(model, textual_path) | ||||
|         openclip_to_onnx(_MCLIP_TO_OPENCLIP[model_name], output_dir_visual) | ||||
|         optimize(textual_path) | ||||
|         visual_path, _ = openclip_to_onnx(_MCLIP_TO_OPENCLIP[model_name], output_dir_visual) | ||||
|         assert visual_path is not None, "Visual model export failed" | ||||
|     return visual_path, textual_path | ||||
| 
 | ||||
| 
 | ||||
| def export_text_encoder(model: MultilingualCLIP, output_path: Path | str) -> None: | ||||
| @ -58,10 +60,10 @@ def export_text_encoder(model: MultilingualCLIP, output_path: Path | str) -> Non | ||||
|             args, | ||||
|             output_path.as_posix(), | ||||
|             input_names=["input_ids", "attention_mask"], | ||||
|             output_names=["text_embedding"], | ||||
|             output_names=["embedding"], | ||||
|             opset_version=17, | ||||
|             dynamic_axes={ | ||||
|                 "input_ids": {0: "batch_size", 1: "sequence_length"}, | ||||
|                 "attention_mask": {0: "batch_size", 1: "sequence_length"}, | ||||
|             }, | ||||
|             # dynamic_axes={ | ||||
|             #     "input_ids": {0: "batch_size", 1: "sequence_length"}, | ||||
|             #     "attention_mask": {0: "batch_size", 1: "sequence_length"}, | ||||
|             # }, | ||||
|         ) | ||||
|  | ||||
| @ -1,3 +1,4 @@ | ||||
| import os | ||||
| import tempfile | ||||
| import warnings | ||||
| from dataclasses import dataclass, field | ||||
| @ -7,7 +8,6 @@ import open_clip | ||||
| import torch | ||||
| from transformers import AutoTokenizer | ||||
| 
 | ||||
| from .optimize import optimize | ||||
| from .util import get_model_path, save_config | ||||
| 
 | ||||
| 
 | ||||
| @ -23,25 +23,28 @@ class OpenCLIPModelConfig: | ||||
|         if open_clip_cfg is None: | ||||
|             raise ValueError(f"Unknown model {self.name}") | ||||
|         self.image_size = open_clip_cfg["vision_cfg"]["image_size"] | ||||
|         self.sequence_length = open_clip_cfg["text_cfg"]["context_length"] | ||||
|         self.sequence_length = open_clip_cfg["text_cfg"].get("context_length", 77) | ||||
| 
 | ||||
| 
 | ||||
| def to_onnx( | ||||
|     model_cfg: OpenCLIPModelConfig, | ||||
|     output_dir_visual: Path | str | None = None, | ||||
|     output_dir_textual: Path | str | None = None, | ||||
| ) -> None: | ||||
| ) -> tuple[Path | None, Path | None]: | ||||
|     visual_path = None | ||||
|     textual_path = None | ||||
|     with tempfile.TemporaryDirectory() as tmpdir: | ||||
|         model = open_clip.create_model( | ||||
|             model_cfg.name, | ||||
|             pretrained=model_cfg.pretrained, | ||||
|             jit=False, | ||||
|             cache_dir=tmpdir, | ||||
|             cache_dir=os.environ.get("CACHE_DIR", tmpdir), | ||||
|             require_pretrained=True, | ||||
|         ) | ||||
| 
 | ||||
|         text_vision_cfg = open_clip.get_model_config(model_cfg.name) | ||||
| 
 | ||||
|         model.eval() | ||||
|         for param in model.parameters(): | ||||
|             param.requires_grad_(False) | ||||
| 
 | ||||
| @ -53,8 +56,6 @@ def to_onnx( | ||||
|             save_config(text_vision_cfg, output_dir_visual.parent / "config.json") | ||||
|             export_image_encoder(model, model_cfg, visual_path) | ||||
| 
 | ||||
|             optimize(visual_path) | ||||
| 
 | ||||
|         if output_dir_textual is not None: | ||||
|             output_dir_textual = Path(output_dir_textual) | ||||
|             textual_path = get_model_path(output_dir_textual) | ||||
| @ -62,7 +63,7 @@ def to_onnx( | ||||
|             tokenizer_name = text_vision_cfg["text_cfg"].get("hf_tokenizer_name", "openai/clip-vit-base-patch32") | ||||
|             AutoTokenizer.from_pretrained(tokenizer_name).save_pretrained(output_dir_textual) | ||||
|             export_text_encoder(model, model_cfg, textual_path) | ||||
|             optimize(textual_path) | ||||
|     return visual_path, textual_path | ||||
| 
 | ||||
| 
 | ||||
| def export_image_encoder(model: open_clip.CLIP, model_cfg: OpenCLIPModelConfig, output_path: Path | str) -> None: | ||||
| @ -83,9 +84,9 @@ def export_image_encoder(model: open_clip.CLIP, model_cfg: OpenCLIPModelConfig, | ||||
|             args, | ||||
|             output_path.as_posix(), | ||||
|             input_names=["image"], | ||||
|             output_names=["image_embedding"], | ||||
|             output_names=["embedding"], | ||||
|             opset_version=17, | ||||
|             dynamic_axes={"image": {0: "batch_size"}}, | ||||
|             # dynamic_axes={"image": {0: "batch_size"}}, | ||||
|         ) | ||||
| 
 | ||||
| 
 | ||||
| @ -107,7 +108,7 @@ def export_text_encoder(model: open_clip.CLIP, model_cfg: OpenCLIPModelConfig, o | ||||
|             args, | ||||
|             output_path.as_posix(), | ||||
|             input_names=["text"], | ||||
|             output_names=["text_embedding"], | ||||
|             output_names=["embedding"], | ||||
|             opset_version=17, | ||||
|             dynamic_axes={"text": {0: "batch_size"}}, | ||||
|             # dynamic_axes={"text": {0: "batch_size"}}, | ||||
|         ) | ||||
|  | ||||
| @ -5,13 +5,26 @@ import onnxruntime as ort | ||||
| import onnxsim | ||||
| 
 | ||||
| 
 | ||||
| def save_onnx(model: onnx.ModelProto, output_path: Path | str) -> None: | ||||
|     try: | ||||
|         onnx.save(model, output_path) | ||||
|     except ValueError as e: | ||||
|         if "The proto size is larger than the 2 GB limit." in str(e): | ||||
|             onnx.save(model, output_path, save_as_external_data=True, size_threshold=1_000_000) | ||||
|         else: | ||||
|             raise e | ||||
| 
 | ||||
| 
 | ||||
| def optimize_onnxsim(model_path: Path | str, output_path: Path | str) -> None: | ||||
|     model_path = Path(model_path) | ||||
|     output_path = Path(output_path) | ||||
|     model = onnx.load(model_path.as_posix()) | ||||
|     model, check = onnxsim.simplify(model, skip_shape_inference=True) | ||||
|     model, check = onnxsim.simplify(model) | ||||
|     assert check, "Simplified ONNX model could not be validated" | ||||
|     onnx.save(model, output_path.as_posix()) | ||||
|     for file in model_path.parent.iterdir(): | ||||
|         if file.name.startswith("Constant") or "onnx" in file.name or file.suffix == ".weight": | ||||
|             file.unlink() | ||||
|     save_onnx(model, output_path) | ||||
| 
 | ||||
| 
 | ||||
| def optimize_ort( | ||||
| @ -33,6 +46,4 @@ def optimize(model_path: Path | str) -> None: | ||||
|     model_path = Path(model_path) | ||||
| 
 | ||||
|     optimize_ort(model_path, model_path) | ||||
|     # onnxsim serializes large models as a blob, which uses much more memory when loading the model at runtime | ||||
|     if not any(file.name.startswith("Constant") for file in model_path.parent.iterdir()): | ||||
|         optimize_onnxsim(model_path, model_path) | ||||
|     optimize_onnxsim(model_path, model_path) | ||||
|  | ||||
| @ -3,74 +3,111 @@ import os | ||||
| from pathlib import Path | ||||
| from tempfile import TemporaryDirectory | ||||
| 
 | ||||
| from huggingface_hub import create_repo, login, upload_folder | ||||
| import torch | ||||
| from huggingface_hub import create_repo, upload_folder | ||||
| from models import mclip, openclip | ||||
| from models.optimize import optimize | ||||
| from rich.progress import Progress | ||||
| 
 | ||||
| models = [ | ||||
|     "RN50::openai", | ||||
|     "RN50::yfcc15m", | ||||
|     "RN50::cc12m", | ||||
|     "M-CLIP/LABSE-Vit-L-14", | ||||
|     "M-CLIP/XLM-Roberta-Large-Vit-B-16Plus", | ||||
|     "M-CLIP/XLM-Roberta-Large-Vit-B-32", | ||||
|     "M-CLIP/XLM-Roberta-Large-Vit-L-14", | ||||
|     "RN101::openai", | ||||
|     "RN101::yfcc15m", | ||||
|     "RN50x4::openai", | ||||
|     "RN50::cc12m", | ||||
|     "RN50::openai", | ||||
|     "RN50::yfcc15m", | ||||
|     "RN50x16::openai", | ||||
|     "RN50x4::openai", | ||||
|     "RN50x64::openai", | ||||
|     "ViT-B-32::openai", | ||||
|     "ViT-B-16-SigLIP-256::webli", | ||||
|     "ViT-B-16-SigLIP-384::webli", | ||||
|     "ViT-B-16-SigLIP-512::webli", | ||||
|     "ViT-B-16-SigLIP-i18n-256::webli", | ||||
|     "ViT-B-16-SigLIP::webli", | ||||
|     "ViT-B-16-plus-240::laion400m_e31", | ||||
|     "ViT-B-16-plus-240::laion400m_e32", | ||||
|     "ViT-B-16::laion400m_e31", | ||||
|     "ViT-B-16::laion400m_e32", | ||||
|     "ViT-B-16::openai", | ||||
|     "ViT-B-32::laion2b-s34b-b79k", | ||||
|     "ViT-B-32::laion2b_e16", | ||||
|     "ViT-B-32::laion400m_e31", | ||||
|     "ViT-B-32::laion400m_e32", | ||||
|     "ViT-B-32::laion2b-s34b-b79k", | ||||
|     "ViT-B-16::openai", | ||||
|     "ViT-B-16::laion400m_e31", | ||||
|     "ViT-B-16::laion400m_e32", | ||||
|     "ViT-B-16-plus-240::laion400m_e31", | ||||
|     "ViT-B-16-plus-240::laion400m_e32", | ||||
|     "ViT-L-14::openai", | ||||
|     "ViT-B-32::openai", | ||||
|     "ViT-H-14-378-quickgelu::dfn5b", | ||||
|     "ViT-H-14-quickgelu::dfn5b", | ||||
|     "ViT-H-14::laion2b-s32b-b79k", | ||||
|     "ViT-L-14-336::openai", | ||||
|     "ViT-L-14-quickgelu::dfn2b", | ||||
|     "ViT-L-14::laion2b-s32b-b82k", | ||||
|     "ViT-L-14::laion400m_e31", | ||||
|     "ViT-L-14::laion400m_e32", | ||||
|     "ViT-L-14::laion2b-s32b-b82k", | ||||
|     "ViT-L-14-336::openai", | ||||
|     "ViT-H-14::laion2b-s32b-b79k", | ||||
|     "ViT-L-14::openai", | ||||
|     "ViT-L-16-SigLIP-256::webli", | ||||
|     "ViT-L-16-SigLIP-384::webli", | ||||
|     "ViT-SO400M-14-SigLIP-384::webli", | ||||
|     "ViT-g-14::laion2b-s12b-b42k", | ||||
|     "M-CLIP/LABSE-Vit-L-14", | ||||
|     "M-CLIP/XLM-Roberta-Large-Vit-B-32", | ||||
|     "M-CLIP/XLM-Roberta-Large-Vit-B-16Plus", | ||||
|     "M-CLIP/XLM-Roberta-Large-Vit-L-14", | ||||
|     "nllb-clip-base-siglip::mrl", | ||||
|     "nllb-clip-base-siglip::v1", | ||||
|     "nllb-clip-large-siglip::mrl", | ||||
|     "nllb-clip-large-siglip::v1", | ||||
|     "xlm-roberta-base-ViT-B-32::laion5b_s13b_b90k", | ||||
|     "xlm-roberta-large-ViT-H-14::frozen_laion5b_s13b_b90k", | ||||
| ] | ||||
| 
 | ||||
| login(token=os.environ["HF_AUTH_TOKEN"]) | ||||
| # glob to delete old UUID blobs when reuploading models | ||||
| uuid_char = "[a-fA-F0-9]" | ||||
| uuid_glob = uuid_char * 8 + "-" + uuid_char * 4 + "-" + uuid_char * 4 + "-" + uuid_char * 4 + "-" + uuid_char * 12 | ||||
| 
 | ||||
| # remote repo files to be deleted before uploading | ||||
| # deletion is in the same commit as the upload, so it's atomic | ||||
| delete_patterns = ["**/*onnx*", "**/Constant*", "**/*.weight", "**/*.bias", f"**/{uuid_glob}"] | ||||
| 
 | ||||
| with Progress() as progress: | ||||
|     task1 = progress.add_task("[green]Exporting models...", total=len(models)) | ||||
|     task2 = progress.add_task("[yellow]Uploading models...", total=len(models)) | ||||
| 
 | ||||
|     task = progress.add_task("[green]Exporting models...", total=len(models)) | ||||
|     token = os.environ.get("HF_AUTH_TOKEN") | ||||
|     torch.backends.mha.set_fastpath_enabled(False) | ||||
|     with TemporaryDirectory() as tmp: | ||||
|         tmpdir = Path(tmp) | ||||
|         for model in models: | ||||
|             model_name = model.split("/")[-1].replace("::", "__") | ||||
|             hf_model_name = model_name.replace("xlm-roberta-large", "XLM-Roberta-Large") | ||||
|             hf_model_name = model_name.replace("xlm-roberta-base", "XLM-Roberta-Base") | ||||
|             config_path = tmpdir / model_name / "config.json" | ||||
| 
 | ||||
|             def upload() -> None: | ||||
|                 progress.update(task2, description=f"[yellow]Uploading {model_name}") | ||||
|                 repo_id = f"immich-app/{model_name}" | ||||
| 
 | ||||
|                 create_repo(repo_id, exist_ok=True) | ||||
|                 upload_folder(repo_id=repo_id, folder_path=tmpdir / model_name) | ||||
|                 progress.update(task2, advance=1) | ||||
| 
 | ||||
|             def export() -> None: | ||||
|                 progress.update(task1, description=f"[green]Exporting {model_name}") | ||||
|                 visual_dir = tmpdir / model_name / "visual" | ||||
|                 textual_dir = tmpdir / model_name / "textual" | ||||
|                 progress.update(task, description=f"[green]Exporting {hf_model_name}") | ||||
|                 visual_dir = tmpdir / hf_model_name / "visual" | ||||
|                 textual_dir = tmpdir / hf_model_name / "textual" | ||||
|                 if model.startswith("M-CLIP"): | ||||
|                     mclip.to_onnx(model, visual_dir, textual_dir) | ||||
|                     visual_path, textual_path = mclip.to_onnx(model, visual_dir, textual_dir) | ||||
|                 else: | ||||
|                     name, _, pretrained = model_name.partition("__") | ||||
|                     openclip.to_onnx(openclip.OpenCLIPModelConfig(name, pretrained), visual_dir, textual_dir) | ||||
|                     config = openclip.OpenCLIPModelConfig(name, pretrained) | ||||
|                     visual_path, textual_path = openclip.to_onnx(config, visual_dir, textual_dir) | ||||
|                 progress.update(task, description=f"[green]Optimizing {hf_model_name} (visual)") | ||||
|                 optimize(visual_path) | ||||
|                 progress.update(task, description=f"[green]Optimizing {hf_model_name} (textual)") | ||||
|                 optimize(textual_path) | ||||
| 
 | ||||
|                 progress.update(task1, advance=1) | ||||
|                 gc.collect() | ||||
| 
 | ||||
|             def upload() -> None: | ||||
|                 progress.update(task, description=f"[yellow]Uploading {hf_model_name}") | ||||
|                 repo_id = f"immich-app/{hf_model_name}" | ||||
| 
 | ||||
|                 create_repo(repo_id, exist_ok=True) | ||||
|                 upload_folder( | ||||
|                     repo_id=repo_id, | ||||
|                     folder_path=tmpdir / hf_model_name, | ||||
|                     delete_patterns=delete_patterns, | ||||
|                     token=token, | ||||
|                 ) | ||||
| 
 | ||||
|             export() | ||||
|             upload() | ||||
|             if token is not None: | ||||
|                 upload() | ||||
|             progress.update(task, advance=1) | ||||
|  | ||||
| @ -93,39 +93,50 @@ export const supportedPresetTokens = [ | ||||
| 
 | ||||
| type ModelInfo = { dimSize: number }; | ||||
| export const CLIP_MODEL_INFO: Record<string, ModelInfo> = { | ||||
|   RN50__openai: { dimSize: 1024 }, | ||||
|   RN50__yfcc15m: { dimSize: 1024 }, | ||||
|   RN50__cc12m: { dimSize: 1024 }, | ||||
|   RN101__openai: { dimSize: 512 }, | ||||
|   RN101__yfcc15m: { dimSize: 512 }, | ||||
|   RN50x4__openai: { dimSize: 640 }, | ||||
|   RN50x16__openai: { dimSize: 768 }, | ||||
|   RN50x64__openai: { dimSize: 1024 }, | ||||
|   'ViT-B-32__openai': { dimSize: 512 }, | ||||
|   'ViT-B-16__laion400m_e31': { dimSize: 512 }, | ||||
|   'ViT-B-16__laion400m_e32': { dimSize: 512 }, | ||||
|   'ViT-B-16__openai': { dimSize: 512 }, | ||||
|   'ViT-B-32__laion2b-s34b-b79k': { dimSize: 512 }, | ||||
|   'ViT-B-32__laion2b_e16': { dimSize: 512 }, | ||||
|   'ViT-B-32__laion400m_e31': { dimSize: 512 }, | ||||
|   'ViT-B-32__laion400m_e32': { dimSize: 512 }, | ||||
|   'ViT-B-32__laion2b-s34b-b79k': { dimSize: 512 }, | ||||
|   'ViT-B-16__openai': { dimSize: 512 }, | ||||
|   'ViT-B-16__laion400m_e31': { dimSize: 512 }, | ||||
|   'ViT-B-16__laion400m_e32': { dimSize: 512 }, | ||||
|   'ViT-B-32__openai': { dimSize: 512 }, | ||||
|   'XLM-Roberta-Base-ViT-B-32__laion5b_s13b_b90k': { dimSize: 512 }, | ||||
|   'XLM-Roberta-Large-Vit-B-32': { dimSize: 512 }, | ||||
|   RN50x4__openai: { dimSize: 640 }, | ||||
|   'ViT-B-16-plus-240__laion400m_e31': { dimSize: 640 }, | ||||
|   'ViT-B-16-plus-240__laion400m_e32': { dimSize: 640 }, | ||||
|   'ViT-L-14__openai': { dimSize: 768 }, | ||||
|   'ViT-L-14__laion400m_e31': { dimSize: 768 }, | ||||
|   'ViT-L-14__laion400m_e32': { dimSize: 768 }, | ||||
|   'ViT-L-14__laion2b-s32b-b82k': { dimSize: 768 }, | ||||
|   'XLM-Roberta-Large-Vit-B-16Plus': { dimSize: 640 }, | ||||
|   'LABSE-Vit-L-14': { dimSize: 768 }, | ||||
|   RN50x16__openai: { dimSize: 768 }, | ||||
|   'ViT-B-16-SigLIP-256__webli': { dimSize: 768 }, | ||||
|   'ViT-B-16-SigLIP-384__webli': { dimSize: 768 }, | ||||
|   'ViT-B-16-SigLIP-512__webli': { dimSize: 768 }, | ||||
|   'ViT-B-16-SigLIP-i18n-256__webli': { dimSize: 768 }, | ||||
|   'ViT-B-16-SigLIP__webli': { dimSize: 768 }, | ||||
|   'ViT-L-14-336__openai': { dimSize: 768 }, | ||||
|   'ViT-L-14-quickgelu__dfn2b': { dimSize: 768 }, | ||||
|   'ViT-H-14__laion2b-s32b-b79k': { dimSize: 1024 }, | ||||
|   'ViT-H-14-quickgelu__dfn5b': { dimSize: 1024 }, | ||||
|   'ViT-H-14-378-quickgelu__dfn5b': { dimSize: 1024 }, | ||||
|   'ViT-g-14__laion2b-s12b-b42k': { dimSize: 1024 }, | ||||
|   'LABSE-Vit-L-14': { dimSize: 768 }, | ||||
|   'XLM-Roberta-Large-Vit-B-32': { dimSize: 512 }, | ||||
|   'XLM-Roberta-Large-Vit-B-16Plus': { dimSize: 640 }, | ||||
|   'ViT-L-14__laion2b-s32b-b82k': { dimSize: 768 }, | ||||
|   'ViT-L-14__laion400m_e31': { dimSize: 768 }, | ||||
|   'ViT-L-14__laion400m_e32': { dimSize: 768 }, | ||||
|   'ViT-L-14__openai': { dimSize: 768 }, | ||||
|   'XLM-Roberta-Large-Vit-L-14': { dimSize: 768 }, | ||||
|   'XLM-Roberta-Large-ViT-H-14__frozen_laion5b_s13b_b90k': { dimSize: 1024 }, | ||||
|   'nllb-clip-base-siglip__mrl': { dimSize: 768 }, | ||||
|   'nllb-clip-base-siglip__v1': { dimSize: 768 }, | ||||
|   RN50__cc12m: { dimSize: 1024 }, | ||||
|   RN50__openai: { dimSize: 1024 }, | ||||
|   RN50__yfcc15m: { dimSize: 1024 }, | ||||
|   RN50x64__openai: { dimSize: 1024 }, | ||||
|   'ViT-H-14-378-quickgelu__dfn5b': { dimSize: 1024 }, | ||||
|   'ViT-H-14-quickgelu__dfn5b': { dimSize: 1024 }, | ||||
|   'ViT-H-14__laion2b-s32b-b79k': { dimSize: 1024 }, | ||||
|   'ViT-L-16-SigLIP-256__webli': { dimSize: 1024 }, | ||||
|   'ViT-L-16-SigLIP-384__webli': { dimSize: 1024 }, | ||||
|   'ViT-g-14__laion2b-s12b-b42k': { dimSize: 1024 }, | ||||
|   'XLM-Roberta-Large-ViT-H-14__frozen_laion5b_s13b_b90k': { dimSize: 1024 }, | ||||
|   'ViT-SO400M-14-SigLIP-384__webli': { dimSize: 1152 }, | ||||
|   'nllb-clip-large-siglip__mrl': { dimSize: 1152 }, | ||||
|   'nllb-clip-large-siglip__v1': { dimSize: 1152 }, | ||||
| }; | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user