diff --git a/machine-learning/export/immich_model_exporter/export.py b/machine-learning/export/immich_model_exporter/export.py
index 1f645a3276..2b4fb52b18 100644
--- a/machine-learning/export/immich_model_exporter/export.py
+++ b/machine-learning/export/immich_model_exporter/export.py
@@ -1,9 +1,10 @@
 from pathlib import Path

 import typer
-from exporters.constants import SOURCE_TO_METADATA, ModelSource
+from exporters.constants import DELETE_PATTERNS, SOURCE_TO_METADATA, ModelSource
 from exporters.onnx import export as onnx_export
 from exporters.rknn import export as rknn_export
+from tenacity import retry, stop_after_attempt, wait_fixed
 from typing_extensions import Annotated

 app = typer.Typer(pretty_exceptions_show_locals=False)
@@ -45,7 +46,8 @@ def main(
     hf_organization: str = "immich-app",
     hf_auth_token: Annotated[str | None, typer.Option(envvar="HF_AUTH_TOKEN")] = None,
 ):
-    hf_model_name = model_name.replace("xlm-roberta-large", "XLM-Roberta-Large")
+    hf_model_name = model_name.split("/")[-1]
+    hf_model_name = hf_model_name.replace("xlm-roberta-large", "XLM-Roberta-Large")
     hf_model_name = hf_model_name.replace("xlm-roberta-base", "XLM-Roberta-Base")
     output_dir = output_dir / hf_model_name
     match model_source:
@@ -75,21 +77,20 @@ def main(
         from huggingface_hub import create_repo, upload_folder

         repo_id = f"{hf_organization}/{hf_model_name}"
-        create_repo(repo_id, exist_ok=True, token=hf_auth_token)
-
-        # glob to delete old UUID blobs when reuploading models
-        uuid_char = "[a-fA-F0-9]"
-        uuid_glob = (
-            uuid_char * 8 + "-" + uuid_char * 4 + "-" + uuid_char * 4 + "-" + uuid_char * 4 + "-" + uuid_char * 12
-        )
-        upload_folder(
-            repo_id=repo_id,
-            folder_path=output_dir,
-            # remote repo files to be deleted before uploading
-            # deletion is in the same commit as the upload, so it's atomic
-            delete_patterns=[f"**/{uuid_glob}"],
-            token=hf_auth_token,
-        )
+
+        @retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
+        def upload_model():
+            create_repo(repo_id, exist_ok=True, token=hf_auth_token)
+            upload_folder(
+                repo_id=repo_id,
+                folder_path=output_dir,
+                # remote repo files to be deleted before uploading
+                # deletion is in the same commit as the upload, so it's atomic
+                delete_patterns=DELETE_PATTERNS,
+                token=hf_auth_token,
+            )
+
+        upload_model()


 if __name__ == "__main__":
diff --git a/machine-learning/export/immich_model_exporter/exporters/constants.py b/machine-learning/export/immich_model_exporter/exporters/constants.py
index 6686514286..8871535ea1 100644
--- a/machine-learning/export/immich_model_exporter/exporters/constants.py
+++ b/machine-learning/export/immich_model_exporter/exporters/constants.py
@@ -21,3 +21,34 @@ SOURCE_TO_METADATA = {
         "InsightFace", "https://github.com/deepinsight/insightface/tree/master", "facial recognition"
     ),
 }
+
+RKNN_SOCS = ["rk3566", "rk3576", "rk3588"]
+
+# the builder hangs when using flash attention with these models
+RKNN_VISUAL_FLASH_ATTENTION_BLACKLIST = {
+    "ViT-H-14-378-quickgelu__dfn5b",
+    "ViT-L-16-SigLIP-384__webli",
+    "ViT-L-16-SigLIP2-384__webli",
+    "ViT-L-16-SigLIP2-512__webli",
+    "ViT-SO400M-14-SigLIP-384__webli",
+    "ViT-SO400M-14-SigLIP2-378__webli",
+    "ViT-SO400M-16-SigLIP2-378__webli",
+    "ViT-SO400M-16-SigLIP2-512__webli",
+}
+
+
+# glob to delete old UUID blobs when reuploading models
+_uuid_char = "[a-fA-F0-9]"
+_uuid_glob = _uuid_char * 8 + "-" + _uuid_char * 4 + "-" + _uuid_char * 4 + "-" + _uuid_char * 4 + "-" + _uuid_char * 12
+DELETE_PATTERNS = [
+    "**/*onnx*",
+    "**/Constant*",
+    "**/*.weight",
+    "**/*.bias",
+    "**/*.proj",
+    "**/*in_proj_bias",
"**/*.npy", + "**/*.latent", + "**/*.pos_embed", + f"**/{_uuid_glob}", +] diff --git a/machine-learning/export/immich_model_exporter/exporters/onnx/models/mclip.py b/machine-learning/export/immich_model_exporter/exporters/onnx/models/mclip.py index f2f9f3d414..9a27e85982 100644 --- a/machine-learning/export/immich_model_exporter/exporters/onnx/models/mclip.py +++ b/machine-learning/export/immich_model_exporter/exporters/onnx/models/mclip.py @@ -38,7 +38,9 @@ def to_onnx( _export_text_encoder(model, textual_path, opset_version) else: print(f"Model {textual_path} already exists, skipping") - visual_path, _ = openclip_to_onnx(_MCLIP_TO_OPENCLIP[model_name], output_dir_visual, no_cache=no_cache) + visual_path, _ = openclip_to_onnx( + _MCLIP_TO_OPENCLIP[model_name], opset_version, output_dir_visual, no_cache=no_cache + ) assert visual_path is not None, "Visual model export failed" return visual_path, textual_path diff --git a/machine-learning/export/immich_model_exporter/exporters/onnx/models/openclip.py b/machine-learning/export/immich_model_exporter/exporters/onnx/models/openclip.py index e45d840088..8862fc6a5a 100644 --- a/machine-learning/export/immich_model_exporter/exporters/onnx/models/openclip.py +++ b/machine-learning/export/immich_model_exporter/exporters/onnx/models/openclip.py @@ -105,13 +105,14 @@ def _export_image_encoder( assert isinstance(output, torch.Tensor) return output + model.forward = encode_image + args = (torch.randn(1, 3, model_cfg.image_size, model_cfg.image_size),) - traced = torch.jit.trace(encode_image, args) # type: ignore[no-untyped-call] with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) torch.onnx.export( - traced, + model, args, output_path.as_posix(), input_names=["image"], @@ -133,13 +134,14 @@ def _export_text_encoder( assert isinstance(output, torch.Tensor) return output + model.forward = encode_text + args = (torch.ones(1, model_cfg.sequence_length, dtype=torch.int32),) - traced = torch.jit.trace(encode_text, args) # type: ignore[no-untyped-call] with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) torch.onnx.export( - traced, + model, args, output_path.as_posix(), input_names=["text"], diff --git a/machine-learning/export/immich_model_exporter/exporters/rknn.py b/machine-learning/export/immich_model_exporter/exporters/rknn.py index 6ef539f2c3..e8a61c53ee 100644 --- a/machine-learning/export/immich_model_exporter/exporters/rknn.py +++ b/machine-learning/export/immich_model_exporter/exporters/rknn.py @@ -1,6 +1,6 @@ from pathlib import Path -RKNN_SOCS = ["rk3566", "rk3576", "rk3588"] +from .constants import RKNN_SOCS, RKNN_VISUAL_FLASH_ATTENTION_BLACKLIST def _export_platform( @@ -22,11 +22,12 @@ def _export_platform( rknn = RKNN(verbose=False) + # flash_attention = model_dir.name != "visual" or model_dir.parent.name not in RKNN_VISUAL_FLASH_ATTENTION_BLACKLIST rknn.config( target_platform=target_platform, dynamic_input=dynamic_input, disable_rules=["fuse_matmul_softmax_matmul_to_sdpa"] if not fuse_matmul_softmax_matmul_to_sdpa else [], - enable_flash_attention=True, + enable_flash_attention=False, model_pruning=True, ) ret = rknn.load_onnx(model=input_path.as_posix()) @@ -49,13 +50,13 @@ def _export_platforms(model_dir: Path, dynamic_input=None, no_cache: bool = Fals fuse_matmul_softmax_matmul_to_sdpa = True for soc in RKNN_SOCS: try: - _export_platform(model_dir, soc, dynamic_input, fuse_matmul_softmax_matmul_to_sdpa) + _export_platform(model_dir, soc, dynamic_input, fuse_matmul_softmax_matmul_to_sdpa, 
no_cache=no_cache) except Exception as e: print(f"Failed to export model for {soc}: {e}") if "inputs or 'outputs' must be set" in str(e): print("Retrying without fuse_matmul_softmax_matmul_to_sdpa") fuse_matmul_softmax_matmul_to_sdpa = False - _export_platform(model_dir, soc, dynamic_input, fuse_matmul_softmax_matmul_to_sdpa) + _export_platform(model_dir, soc, dynamic_input, fuse_matmul_softmax_matmul_to_sdpa, no_cache=no_cache) def export(model_dir: Path, no_cache: bool = False): diff --git a/machine-learning/export/immich_model_exporter/run.py b/machine-learning/export/immich_model_exporter/run.py index 8fd11c76e2..ec3f21aafa 100644 --- a/machine-learning/export/immich_model_exporter/run.py +++ b/machine-learning/export/immich_model_exporter/run.py @@ -1,71 +1,69 @@ import subprocess models = [ - # "ViT-B-16-SigLIP__webli", - # "ViT-B-16-SigLIP-256__webli", - # "ViT-B-16-SigLIP-384__webli", - # "ViT-B-16-SigLIP-512__webli", - # "ViT-B-16-SigLIP-i18n-256__webli", - # "ViT-B-16-plus-240__laion400m_e31", - # "ViT-B-16-plus-240__laion400m_e32", - # "ViT-B-16__laion400m_e31", - # "ViT-B-16__laion400m_e32", - # "ViT-B-16__openai", - # "ViT-B-32__laion2b-s34b-b79k", - # "ViT-B-32__laion2b_e16", - # "ViT-B-32__laion400m_e31", - # "ViT-B-32__laion400m_e32", - # "ViT-B-32__openai", - # "ViT-L-14-336__openai", - # "ViT-B-16-SigLIP2__webli", - # "ViT-B-32-SigLIP2-256__webli", - # "ViT-B-32-SigLIP2-384__webli", # not available yet - # "ViT-B-32-SigLIP2-512__webli", # not available yet - # "ViT-L-16-SigLIP2-256__webli", - # "ViT-L-16-SigLIP2-384__webli", # rknn seems to hang - # "ViT-L-16-SigLIP2-512__webli", - "ViT-SO400M-14-SigLIP2__webli", + "M-CLIP/LABSE-Vit-L-14", + "M-CLIP/XLM-Roberta-Large-Vit-B-16Plus", + "M-CLIP/XLM-Roberta-Large-Vit-B-32", + "M-CLIP/XLM-Roberta-Large-Vit-L-14", + "RN101__openai", + "RN101__yfcc15m", + "RN50__cc12m", + "RN50__openai", + "RN50__yfcc15m", + "RN50x16__openai", + "RN50x4__openai", + "RN50x64__openai", + "ViT-B-16-SigLIP-256__webli", + "ViT-B-16-SigLIP-384__webli", + "ViT-B-16-SigLIP-512__webli", + "ViT-B-16-SigLIP-i18n-256__webli", + "ViT-B-16-SigLIP2__webli", + "ViT-B-16-SigLIP__webli", + "ViT-B-16-plus-240__laion400m_e31", + "ViT-B-16-plus-240__laion400m_e32", + "ViT-B-16__laion400m_e31", + "ViT-B-16__laion400m_e32", + "ViT-B-16__openai", + "ViT-B-32-SigLIP2-256__webli", + "ViT-B-32__laion2b-s34b-b79k", + "ViT-B-32__laion2b_e16", + "ViT-B-32__laion400m_e31", + "ViT-B-32__laion400m_e32", + "ViT-B-32__openai", + "ViT-H-14-378-quickgelu__dfn5b", + "ViT-H-14-quickgelu__dfn5b", + "ViT-H-14__laion2b-s32b-b79k", + "ViT-L-14-336__openai", + "ViT-L-14-quickgelu__dfn2b", + "ViT-L-14__laion2b-s32b-b82k", + "ViT-L-14__laion400m_e31", + "ViT-L-14__laion400m_e32", + "ViT-L-14__openai", + "ViT-L-16-SigLIP-256__webli", + "ViT-L-16-SigLIP-384__webli", + "ViT-L-16-SigLIP2-256__webli", + "ViT-L-16-SigLIP2-384__webli", + "ViT-L-16-SigLIP2-512__webli", + "ViT-SO400M-14-SigLIP-384__webli", "ViT-SO400M-14-SigLIP2-378__webli", + "ViT-SO400M-14-SigLIP2__webli", "ViT-SO400M-16-SigLIP2-256__webli", "ViT-SO400M-16-SigLIP2-384__webli", "ViT-SO400M-16-SigLIP2-512__webli", - # "ViT-gopt-16-SigLIP2-256__webli", - # "ViT-gopt-16-SigLIP2-384__webli", - # "ViT-L-14-quickgelu__dfn2b", - # "ViT-L-14__laion2b-s32b-b82k", - # "ViT-L-14__laion400m_e31", - # "ViT-L-14__laion400m_e32", - # "ViT-L-14__openai", - # "ViT-L-16-SigLIP-256__webli", - # "ViT-L-16-SigLIP-384__webli", - # "ViT-SO400M-14-SigLIP-384__webli", - # "ViT-H-14__laion2b-s32b-b79k", - # "ViT-H-14-quickgelu__dfn5b", - # 
"ViT-H-14-378-quickgelu__dfn5b", - # "RN101__openai", - # "RN101__yfcc15m", - # "RN50__cc12m", - # "RN50__openai", - # "RN50__yfcc15m", - # "RN50x16__openai", - # "RN50x4__openai", - # "RN50x64__openai", - # "nllb-clip-base-siglip__mrl", - # "nllb-clip-base-siglip__v1", - # "nllb-clip-large-siglip__mrl", - # "nllb-clip-large-siglip__v1", - # "xlm-roberta-base-ViT-B-32__laion5b_s13b_b90k", - # "xlm-roberta-large-ViT-H-14__frozen_laion5b_s13b_b90k", - # "M-CLIP/LABSE-Vit-L-14", - # "M-CLIP/XLM-Roberta-Large-Vit-B-16Plus", - # "M-CLIP/XLM-Roberta-Large-Vit-B-32", - # "M-CLIP/XLM-Roberta-Large-Vit-L-14", + "ViT-gopt-16-SigLIP2-256__webli", + "ViT-gopt-16-SigLIP2-384__webli", + "nllb-clip-base-siglip__mrl", + "nllb-clip-base-siglip__v1", + "nllb-clip-large-siglip__mrl", + "nllb-clip-large-siglip__v1", + "xlm-roberta-base-ViT-B-32__laion5b_s13b_b90k", + "xlm-roberta-large-ViT-H-14__frozen_laion5b_s13b_b90k", ] if __name__ == "__main__": for model in models: try: print(f"Exporting model {model}") - subprocess.check_call(["python", "export.py", model, "openclip"]) + subprocess.check_call(["python", "export.py", model, "mclip" if "M-CLIP" in model else "openclip"]) except Exception as e: print(f"Failed to export model {model}: {e}") diff --git a/machine-learning/export/pyproject.toml b/machine-learning/export/pyproject.toml index fc8bfc35ab..dc6c19d57c 100644 --- a/machine-learning/export/pyproject.toml +++ b/machine-learning/export/pyproject.toml @@ -13,6 +13,7 @@ dependencies = [ "typer>=0.15.2", "rknn-toolkit2>=2.3.0", "transformers>=4.49.0", + "tenacity>=9.0.0", ] [tool.uv] diff --git a/machine-learning/export/uv.lock b/machine-learning/export/uv.lock index ed6c15da28..ca5ee3a456 100644 --- a/machine-learning/export/uv.lock +++ b/machine-learning/export/uv.lock @@ -231,6 +231,7 @@ dependencies = [ { name = "onnxruntime" }, { name = "open-clip-torch" }, { name = "rknn-toolkit2" }, + { name = "tenacity" }, { name = "transformers" }, { name = "typer" }, ] @@ -243,6 +244,7 @@ requires-dist = [ { name = "onnxruntime", specifier = ">=1.16.0" }, { name = "open-clip-torch", specifier = ">=2.31.0" }, { name = "rknn-toolkit2", specifier = ">=2.3.0" }, + { name = "tenacity", specifier = ">=9.0.0" }, { name = "transformers", specifier = ">=4.49.0" }, { name = "typer", specifier = ">=0.15.2" }, ] @@ -971,6 +973,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b2/fe/81695a1aa331a842b582453b605175f419fe8540355886031328089d840a/sympy-1.13.1-py3-none-any.whl", hash = "sha256:db36cdc64bf61b9b24578b6f7bab1ecdd2452cf008f34faa33776680c26d66f8", size = 6189177 }, ] +[[package]] +name = "tenacity" +version = "9.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/94/91fccdb4b8110642462e653d5dcb27e7b674742ad68efd146367da7bdb10/tenacity-9.0.0.tar.gz", hash = "sha256:807f37ca97d62aa361264d497b0e31e92b8027044942bfa756160d908320d73b", size = 47421 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b6/cb/b86984bed139586d01532a587464b5805f12e397594f19f931c4c2fbfa61/tenacity-9.0.0-py3-none-any.whl", hash = "sha256:93de0c98785b27fcf659856aa9f54bfbd399e29969b0621bc7f762bd441b4539", size = 28169 }, +] + [[package]] name = "timm" version = "1.0.15"