fix(ml): handle empty/corrupt images in face detection (#27391)

* fix(ml): handle empty/corrupt images in face detection

  When a corrupt or degenerate image with a zero dimension (0 width or 0 height) reaches the face detection pipeline, insightface's RetinaFace.detect() calls cv2.resize() with a target size of 0, triggering an OpenCV assertion failure:

      error: (-215:Assertion failed) inv_scale_x > 0 in function 'resize'

  This crashes the ML worker and returns a 500 error to the server.

  Add an early return in FaceDetector._predict() that checks for zero-dimension images after decoding and returns empty detection results instead of passing them to the insightface model.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(ml): move empty image validation to request level

  Per review feedback, validate image dimensions in the predict endpoint (returning 400) rather than in each model's _predict method. This catches all zero-dimension images before they reach any model task.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(ml): resolve mypy strict type error in predict endpoint

  Use intermediate `decoded` variable so mypy knows `.width` and `.height` are accessed on `Image`, not on `Image | str`.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-28 20:10:42 -04:00 · 2026-04-27 18:14:34 +03:00 · 2026-04-27 18:14:34 +03:00 · 5e89efba64
commit 5e89efba64
parent 5a457d72c9
2 changed files with 17 additions and 1 deletions
--- a/machine-learning/immich_ml/main.py
+++ b/machine-learning/immich_ml/main.py
@@ -183,7 +183,10 @@ async def predict(
    text: str | None = Form(default=None),
 ) -> Any:
    if image is not None:
-        inputs: Image | str = await run(lambda: decode_pil(image))
+        decoded = await run(lambda: decode_pil(image))
+        if decoded.width == 0 or decoded.height == 0:
+            raise HTTPException(400, "Image has zero width or height")
+        inputs: Image | str = decoded
    elif text is not None:
        inputs = text
    else:
--- a/machine-learning/test_main.py
+++ b/machine-learning/test_main.py
@@ -1198,6 +1198,19 @@ class TestLoad:
        mock_model.model_format = ModelFormat.ONNX


+@pytest.mark.parametrize("size", [(0, 100), (100, 0), (0, 0)])
+def test_predict_rejects_empty_image(size: tuple[int, int], deployed_app: TestClient) -> None:
+    with mock.patch("immich_ml.main.decode_pil", return_value=Image.new("RGB", size)):
+        response = deployed_app.post(
+            "http://localhost:3003/predict",
+            data={"entries": json.dumps({"clip": {"visual": {"modelName": "ViT-B-32__openai"}}})},
+            files={"image": b"fake image bytes"},
+        )
+
+    assert response.status_code == 400
+    assert "zero" in response.json()["detail"].lower()
+
+
 def test_root_endpoint(deployed_app: TestClient) -> None:
    response = deployed_app.get("http://localhost:3003")