From a808b8610e762eab17038a0b25c2ca84d7e1e691 Mon Sep 17 00:00:00 2001 From: Tom Graham Date: Fri, 28 Feb 2025 03:14:09 +1100 Subject: [PATCH] fix(server): Fix delay with multiple ml servers (#16284) * Prospective fix for ensuring that known active ML servers are used to reduce search delay. * Added some logging and renamed backoff const. * Fix lint issues. * Update to use env vars for timeouts and updated documentation and strings. * Fix docs. * Make counter logic clearer. * Minor readability improvements. * Extract skipUrl logic per feedback, and change log to verbose. * Make code harder to read. --- docs/docs/administration/system-settings.md | 8 +++ docs/docs/install/environment-variables.md | 2 + i18n/en.json | 2 +- server/src/constants.ts | 5 ++ .../machine-learning.repository.ts | 66 +++++++++++++++++++ 5 files changed, 82 insertions(+), 1 deletion(-) diff --git a/docs/docs/administration/system-settings.md b/docs/docs/administration/system-settings.md index 92b910a01b..f241050136 100644 --- a/docs/docs/administration/system-settings.md +++ b/docs/docs/administration/system-settings.md @@ -98,6 +98,14 @@ The default Immich log level is `Log` (commonly known as `Info`). The Immich adm Through this setting, you can manage all the settings related to machine learning in Immich, from the setting of remote machine learning to the model and its parameters You can choose to disable a certain type of machine learning, for example smart search or facial recognition. +### URL + +The built-in (`http://immich-machine-learning:3003`) machine learning server will be configured by default, but you can change this or add additional servers. + +Hosting the `immich-machine-learning` container on a machine with a more powerful GPU can be helpful for processing a large number of photos (such as during batch import) or for faster search. 
+ +If more than one URL is provided, each server will be attempted one-at-a-time until one responds successfully, in order from first to last. Servers that don't respond will be temporarily ignored until they come back online. + ### Smart Search The [smart search](/docs/features/searching) settings allow you to change the [CLIP model](https://openai.com/research/clip). Larger models will typically provide [more accurate search results](https://github.com/immich-app/immich/discussions/11862) but consume more processing power and RAM. When [changing the CLIP model](/docs/FAQ#can-i-use-a-custom-clip-model) it is mandatory to re-run the Smart Search job on all images to fully apply the change. diff --git a/docs/docs/install/environment-variables.md b/docs/docs/install/environment-variables.md index a57eef540d..16f05b6338 100644 --- a/docs/docs/install/environment-variables.md +++ b/docs/docs/install/environment-variables.md @@ -168,6 +168,8 @@ Redis (Sentinel) URL example JSON before encoding: | `MACHINE_LEARNING_ANN_TUNING_LEVEL` | ARM-NN GPU tuning level (1: rapid, 2: normal, 3: exhaustive) | `2` | machine learning | | `MACHINE_LEARNING_DEVICE_IDS`\*4 | Device IDs to use in multi-GPU environments | `0` | machine learning | | `MACHINE_LEARNING_MAX_BATCH_SIZE__FACIAL_RECOGNITION` | Set the maximum number of faces that will be processed at once by the facial recognition model | None (`1` if using OpenVINO) | machine learning | +| `MACHINE_LEARNING_PING_TIMEOUT` | How long (ms) to wait for a PING response when checking if an ML server is available | `2000` | server | +| `MACHINE_LEARNING_AVAILABILITY_BACKOFF_TIME` | How long (ms) to ignore ML servers that are offline before trying again | `30000` | server | \*1: It is recommended to begin with this parameter when changing the concurrency levels of the machine learning service and then tune the other ones. 
diff --git a/i18n/en.json b/i18n/en.json index 1bf118976e..e35f1906c4 100644 --- a/i18n/en.json +++ b/i18n/en.json @@ -131,7 +131,7 @@ "machine_learning_smart_search_description": "Search for images semantically using CLIP embeddings", "machine_learning_smart_search_enabled": "Enable smart search", "machine_learning_smart_search_enabled_description": "If disabled, images will not be encoded for smart search.", - "machine_learning_url_description": "The URL of the machine learning server. If more than one URL is provided, each server will be attempted one-at-a-time until one responds successfully, in order from first to last.", + "machine_learning_url_description": "The URL of the machine learning server. If more than one URL is provided, each server will be attempted one-at-a-time until one responds successfully, in order from first to last. Servers that don't respond will be temporarily ignored until they come back online.", "manage_concurrency": "Manage Concurrency", "manage_log_settings": "Manage log settings", "map_dark_style": "Dark style", diff --git a/server/src/constants.ts b/server/src/constants.ts index 889ce81620..20ce7dd497 100644 --- a/server/src/constants.ts +++ b/server/src/constants.ts @@ -38,6 +38,11 @@ export const ONE_HOUR = Duration.fromObject({ hours: 1 }); export const APP_MEDIA_LOCATION = process.env.IMMICH_MEDIA_LOCATION || './upload'; +export const MACHINE_LEARNING_PING_TIMEOUT = Number(process.env.MACHINE_LEARNING_PING_TIMEOUT || 2000); +export const MACHINE_LEARNING_AVAILABILITY_BACKOFF_TIME = Number( + process.env.MACHINE_LEARNING_AVAILABILITY_BACKOFF_TIME || 30_000, +); + export const citiesFile = 'cities500.txt'; export const MOBILE_REDIRECT = 'app.immich:///oauth-callback'; diff --git a/server/src/repositories/machine-learning.repository.ts b/server/src/repositories/machine-learning.repository.ts index 8145bf3154..5e916c71f3 100644 --- a/server/src/repositories/machine-learning.repository.ts +++ 
b/server/src/repositories/machine-learning.repository.ts @@ -1,5 +1,6 @@ import { Injectable } from '@nestjs/common'; import { readFile } from 'node:fs/promises'; +import { MACHINE_LEARNING_AVAILABILITY_BACKOFF_TIME, MACHINE_LEARNING_PING_TIMEOUT } from 'src/constants'; import { CLIPConfig } from 'src/dtos/model-config.dto'; import { LoggingRepository } from 'src/repositories/logging.repository'; @@ -55,16 +56,80 @@ export type MachineLearningRequest = ClipVisualRequest | ClipTextualRequest | Fa @Injectable() export class MachineLearningRepository { + // Note that deleted URL's are not removed from this map (ie: they're leaked) + // Cleaning them up is low priority since there should be very few over a + // typical server uptime cycle + private urlAvailability: { + [url: string]: + | { + active: boolean; + lastChecked: number; + } + | undefined; + }; + constructor(private logger: LoggingRepository) { this.logger.setContext(MachineLearningRepository.name); + this.urlAvailability = {}; + } + + private setUrlAvailability(url: string, active: boolean) { + const current = this.urlAvailability[url]; + if (current?.active !== active) { + this.logger.verbose(`Setting ${url} ML server to ${active ? 
'active' : 'inactive'}.`); + } + this.urlAvailability[url] = { + active, + lastChecked: Date.now(), + }; + } + + private async checkAvailability(url: string) { + let active = false; + try { + const response = await fetch(new URL('/ping', url), { + signal: AbortSignal.timeout(MACHINE_LEARNING_PING_TIMEOUT), + }); + active = response.ok; + } catch {} + this.setUrlAvailability(url, active); + return active; + } + + private async shouldSkipUrl(url: string) { + const availability = this.urlAvailability[url]; + if (availability === undefined) { + // If this is a new endpoint, then check inline and skip if it fails + if (!(await this.checkAvailability(url))) { + return true; + } + return false; + } + if (!availability.active && Date.now() - availability.lastChecked < MACHINE_LEARNING_AVAILABILITY_BACKOFF_TIME) { + // If this is an old inactive endpoint that hasn't been checked in a + // while then check but don't wait for the result, just skip it + // This avoids delays on every search whilst allowing higher priority + // ML servers to recover over time. + void this.checkAvailability(url); + return true; + } + return false; } private async predict(urls: string[], payload: ModelPayload, config: MachineLearningRequest): Promise { const formData = await this.getFormData(payload, config); + let urlCounter = 0; for (const url of urls) { + urlCounter++; + const isLast = urlCounter >= urls.length; + if (!isLast && (await this.shouldSkipUrl(url))) { + continue; + } + try { const response = await fetch(new URL('/predict', url), { method: 'POST', body: formData }); if (response.ok) { + this.setUrlAvailability(url, true); return response.json(); } @@ -76,6 +141,7 @@ export class MachineLearningRepository { `Machine learning request to "${url}" failed: ${error instanceof Error ? error.message : error}`, ); } + this.setUrlAvailability(url, false); } throw new Error(`Machine learning request '${JSON.stringify(config)}' failed for all URLs`);