chore(server): don't insert embeddings if the model has changed (#17885)

* chore(server): don't insert embeddings if the model has changed

We're moving away from the heuristic of waiting for queues to complete. The job
that inserts embeddings can simply check whether the model has changed before
inserting, rather than pausing the queue and waiting for it to drain.
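
As a rough sketch of that approach (names and types below are simplified stand-ins, not the actual service code), the guard looks like this:

type ClipConfig = { modelName: string };

// Sketch: the job re-reads the live config right before writing and skips the
// insert when the configured model no longer matches the one that produced the
// embedding. Dropping the stale job is cheaper than pausing and draining the queue.
async function handleInsertEmbedding(
  job: { assetId: string; embedding: string; modelName: string },
  getClipConfig: () => Promise<ClipConfig>,
  upsert: (assetId: string, embedding: string) => Promise<void>,
): Promise<'skipped' | 'success'> {
  const current = await getClipConfig();
  if (job.modelName !== current.modelName) {
    return 'skipped';
  }
  await upsert(job.assetId, job.embedding);
  return 'success';
}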

* more robust dim size update

* use check constraint

* index command cleanup

* add create statement

* update medium test, create appropriate extension

* new line

* set dimension size when running on all assets

* why does it want braces smh

* take 2

---------

Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com>
Thomas 2025-04-29 19:23:01 +01:00 committed by GitHub
parent 0e4cf9ac57
commit 3ce353393a
11 changed files with 82 additions and 136 deletions

View File

@@ -1,5 +1,5 @@
import { DatabaseExtension } from 'src/enum';
import { ConfigRepository } from 'src/repositories/config.repository';
import { vectorIndexQuery } from 'src/utils/database';
import { MigrationInterface, QueryRunner } from 'typeorm';
const vectorExtension = new ConfigRepository().getEnv().database.vectorExtension;
@@ -8,15 +8,9 @@ export class AddCLIPEmbeddingIndex1700713994428 implements MigrationInterface {
name = 'AddCLIPEmbeddingIndex1700713994428';
public async up(queryRunner: QueryRunner): Promise<void> {
if (vectorExtension === DatabaseExtension.VECTORS) {
await queryRunner.query(`SET vectors.pgvector_compatibility=on`);
}
await queryRunner.query(`SET search_path TO "$user", public, vectors`);
await queryRunner.query(`
CREATE INDEX IF NOT EXISTS clip_index ON smart_search
USING hnsw (embedding vector_cosine_ops)
WITH (ef_construction = 300, m = 16)`);
await queryRunner.query(vectorIndexQuery({ vectorExtension, table: 'smart_search', indexName: 'clip_index' }));
}
public async down(queryRunner: QueryRunner): Promise<void> {

View File

@@ -1,5 +1,5 @@
import { DatabaseExtension } from 'src/enum';
import { ConfigRepository } from 'src/repositories/config.repository';
import { vectorIndexQuery } from 'src/utils/database';
import { MigrationInterface, QueryRunner } from 'typeorm';
const vectorExtension = new ConfigRepository().getEnv().database.vectorExtension;
@@ -8,15 +8,9 @@ export class AddFaceEmbeddingIndex1700714033632 implements MigrationInterface {
name = 'AddFaceEmbeddingIndex1700714033632';
public async up(queryRunner: QueryRunner): Promise<void> {
if (vectorExtension === DatabaseExtension.VECTORS) {
await queryRunner.query(`SET vectors.pgvector_compatibility=on`);
}
await queryRunner.query(`SET search_path TO "$user", public, vectors`);
await queryRunner.query(`
CREATE INDEX IF NOT EXISTS face_index ON asset_faces
USING hnsw (embedding vector_cosine_ops)
WITH (ef_construction = 300, m = 16)`);
await queryRunner.query(vectorIndexQuery({ vectorExtension, table: 'asset_faces', indexName: 'face_index' }));
}
public async down(queryRunner: QueryRunner): Promise<void> {

View File

@@ -1,5 +1,6 @@
import { DatabaseExtension } from 'src/enum';
import { ConfigRepository } from 'src/repositories/config.repository';
import { vectorIndexQuery } from 'src/utils/database';
import { MigrationInterface, QueryRunner } from 'typeorm';
const vectorExtension = new ConfigRepository().getEnv().database.vectorExtension;
@@ -8,7 +9,6 @@ export class AddFaceSearchRelation1718486162779 implements MigrationInterface {
public async up(queryRunner: QueryRunner): Promise<void> {
if (vectorExtension === DatabaseExtension.VECTORS) {
await queryRunner.query(`SET search_path TO "$user", public, vectors`);
await queryRunner.query(`SET vectors.pgvector_compatibility=on`);
}
const hasEmbeddings = async (tableName: string): Promise<boolean> => {
@@ -47,21 +47,14 @@ export class AddFaceSearchRelation1718486162779 implements MigrationInterface {
await queryRunner.query(`ALTER TABLE face_search ALTER COLUMN embedding SET DATA TYPE real[]`);
await queryRunner.query(`ALTER TABLE face_search ALTER COLUMN embedding SET DATA TYPE vector(512)`);
await queryRunner.query(`
CREATE INDEX IF NOT EXISTS clip_index ON smart_search
USING hnsw (embedding vector_cosine_ops)
WITH (ef_construction = 300, m = 16)`);
await queryRunner.query(vectorIndexQuery({ vectorExtension, table: 'smart_search', indexName: 'clip_index' }));
await queryRunner.query(`
CREATE INDEX face_index ON face_search
USING hnsw (embedding vector_cosine_ops)
WITH (ef_construction = 300, m = 16)`);
await queryRunner.query(vectorIndexQuery({ vectorExtension, table: 'face_search', indexName: 'face_index' }));
}
public async down(queryRunner: QueryRunner): Promise<void> {
if (vectorExtension === DatabaseExtension.VECTORS) {
await queryRunner.query(`SET search_path TO "$user", public, vectors`);
await queryRunner.query(`SET vectors.pgvector_compatibility=on`);
}
await queryRunner.query(`ALTER TABLE asset_faces ADD COLUMN "embedding" vector(512)`);
@@ -74,9 +67,6 @@ export class AddFaceSearchRelation1718486162779 implements MigrationInterface {
WHERE id = fs."faceId"`);
await queryRunner.query(`DROP TABLE face_search`);
await queryRunner.query(`
CREATE INDEX face_index ON asset_faces
USING hnsw (embedding vector_cosine_ops)
WITH (ef_construction = 300, m = 16)`);
await queryRunner.query(vectorIndexQuery({ vectorExtension, table: 'asset_faces', indexName: 'face_index' }));
}
}

View File

@@ -12,6 +12,7 @@ import { DatabaseExtension, DatabaseLock, VectorIndex } from 'src/enum';
import { ConfigRepository } from 'src/repositories/config.repository';
import { LoggingRepository } from 'src/repositories/logging.repository';
import { ExtensionVersion, VectorExtension, VectorUpdateResult } from 'src/types';
import { vectorIndexQuery } from 'src/utils/database';
import { isValidInteger } from 'src/validation';
import { DataSource } from 'typeorm';
@@ -119,12 +120,7 @@ export class DatabaseRepository {
await sql`ALTER TABLE ${sql.raw(table)} ALTER COLUMN embedding SET DATA TYPE vector(${sql.raw(String(dimSize))})`.execute(
tx,
);
await sql`SET vectors.pgvector_compatibility=on`.execute(tx);
await sql`
CREATE INDEX IF NOT EXISTS ${sql.raw(index)} ON ${sql.raw(table)}
USING hnsw (embedding vector_cosine_ops)
WITH (ef_construction = 300, m = 16)
`.execute(tx);
await sql.raw(vectorIndexQuery({ vectorExtension: this.vectorExtension, table, indexName: index })).execute(tx);
});
}
}

View File

@@ -6,7 +6,8 @@ import { DB, Exif } from 'src/db';
import { DummyValue, GenerateSql } from 'src/decorators';
import { MapAsset } from 'src/dtos/asset-response.dto';
import { AssetStatus, AssetType } from 'src/enum';
import { anyUuid, asUuid, searchAssetBuilder } from 'src/utils/database';
import { ConfigRepository } from 'src/repositories/config.repository';
import { anyUuid, asUuid, searchAssetBuilder, vectorIndexQuery } from 'src/utils/database';
import { isValidInteger } from 'src/validation';
export interface SearchResult<T> {
@@ -201,7 +202,10 @@ export interface GetCameraMakesOptions {
@Injectable()
export class SearchRepository {
constructor(@InjectKysely() private db: Kysely<DB>) {}
constructor(
@InjectKysely() private db: Kysely<DB>,
private configRepository: ConfigRepository,
) {}
@GenerateSql({
params: [
@@ -446,8 +450,8 @@ export class SearchRepository {
async upsert(assetId: string, embedding: string): Promise<void> {
await this.db
.insertInto('smart_search')
.values({ assetId: asUuid(assetId), embedding } as any)
.onConflict((oc) => oc.column('assetId').doUpdateSet({ embedding } as any))
.values({ assetId, embedding })
.onConflict((oc) => oc.column('assetId').doUpdateSet((eb) => ({ embedding: eb.ref('excluded.embedding') })))
.execute();
}
@@ -469,19 +473,32 @@ export class SearchRepository {
return dimSize;
}
setDimensionSize(dimSize: number): Promise<void> {
async setDimensionSize(dimSize: number): Promise<void> {
if (!isValidInteger(dimSize, { min: 1, max: 2 ** 16 })) {
throw new Error(`Invalid CLIP dimension size: ${dimSize}`);
}
return this.db.transaction().execute(async (trx) => {
await sql`truncate ${sql.table('smart_search')}`.execute(trx);
// this is done in two transactions to handle concurrent writes
await this.db.transaction().execute(async (trx) => {
await sql`delete from ${sql.table('smart_search')}`.execute(trx);
await trx.schema.alterTable('smart_search').dropConstraint('dim_size_constraint').ifExists().execute();
await sql`alter table ${sql.table('smart_search')} add constraint dim_size_constraint check (array_length(embedding::real[], 1) = ${sql.lit(dimSize)})`.execute(
trx,
);
});
const vectorExtension = this.configRepository.getEnv().database.vectorExtension;
await this.db.transaction().execute(async (trx) => {
await sql`drop index if exists clip_index`.execute(trx);
await trx.schema
.alterTable('smart_search')
.alterColumn('embedding', (col) => col.setDataType(sql.raw(`vector(${dimSize})`)))
.execute();
await sql`reindex index clip_index`.execute(trx);
await sql.raw(vectorIndexQuery({ vectorExtension, table: 'smart_search', indexName: 'clip_index' })).execute(trx);
await trx.schema.alterTable('smart_search').dropConstraint('dim_size_constraint').ifExists().execute();
});
await sql`vacuum analyze ${sql.table('smart_search')}`.execute(this.db);
}
async deleteAllSearchEmbeddings(): Promise<void> {
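
The temporary check constraint appears to be what bridges the gap between the two transactions: while the column is still being retyped in the second transaction, a concurrent insert whose embedding has the old dimensionality violates the array_length check instead of landing in the table. A condensed sketch of the same pattern against a hypothetical embeddings table (table and index names are placeholders, not the production code):

import { Kysely, sql } from 'kysely';

// Sketch only: guard a dimension change with a temporary CHECK constraint,
// then retype the column and rebuild the HNSW index in a second transaction.
async function changeDimension(db: Kysely<any>, dimSize: number): Promise<void> {
  await db.transaction().execute(async (trx) => {
    await sql`delete from embeddings`.execute(trx);
    await sql`alter table embeddings drop constraint if exists dim_size_constraint`.execute(trx);
    // Concurrent inserts with a different dimensionality now fail this check.
    await sql`alter table embeddings add constraint dim_size_constraint
      check (array_length(embedding::real[], 1) = ${sql.lit(dimSize)})`.execute(trx);
  });
  await db.transaction().execute(async (trx) => {
    await sql`drop index if exists embeddings_idx`.execute(trx);
    await sql`alter table embeddings alter column embedding type vector(${sql.raw(String(dimSize))})`.execute(trx);
    await sql`create index embeddings_idx on embeddings using hnsw (embedding vector_cosine_ops)`.execute(trx);
    // The constraint was only needed while the column type was in flux.
    await sql`alter table embeddings drop constraint if exists dim_size_constraint`.execute(trx);
  });
}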

View File

@@ -47,14 +47,8 @@ import { UserTable } from 'src/schema/tables/user.table';
import { VersionHistoryTable } from 'src/schema/tables/version-history.table';
import { ConfigurationParameter, Database, Extensions } from 'src/sql-tools';
@Extensions(['uuid-ossp', 'unaccent', 'cube', 'earthdistance', 'pg_trgm', 'vectors', 'plpgsql'])
@Extensions(['uuid-ossp', 'unaccent', 'cube', 'earthdistance', 'pg_trgm', 'plpgsql'])
@ConfigurationParameter({ name: 'search_path', value: () => '"$user", public, vectors', scope: 'database' })
@ConfigurationParameter({
name: 'vectors.pgvector_compatibility',
value: () => 'on',
scope: 'user',
synchronize: false,
})
@Database({ name: 'immich' })
export class ImmichDatabase {
tables = [

View File

@@ -2,6 +2,7 @@ import { Kysely, sql } from 'kysely';
import { DatabaseExtension } from 'src/enum';
import { ConfigRepository } from 'src/repositories/config.repository';
import { LoggingRepository } from 'src/repositories/logging.repository';
import { vectorIndexQuery } from 'src/utils/database';
const vectorExtension = new ConfigRepository().getEnv().database.vectorExtension;
const lastMigrationSql = sql<{ name: string }>`SELECT "name" FROM "migrations" ORDER BY "timestamp" DESC LIMIT 1;`;
@@ -29,7 +30,7 @@ export async function up(db: Kysely<any>): Promise<void> {
await sql`CREATE EXTENSION IF NOT EXISTS "cube";`.execute(db);
await sql`CREATE EXTENSION IF NOT EXISTS "earthdistance";`.execute(db);
await sql`CREATE EXTENSION IF NOT EXISTS "pg_trgm";`.execute(db);
await sql`CREATE EXTENSION IF NOT EXISTS "vectors";`.execute(db);
await sql`CREATE EXTENSION IF NOT EXISTS ${sql.raw(vectorExtension)}`.execute(db);
await sql`CREATE OR REPLACE FUNCTION immich_uuid_v7(p_timestamp timestamp with time zone default clock_timestamp())
RETURNS uuid
VOLATILE LANGUAGE SQL
@@ -108,7 +109,6 @@ export async function up(db: Kysely<any>): Promise<void> {
$$;`.execute(db);
if (vectorExtension === DatabaseExtension.VECTORS) {
await sql`SET search_path TO "$user", public, vectors`.execute(db);
await sql`SET vectors.pgvector_compatibility=on`.execute(db);
}
await sql`CREATE TYPE "assets_status_enum" AS ENUM ('active','trashed','deleted');`.execute(db);
await sql`CREATE TYPE "sourcetype" AS ENUM ('machine-learning','exif','manual');`.execute(db);
@@ -293,7 +293,7 @@ export async function up(db: Kysely<any>): Promise<void> {
await sql`CREATE INDEX "IDX_live_photo_cid" ON "exif" ("livePhotoCID")`.execute(db);
await sql`CREATE INDEX "IDX_auto_stack_id" ON "exif" ("autoStackId")`.execute(db);
await sql`CREATE INDEX "IDX_asset_exif_update_id" ON "exif" ("updateId")`.execute(db);
await sql`CREATE INDEX "face_index" ON "face_search" USING hnsw (embedding vector_cosine_ops) WITH (ef_construction = 300, m = 16)`.execute(db);
await sql.raw(vectorIndexQuery({ vectorExtension, table: 'face_search', indexName: 'face_index' })).execute(db);
await sql`CREATE INDEX "IDX_geodata_gist_earthcoord" ON "geodata_places" (ll_to_earth_public(latitude, longitude))`.execute(db);
await sql`CREATE INDEX "idx_geodata_places_name" ON "geodata_places" USING gin (f_unaccent("name") gin_trgm_ops)`.execute(db);
await sql`CREATE INDEX "idx_geodata_places_admin2_name" ON "geodata_places" USING gin (f_unaccent("admin2Name") gin_trgm_ops)`.execute(db);
@@ -316,7 +316,7 @@ export async function up(db: Kysely<any>): Promise<void> {
await sql`CREATE INDEX "IDX_sharedlink_albumId" ON "shared_links" ("albumId")`.execute(db);
await sql`CREATE INDEX "IDX_5b7decce6c8d3db9593d6111a6" ON "shared_link__asset" ("assetsId")`.execute(db);
await sql`CREATE INDEX "IDX_c9fab4aa97ffd1b034f3d6581a" ON "shared_link__asset" ("sharedLinksId")`.execute(db);
await sql`CREATE INDEX "clip_index" ON "smart_search" USING hnsw (embedding vector_cosine_ops) WITH (ef_construction = 300, m = 16)`.execute(db);
await sql.raw(vectorIndexQuery({ vectorExtension, table: 'smart_search', indexName: 'clip_index' })).execute(db);
await sql`CREATE INDEX "IDX_d8ddd9d687816cc490432b3d4b" ON "session_sync_checkpoints" ("sessionId")`.execute(db);
await sql`CREATE INDEX "IDX_session_sync_checkpoints_update_id" ON "session_sync_checkpoints" ("updateId")`.execute(db);
await sql`CREATE INDEX "IDX_92e67dc508c705dd66c9461557" ON "tags" ("userId")`.execute(db);

View File

@@ -58,10 +58,6 @@ describe(SmartInfoService.name, () => {
expect(mocks.search.getDimensionSize).not.toHaveBeenCalled();
expect(mocks.search.setDimensionSize).not.toHaveBeenCalled();
expect(mocks.search.deleteAllSearchEmbeddings).not.toHaveBeenCalled();
expect(mocks.job.getQueueStatus).not.toHaveBeenCalled();
expect(mocks.job.pause).not.toHaveBeenCalled();
expect(mocks.job.waitForQueueCompletion).not.toHaveBeenCalled();
expect(mocks.job.resume).not.toHaveBeenCalled();
});
it('should return if model and DB dimension size are equal', async () => {
@@ -72,38 +68,15 @@ describe(SmartInfoService.name, () => {
expect(mocks.search.getDimensionSize).toHaveBeenCalledTimes(1);
expect(mocks.search.setDimensionSize).not.toHaveBeenCalled();
expect(mocks.search.deleteAllSearchEmbeddings).not.toHaveBeenCalled();
expect(mocks.job.getQueueStatus).not.toHaveBeenCalled();
expect(mocks.job.pause).not.toHaveBeenCalled();
expect(mocks.job.waitForQueueCompletion).not.toHaveBeenCalled();
expect(mocks.job.resume).not.toHaveBeenCalled();
});
it('should update DB dimension size if model and DB have different values', async () => {
mocks.search.getDimensionSize.mockResolvedValue(768);
mocks.job.getQueueStatus.mockResolvedValue({ isActive: false, isPaused: false });
await sut.onConfigInit({ newConfig: systemConfigStub.machineLearningEnabled as SystemConfig });
expect(mocks.search.getDimensionSize).toHaveBeenCalledTimes(1);
expect(mocks.search.setDimensionSize).toHaveBeenCalledWith(512);
expect(mocks.job.getQueueStatus).toHaveBeenCalledTimes(1);
expect(mocks.job.pause).toHaveBeenCalledTimes(1);
expect(mocks.job.waitForQueueCompletion).toHaveBeenCalledTimes(1);
expect(mocks.job.resume).toHaveBeenCalledTimes(1);
});
it('should skip pausing and resuming queue if already paused', async () => {
mocks.search.getDimensionSize.mockResolvedValue(768);
mocks.job.getQueueStatus.mockResolvedValue({ isActive: false, isPaused: true });
await sut.onConfigInit({ newConfig: systemConfigStub.machineLearningEnabled as SystemConfig });
expect(mocks.search.getDimensionSize).toHaveBeenCalledTimes(1);
expect(mocks.search.setDimensionSize).toHaveBeenCalledWith(512);
expect(mocks.job.getQueueStatus).toHaveBeenCalledTimes(1);
expect(mocks.job.pause).not.toHaveBeenCalled();
expect(mocks.job.waitForQueueCompletion).toHaveBeenCalledTimes(1);
expect(mocks.job.resume).not.toHaveBeenCalled();
});
});
@@ -120,10 +93,6 @@ describe(SmartInfoService.name, () => {
expect(mocks.search.getDimensionSize).not.toHaveBeenCalled();
expect(mocks.search.setDimensionSize).not.toHaveBeenCalled();
expect(mocks.search.deleteAllSearchEmbeddings).not.toHaveBeenCalled();
expect(mocks.job.getQueueStatus).not.toHaveBeenCalled();
expect(mocks.job.pause).not.toHaveBeenCalled();
expect(mocks.job.waitForQueueCompletion).not.toHaveBeenCalled();
expect(mocks.job.resume).not.toHaveBeenCalled();
});
it('should return if model and DB dimension size are equal', async () => {
@@ -141,15 +110,10 @@ describe(SmartInfoService.name, () => {
expect(mocks.search.getDimensionSize).toHaveBeenCalledTimes(1);
expect(mocks.search.setDimensionSize).not.toHaveBeenCalled();
expect(mocks.search.deleteAllSearchEmbeddings).not.toHaveBeenCalled();
expect(mocks.job.getQueueStatus).not.toHaveBeenCalled();
expect(mocks.job.pause).not.toHaveBeenCalled();
expect(mocks.job.waitForQueueCompletion).not.toHaveBeenCalled();
expect(mocks.job.resume).not.toHaveBeenCalled();
});
it('should update DB dimension size if model and DB have different values', async () => {
mocks.search.getDimensionSize.mockResolvedValue(512);
mocks.job.getQueueStatus.mockResolvedValue({ isActive: false, isPaused: false });
await sut.onConfigUpdate({
newConfig: {
@@ -162,15 +126,10 @@ describe(SmartInfoService.name, () => {
expect(mocks.search.getDimensionSize).toHaveBeenCalledTimes(1);
expect(mocks.search.setDimensionSize).toHaveBeenCalledWith(768);
expect(mocks.job.getQueueStatus).toHaveBeenCalledTimes(1);
expect(mocks.job.pause).toHaveBeenCalledTimes(1);
expect(mocks.job.waitForQueueCompletion).toHaveBeenCalledTimes(1);
expect(mocks.job.resume).toHaveBeenCalledTimes(1);
});
it('should clear embeddings if old and new models are different', async () => {
mocks.search.getDimensionSize.mockResolvedValue(512);
mocks.job.getQueueStatus.mockResolvedValue({ isActive: false, isPaused: false });
await sut.onConfigUpdate({
newConfig: {
@@ -184,31 +143,6 @@ describe(SmartInfoService.name, () => {
expect(mocks.search.deleteAllSearchEmbeddings).toHaveBeenCalled();
expect(mocks.search.getDimensionSize).toHaveBeenCalledTimes(1);
expect(mocks.search.setDimensionSize).not.toHaveBeenCalled();
expect(mocks.job.getQueueStatus).toHaveBeenCalledTimes(1);
expect(mocks.job.pause).toHaveBeenCalledTimes(1);
expect(mocks.job.waitForQueueCompletion).toHaveBeenCalledTimes(1);
expect(mocks.job.resume).toHaveBeenCalledTimes(1);
});
it('should skip pausing and resuming queue if already paused', async () => {
mocks.search.getDimensionSize.mockResolvedValue(512);
mocks.job.getQueueStatus.mockResolvedValue({ isActive: false, isPaused: true });
await sut.onConfigUpdate({
newConfig: {
machineLearning: { clip: { modelName: 'ViT-B-32__openai', enabled: true }, enabled: true },
} as SystemConfig,
oldConfig: {
machineLearning: { clip: { modelName: 'ViT-B-16__openai', enabled: true }, enabled: true },
} as SystemConfig,
});
expect(mocks.search.getDimensionSize).toHaveBeenCalledTimes(1);
expect(mocks.search.setDimensionSize).not.toHaveBeenCalled();
expect(mocks.job.getQueueStatus).toHaveBeenCalledTimes(1);
expect(mocks.job.pause).not.toHaveBeenCalled();
expect(mocks.job.waitForQueueCompletion).toHaveBeenCalledTimes(1);
expect(mocks.job.resume).not.toHaveBeenCalled();
});
});
@@ -220,6 +154,7 @@ describe(SmartInfoService.name, () => {
expect(mocks.asset.getAll).not.toHaveBeenCalled();
expect(mocks.asset.getWithout).not.toHaveBeenCalled();
expect(mocks.search.setDimensionSize).not.toHaveBeenCalled();
});
it('should queue the assets without clip embeddings', async () => {
@@ -234,7 +169,7 @@ describe(SmartInfoService.name, () => {
{ name: JobName.SMART_SEARCH, data: { id: assetStub.image.id } },
]);
expect(mocks.asset.getWithout).toHaveBeenCalledWith({ skip: 0, take: 1000 }, WithoutProperty.SMART_SEARCH);
expect(mocks.search.deleteAllSearchEmbeddings).not.toHaveBeenCalled();
expect(mocks.search.setDimensionSize).not.toHaveBeenCalled();
});
it('should queue all the assets', async () => {
@@ -249,7 +184,7 @@ describe(SmartInfoService.name, () => {
{ name: JobName.SMART_SEARCH, data: { id: assetStub.image.id } },
]);
expect(mocks.asset.getAll).toHaveBeenCalled();
expect(mocks.search.deleteAllSearchEmbeddings).toHaveBeenCalled();
expect(mocks.search.setDimensionSize).toHaveBeenCalledExactlyOnceWith(512);
});
});

View File

@@ -50,12 +50,6 @@ export class SmartInfoService extends BaseService {
return;
}
const { isPaused } = await this.jobRepository.getQueueStatus(QueueName.SMART_SEARCH);
if (!isPaused) {
await this.jobRepository.pause(QueueName.SMART_SEARCH);
}
await this.jobRepository.waitForQueueCompletion(QueueName.SMART_SEARCH);
if (dimSizeChange) {
this.logger.log(
`Dimension size of model ${newConfig.machineLearning.clip.modelName} is ${dimSize}, but database expects ${dbDimSize}.`,
@@ -67,9 +61,8 @@ export class SmartInfoService extends BaseService {
await this.searchRepository.deleteAllSearchEmbeddings();
}
if (!isPaused) {
await this.jobRepository.resume(QueueName.SMART_SEARCH);
}
// TODO: A job to reindex all assets should be scheduled, though user
// confirmation should probably be requested before doing that.
});
}
@@ -81,7 +74,9 @@ export class SmartInfoService extends BaseService {
}
if (force) {
await this.searchRepository.deleteAllSearchEmbeddings();
const { dimSize } = getCLIPModelInfo(machineLearning.clip.modelName);
// in addition to deleting embeddings, update the dimension size in case it failed earlier
await this.searchRepository.setDimensionSize(dimSize);
}
const assetPagination = usePagination(JOBS_ASSET_PAGINATION_SIZE, (pagination) => {
@@ -126,6 +121,12 @@ export class SmartInfoService extends BaseService {
await this.databaseRepository.wait(DatabaseLock.CLIPDimSize);
}
const newConfig = await this.getConfig({ withCache: true });
if (machineLearning.clip.modelName !== newConfig.machineLearning.clip.modelName) {
// Skip the job if the model has changed since the embedding was generated.
return JobStatus.SKIPPED;
}
await this.searchRepository.upsert(asset.id, embedding);
return JobStatus.SUCCESS;

View File

@@ -17,10 +17,10 @@ import { parse } from 'pg-connection-string';
import postgres, { Notice } from 'postgres';
import { columns, Exif, Person } from 'src/database';
import { DB } from 'src/db';
import { AssetFileType } from 'src/enum';
import { AssetFileType, DatabaseExtension } from 'src/enum';
import { TimeBucketSize } from 'src/repositories/asset.repository';
import { AssetSearchBuilderOptions } from 'src/repositories/search.repository';
import { DatabaseConnectionParams } from 'src/types';
import { DatabaseConnectionParams, VectorExtension } from 'src/types';
type Ssl = 'require' | 'allow' | 'prefer' | 'verify-full' | boolean | object;
@@ -373,3 +373,28 @@ export function searchAssetBuilder(kysely: Kysely<DB>, options: AssetSearchBuild
.$if(!!(options.withFaces || options.withPeople || options.personIds), (qb) => qb.select(withFacesAndPeople))
.$if(!options.withDeleted, (qb) => qb.where('assets.deletedAt', 'is', null));
}
type VectorIndexOptions = { vectorExtension: VectorExtension; table: string; indexName: string };
export function vectorIndexQuery({ vectorExtension, table, indexName }: VectorIndexOptions): string {
switch (vectorExtension) {
case DatabaseExtension.VECTORS: {
return `
CREATE INDEX IF NOT EXISTS ${indexName} ON ${table}
USING vectors (embedding vector_cos_ops) WITH (options = $$
[indexing.hnsw]
m = 16
ef_construction = 300
$$)`;
}
case DatabaseExtension.VECTOR: {
return `
CREATE INDEX IF NOT EXISTS ${indexName} ON ${table}
USING hnsw (embedding vector_cosine_ops)
WITH (ef_construction = 300, m = 16)`;
}
default: {
throw new Error(`Unsupported vector extension: '${vectorExtension}'`);
}
}
}
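
For reference, calling the helper with the arguments the migrations use produces the plain hnsw statement when pgvector is configured:

// Example usage with the values used elsewhere in this diff.
const createClipIndex = vectorIndexQuery({
  vectorExtension: DatabaseExtension.VECTOR, // DatabaseExtension.VECTORS selects the `USING vectors` form above
  table: 'smart_search',
  indexName: 'clip_index',
});
// createClipIndex:
//   CREATE INDEX IF NOT EXISTS clip_index ON smart_search
//   USING hnsw (embedding vector_cosine_ops)
//   WITH (ef_construction = 300, m = 16)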

View File

@@ -173,7 +173,7 @@ export const getRepository = <K extends keyof RepositoriesTypes>(key: K, db: Kys
}
case 'search': {
return new SearchRepository(db);
return new SearchRepository(db, new ConfigRepository());
}
case 'session': {