diff --git a/server/src/migrations/1700713994428-AddCLIPEmbeddingIndex.ts b/server/src/migrations/1700713994428-AddCLIPEmbeddingIndex.ts index 993e12f822..b5d47bb8cd 100644 --- a/server/src/migrations/1700713994428-AddCLIPEmbeddingIndex.ts +++ b/server/src/migrations/1700713994428-AddCLIPEmbeddingIndex.ts @@ -1,5 +1,5 @@ -import { DatabaseExtension } from 'src/enum'; import { ConfigRepository } from 'src/repositories/config.repository'; +import { vectorIndexQuery } from 'src/utils/database'; import { MigrationInterface, QueryRunner } from 'typeorm'; const vectorExtension = new ConfigRepository().getEnv().database.vectorExtension; @@ -8,15 +8,9 @@ export class AddCLIPEmbeddingIndex1700713994428 implements MigrationInterface { name = 'AddCLIPEmbeddingIndex1700713994428'; public async up(queryRunner: QueryRunner): Promise { - if (vectorExtension === DatabaseExtension.VECTORS) { - await queryRunner.query(`SET vectors.pgvector_compatibility=on`); - } await queryRunner.query(`SET search_path TO "$user", public, vectors`); - await queryRunner.query(` - CREATE INDEX IF NOT EXISTS clip_index ON smart_search - USING hnsw (embedding vector_cosine_ops) - WITH (ef_construction = 300, m = 16)`); + await queryRunner.query(vectorIndexQuery({ vectorExtension, table: 'smart_search', indexName: 'clip_index' })); } public async down(queryRunner: QueryRunner): Promise { diff --git a/server/src/migrations/1700714033632-AddFaceEmbeddingIndex.ts b/server/src/migrations/1700714033632-AddFaceEmbeddingIndex.ts index 182aae4e42..2b41788fe4 100644 --- a/server/src/migrations/1700714033632-AddFaceEmbeddingIndex.ts +++ b/server/src/migrations/1700714033632-AddFaceEmbeddingIndex.ts @@ -1,5 +1,5 @@ -import { DatabaseExtension } from 'src/enum'; import { ConfigRepository } from 'src/repositories/config.repository'; +import { vectorIndexQuery } from 'src/utils/database'; import { MigrationInterface, QueryRunner } from 'typeorm'; const vectorExtension = new ConfigRepository().getEnv().database.vectorExtension; @@ -8,15 +8,9 @@ export class AddFaceEmbeddingIndex1700714033632 implements MigrationInterface { name = 'AddFaceEmbeddingIndex1700714033632'; public async up(queryRunner: QueryRunner): Promise { - if (vectorExtension === DatabaseExtension.VECTORS) { - await queryRunner.query(`SET vectors.pgvector_compatibility=on`); - } await queryRunner.query(`SET search_path TO "$user", public, vectors`); - await queryRunner.query(` - CREATE INDEX IF NOT EXISTS face_index ON asset_faces - USING hnsw (embedding vector_cosine_ops) - WITH (ef_construction = 300, m = 16)`); + await queryRunner.query(vectorIndexQuery({ vectorExtension, table: 'asset_faces', indexName: 'face_index' })); } public async down(queryRunner: QueryRunner): Promise { diff --git a/server/src/migrations/1718486162779-AddFaceSearchRelation.ts b/server/src/migrations/1718486162779-AddFaceSearchRelation.ts index e08bcb8e25..64849708d2 100644 --- a/server/src/migrations/1718486162779-AddFaceSearchRelation.ts +++ b/server/src/migrations/1718486162779-AddFaceSearchRelation.ts @@ -1,5 +1,6 @@ import { DatabaseExtension } from 'src/enum'; import { ConfigRepository } from 'src/repositories/config.repository'; +import { vectorIndexQuery } from 'src/utils/database'; import { MigrationInterface, QueryRunner } from 'typeorm'; const vectorExtension = new ConfigRepository().getEnv().database.vectorExtension; @@ -8,7 +9,6 @@ export class AddFaceSearchRelation1718486162779 implements MigrationInterface { public async up(queryRunner: QueryRunner): Promise { if (vectorExtension === DatabaseExtension.VECTORS) { await queryRunner.query(`SET search_path TO "$user", public, vectors`); - await queryRunner.query(`SET vectors.pgvector_compatibility=on`); } const hasEmbeddings = async (tableName: string): Promise => { @@ -47,21 +47,14 @@ export class AddFaceSearchRelation1718486162779 implements MigrationInterface { await queryRunner.query(`ALTER TABLE face_search ALTER COLUMN embedding SET DATA TYPE real[]`); await queryRunner.query(`ALTER TABLE face_search ALTER COLUMN embedding SET DATA TYPE vector(512)`); - await queryRunner.query(` - CREATE INDEX IF NOT EXISTS clip_index ON smart_search - USING hnsw (embedding vector_cosine_ops) - WITH (ef_construction = 300, m = 16)`); + await queryRunner.query(vectorIndexQuery({ vectorExtension, table: 'smart_search', indexName: 'clip_index' })); - await queryRunner.query(` - CREATE INDEX face_index ON face_search - USING hnsw (embedding vector_cosine_ops) - WITH (ef_construction = 300, m = 16)`); + await queryRunner.query(vectorIndexQuery({ vectorExtension, table: 'face_search', indexName: 'face_index' })); } public async down(queryRunner: QueryRunner): Promise { if (vectorExtension === DatabaseExtension.VECTORS) { await queryRunner.query(`SET search_path TO "$user", public, vectors`); - await queryRunner.query(`SET vectors.pgvector_compatibility=on`); } await queryRunner.query(`ALTER TABLE asset_faces ADD COLUMN "embedding" vector(512)`); @@ -74,9 +67,6 @@ export class AddFaceSearchRelation1718486162779 implements MigrationInterface { WHERE id = fs."faceId"`); await queryRunner.query(`DROP TABLE face_search`); - await queryRunner.query(` - CREATE INDEX face_index ON asset_faces - USING hnsw (embedding vector_cosine_ops) - WITH (ef_construction = 300, m = 16)`); + await queryRunner.query(vectorIndexQuery({ vectorExtension, table: 'asset_faces', indexName: 'face_index' })); } } diff --git a/server/src/repositories/database.repository.ts b/server/src/repositories/database.repository.ts index a402c9d28d..addf6bcff0 100644 --- a/server/src/repositories/database.repository.ts +++ b/server/src/repositories/database.repository.ts @@ -12,6 +12,7 @@ import { DatabaseExtension, DatabaseLock, VectorIndex } from 'src/enum'; import { ConfigRepository } from 'src/repositories/config.repository'; import { LoggingRepository } from 'src/repositories/logging.repository'; import { ExtensionVersion, VectorExtension, VectorUpdateResult } from 'src/types'; +import { vectorIndexQuery } from 'src/utils/database'; import { isValidInteger } from 'src/validation'; import { DataSource } from 'typeorm'; @@ -119,12 +120,7 @@ export class DatabaseRepository { await sql`ALTER TABLE ${sql.raw(table)} ALTER COLUMN embedding SET DATA TYPE vector(${sql.raw(String(dimSize))})`.execute( tx, ); - await sql`SET vectors.pgvector_compatibility=on`.execute(tx); - await sql` - CREATE INDEX IF NOT EXISTS ${sql.raw(index)} ON ${sql.raw(table)} - USING hnsw (embedding vector_cosine_ops) - WITH (ef_construction = 300, m = 16) - `.execute(tx); + await sql.raw(vectorIndexQuery({ vectorExtension: this.vectorExtension, table, indexName: index })).execute(tx); }); } } diff --git a/server/src/repositories/search.repository.ts b/server/src/repositories/search.repository.ts index 5c1ebae69d..0c958fec02 100644 --- a/server/src/repositories/search.repository.ts +++ b/server/src/repositories/search.repository.ts @@ -6,7 +6,8 @@ import { DB, Exif } from 'src/db'; import { DummyValue, GenerateSql } from 'src/decorators'; import { MapAsset } from 'src/dtos/asset-response.dto'; import { AssetStatus, AssetType } from 'src/enum'; -import { anyUuid, asUuid, searchAssetBuilder } from 'src/utils/database'; +import { ConfigRepository } from 'src/repositories/config.repository'; +import { anyUuid, asUuid, searchAssetBuilder, vectorIndexQuery } from 'src/utils/database'; import { isValidInteger } from 'src/validation'; export interface SearchResult { @@ -201,7 +202,10 @@ export interface GetCameraMakesOptions { @Injectable() export class SearchRepository { - constructor(@InjectKysely() private db: Kysely) {} + constructor( + @InjectKysely() private db: Kysely, + private configRepository: ConfigRepository, + ) {} @GenerateSql({ params: [ @@ -446,8 +450,8 @@ export class SearchRepository { async upsert(assetId: string, embedding: string): Promise { await this.db .insertInto('smart_search') - .values({ assetId: asUuid(assetId), embedding } as any) - .onConflict((oc) => oc.column('assetId').doUpdateSet({ embedding } as any)) + .values({ assetId, embedding }) + .onConflict((oc) => oc.column('assetId').doUpdateSet((eb) => ({ embedding: eb.ref('excluded.embedding') }))) .execute(); } @@ -469,19 +473,32 @@ export class SearchRepository { return dimSize; } - setDimensionSize(dimSize: number): Promise { + async setDimensionSize(dimSize: number): Promise { if (!isValidInteger(dimSize, { min: 1, max: 2 ** 16 })) { throw new Error(`Invalid CLIP dimension size: ${dimSize}`); } - return this.db.transaction().execute(async (trx) => { - await sql`truncate ${sql.table('smart_search')}`.execute(trx); + // this is done in two transactions to handle concurrent writes + await this.db.transaction().execute(async (trx) => { + await sql`delete from ${sql.table('smart_search')}`.execute(trx); + await trx.schema.alterTable('smart_search').dropConstraint('dim_size_constraint').ifExists().execute(); + await sql`alter table ${sql.table('smart_search')} add constraint dim_size_constraint check (array_length(embedding::real[], 1) = ${sql.lit(dimSize)})`.execute( + trx, + ); + }); + + const vectorExtension = this.configRepository.getEnv().database.vectorExtension; + await this.db.transaction().execute(async (trx) => { + await sql`drop index if exists clip_index`.execute(trx); await trx.schema .alterTable('smart_search') .alterColumn('embedding', (col) => col.setDataType(sql.raw(`vector(${dimSize})`))) .execute(); - await sql`reindex index clip_index`.execute(trx); + await sql.raw(vectorIndexQuery({ vectorExtension, table: 'smart_search', indexName: 'clip_index' })).execute(trx); + await trx.schema.alterTable('smart_search').dropConstraint('dim_size_constraint').ifExists().execute(); }); + + await sql`vacuum analyze ${sql.table('smart_search')}`.execute(this.db); } async deleteAllSearchEmbeddings(): Promise { diff --git a/server/src/schema/index.ts b/server/src/schema/index.ts index d297b2217d..c62681d049 100644 --- a/server/src/schema/index.ts +++ b/server/src/schema/index.ts @@ -47,14 +47,8 @@ import { UserTable } from 'src/schema/tables/user.table'; import { VersionHistoryTable } from 'src/schema/tables/version-history.table'; import { ConfigurationParameter, Database, Extensions } from 'src/sql-tools'; -@Extensions(['uuid-ossp', 'unaccent', 'cube', 'earthdistance', 'pg_trgm', 'vectors', 'plpgsql']) +@Extensions(['uuid-ossp', 'unaccent', 'cube', 'earthdistance', 'pg_trgm', 'plpgsql']) @ConfigurationParameter({ name: 'search_path', value: () => '"$user", public, vectors', scope: 'database' }) -@ConfigurationParameter({ - name: 'vectors.pgvector_compatibility', - value: () => 'on', - scope: 'user', - synchronize: false, -}) @Database({ name: 'immich' }) export class ImmichDatabase { tables = [ diff --git a/server/src/schema/migrations/1744910873969-InitialMigration.ts b/server/src/schema/migrations/1744910873969-InitialMigration.ts index 459534a26a..ce4a37ae3b 100644 --- a/server/src/schema/migrations/1744910873969-InitialMigration.ts +++ b/server/src/schema/migrations/1744910873969-InitialMigration.ts @@ -2,6 +2,7 @@ import { Kysely, sql } from 'kysely'; import { DatabaseExtension } from 'src/enum'; import { ConfigRepository } from 'src/repositories/config.repository'; import { LoggingRepository } from 'src/repositories/logging.repository'; +import { vectorIndexQuery } from 'src/utils/database'; const vectorExtension = new ConfigRepository().getEnv().database.vectorExtension; const lastMigrationSql = sql<{ name: string }>`SELECT "name" FROM "migrations" ORDER BY "timestamp" DESC LIMIT 1;`; @@ -29,7 +30,7 @@ export async function up(db: Kysely): Promise { await sql`CREATE EXTENSION IF NOT EXISTS "cube";`.execute(db); await sql`CREATE EXTENSION IF NOT EXISTS "earthdistance";`.execute(db); await sql`CREATE EXTENSION IF NOT EXISTS "pg_trgm";`.execute(db); - await sql`CREATE EXTENSION IF NOT EXISTS "vectors";`.execute(db); + await sql`CREATE EXTENSION IF NOT EXISTS ${sql.raw(vectorExtension)}`.execute(db); await sql`CREATE OR REPLACE FUNCTION immich_uuid_v7(p_timestamp timestamp with time zone default clock_timestamp()) RETURNS uuid VOLATILE LANGUAGE SQL @@ -108,7 +109,6 @@ export async function up(db: Kysely): Promise { $$;`.execute(db); if (vectorExtension === DatabaseExtension.VECTORS) { await sql`SET search_path TO "$user", public, vectors`.execute(db); - await sql`SET vectors.pgvector_compatibility=on`.execute(db); } await sql`CREATE TYPE "assets_status_enum" AS ENUM ('active','trashed','deleted');`.execute(db); await sql`CREATE TYPE "sourcetype" AS ENUM ('machine-learning','exif','manual');`.execute(db); @@ -293,7 +293,7 @@ export async function up(db: Kysely): Promise { await sql`CREATE INDEX "IDX_live_photo_cid" ON "exif" ("livePhotoCID")`.execute(db); await sql`CREATE INDEX "IDX_auto_stack_id" ON "exif" ("autoStackId")`.execute(db); await sql`CREATE INDEX "IDX_asset_exif_update_id" ON "exif" ("updateId")`.execute(db); - await sql`CREATE INDEX "face_index" ON "face_search" USING hnsw (embedding vector_cosine_ops) WITH (ef_construction = 300, m = 16)`.execute(db); + await sql.raw(vectorIndexQuery({ vectorExtension, table: 'face_search', indexName: 'face_index' })).execute(db); await sql`CREATE INDEX "IDX_geodata_gist_earthcoord" ON "geodata_places" (ll_to_earth_public(latitude, longitude))`.execute(db); await sql`CREATE INDEX "idx_geodata_places_name" ON "geodata_places" USING gin (f_unaccent("name") gin_trgm_ops)`.execute(db); await sql`CREATE INDEX "idx_geodata_places_admin2_name" ON "geodata_places" USING gin (f_unaccent("admin2Name") gin_trgm_ops)`.execute(db); @@ -316,7 +316,7 @@ export async function up(db: Kysely): Promise { await sql`CREATE INDEX "IDX_sharedlink_albumId" ON "shared_links" ("albumId")`.execute(db); await sql`CREATE INDEX "IDX_5b7decce6c8d3db9593d6111a6" ON "shared_link__asset" ("assetsId")`.execute(db); await sql`CREATE INDEX "IDX_c9fab4aa97ffd1b034f3d6581a" ON "shared_link__asset" ("sharedLinksId")`.execute(db); - await sql`CREATE INDEX "clip_index" ON "smart_search" USING hnsw (embedding vector_cosine_ops) WITH (ef_construction = 300, m = 16)`.execute(db); + await sql.raw(vectorIndexQuery({ vectorExtension, table: 'smart_search', indexName: 'clip_index' })).execute(db); await sql`CREATE INDEX "IDX_d8ddd9d687816cc490432b3d4b" ON "session_sync_checkpoints" ("sessionId")`.execute(db); await sql`CREATE INDEX "IDX_session_sync_checkpoints_update_id" ON "session_sync_checkpoints" ("updateId")`.execute(db); await sql`CREATE INDEX "IDX_92e67dc508c705dd66c9461557" ON "tags" ("userId")`.execute(db); diff --git a/server/src/services/smart-info.service.spec.ts b/server/src/services/smart-info.service.spec.ts index df26e69108..ee94b89d72 100644 --- a/server/src/services/smart-info.service.spec.ts +++ b/server/src/services/smart-info.service.spec.ts @@ -58,10 +58,6 @@ describe(SmartInfoService.name, () => { expect(mocks.search.getDimensionSize).not.toHaveBeenCalled(); expect(mocks.search.setDimensionSize).not.toHaveBeenCalled(); expect(mocks.search.deleteAllSearchEmbeddings).not.toHaveBeenCalled(); - expect(mocks.job.getQueueStatus).not.toHaveBeenCalled(); - expect(mocks.job.pause).not.toHaveBeenCalled(); - expect(mocks.job.waitForQueueCompletion).not.toHaveBeenCalled(); - expect(mocks.job.resume).not.toHaveBeenCalled(); }); it('should return if model and DB dimension size are equal', async () => { @@ -72,38 +68,15 @@ describe(SmartInfoService.name, () => { expect(mocks.search.getDimensionSize).toHaveBeenCalledTimes(1); expect(mocks.search.setDimensionSize).not.toHaveBeenCalled(); expect(mocks.search.deleteAllSearchEmbeddings).not.toHaveBeenCalled(); - expect(mocks.job.getQueueStatus).not.toHaveBeenCalled(); - expect(mocks.job.pause).not.toHaveBeenCalled(); - expect(mocks.job.waitForQueueCompletion).not.toHaveBeenCalled(); - expect(mocks.job.resume).not.toHaveBeenCalled(); }); it('should update DB dimension size if model and DB have different values', async () => { mocks.search.getDimensionSize.mockResolvedValue(768); - mocks.job.getQueueStatus.mockResolvedValue({ isActive: false, isPaused: false }); await sut.onConfigInit({ newConfig: systemConfigStub.machineLearningEnabled as SystemConfig }); expect(mocks.search.getDimensionSize).toHaveBeenCalledTimes(1); expect(mocks.search.setDimensionSize).toHaveBeenCalledWith(512); - expect(mocks.job.getQueueStatus).toHaveBeenCalledTimes(1); - expect(mocks.job.pause).toHaveBeenCalledTimes(1); - expect(mocks.job.waitForQueueCompletion).toHaveBeenCalledTimes(1); - expect(mocks.job.resume).toHaveBeenCalledTimes(1); - }); - - it('should skip pausing and resuming queue if already paused', async () => { - mocks.search.getDimensionSize.mockResolvedValue(768); - mocks.job.getQueueStatus.mockResolvedValue({ isActive: false, isPaused: true }); - - await sut.onConfigInit({ newConfig: systemConfigStub.machineLearningEnabled as SystemConfig }); - - expect(mocks.search.getDimensionSize).toHaveBeenCalledTimes(1); - expect(mocks.search.setDimensionSize).toHaveBeenCalledWith(512); - expect(mocks.job.getQueueStatus).toHaveBeenCalledTimes(1); - expect(mocks.job.pause).not.toHaveBeenCalled(); - expect(mocks.job.waitForQueueCompletion).toHaveBeenCalledTimes(1); - expect(mocks.job.resume).not.toHaveBeenCalled(); }); }); @@ -120,10 +93,6 @@ describe(SmartInfoService.name, () => { expect(mocks.search.getDimensionSize).not.toHaveBeenCalled(); expect(mocks.search.setDimensionSize).not.toHaveBeenCalled(); expect(mocks.search.deleteAllSearchEmbeddings).not.toHaveBeenCalled(); - expect(mocks.job.getQueueStatus).not.toHaveBeenCalled(); - expect(mocks.job.pause).not.toHaveBeenCalled(); - expect(mocks.job.waitForQueueCompletion).not.toHaveBeenCalled(); - expect(mocks.job.resume).not.toHaveBeenCalled(); }); it('should return if model and DB dimension size are equal', async () => { @@ -141,15 +110,10 @@ describe(SmartInfoService.name, () => { expect(mocks.search.getDimensionSize).toHaveBeenCalledTimes(1); expect(mocks.search.setDimensionSize).not.toHaveBeenCalled(); expect(mocks.search.deleteAllSearchEmbeddings).not.toHaveBeenCalled(); - expect(mocks.job.getQueueStatus).not.toHaveBeenCalled(); - expect(mocks.job.pause).not.toHaveBeenCalled(); - expect(mocks.job.waitForQueueCompletion).not.toHaveBeenCalled(); - expect(mocks.job.resume).not.toHaveBeenCalled(); }); it('should update DB dimension size if model and DB have different values', async () => { mocks.search.getDimensionSize.mockResolvedValue(512); - mocks.job.getQueueStatus.mockResolvedValue({ isActive: false, isPaused: false }); await sut.onConfigUpdate({ newConfig: { @@ -162,15 +126,10 @@ describe(SmartInfoService.name, () => { expect(mocks.search.getDimensionSize).toHaveBeenCalledTimes(1); expect(mocks.search.setDimensionSize).toHaveBeenCalledWith(768); - expect(mocks.job.getQueueStatus).toHaveBeenCalledTimes(1); - expect(mocks.job.pause).toHaveBeenCalledTimes(1); - expect(mocks.job.waitForQueueCompletion).toHaveBeenCalledTimes(1); - expect(mocks.job.resume).toHaveBeenCalledTimes(1); }); it('should clear embeddings if old and new models are different', async () => { mocks.search.getDimensionSize.mockResolvedValue(512); - mocks.job.getQueueStatus.mockResolvedValue({ isActive: false, isPaused: false }); await sut.onConfigUpdate({ newConfig: { @@ -184,31 +143,6 @@ describe(SmartInfoService.name, () => { expect(mocks.search.deleteAllSearchEmbeddings).toHaveBeenCalled(); expect(mocks.search.getDimensionSize).toHaveBeenCalledTimes(1); expect(mocks.search.setDimensionSize).not.toHaveBeenCalled(); - expect(mocks.job.getQueueStatus).toHaveBeenCalledTimes(1); - expect(mocks.job.pause).toHaveBeenCalledTimes(1); - expect(mocks.job.waitForQueueCompletion).toHaveBeenCalledTimes(1); - expect(mocks.job.resume).toHaveBeenCalledTimes(1); - }); - - it('should skip pausing and resuming queue if already paused', async () => { - mocks.search.getDimensionSize.mockResolvedValue(512); - mocks.job.getQueueStatus.mockResolvedValue({ isActive: false, isPaused: true }); - - await sut.onConfigUpdate({ - newConfig: { - machineLearning: { clip: { modelName: 'ViT-B-32__openai', enabled: true }, enabled: true }, - } as SystemConfig, - oldConfig: { - machineLearning: { clip: { modelName: 'ViT-B-16__openai', enabled: true }, enabled: true }, - } as SystemConfig, - }); - - expect(mocks.search.getDimensionSize).toHaveBeenCalledTimes(1); - expect(mocks.search.setDimensionSize).not.toHaveBeenCalled(); - expect(mocks.job.getQueueStatus).toHaveBeenCalledTimes(1); - expect(mocks.job.pause).not.toHaveBeenCalled(); - expect(mocks.job.waitForQueueCompletion).toHaveBeenCalledTimes(1); - expect(mocks.job.resume).not.toHaveBeenCalled(); }); }); @@ -220,6 +154,7 @@ describe(SmartInfoService.name, () => { expect(mocks.asset.getAll).not.toHaveBeenCalled(); expect(mocks.asset.getWithout).not.toHaveBeenCalled(); + expect(mocks.search.setDimensionSize).not.toHaveBeenCalled(); }); it('should queue the assets without clip embeddings', async () => { @@ -234,7 +169,7 @@ describe(SmartInfoService.name, () => { { name: JobName.SMART_SEARCH, data: { id: assetStub.image.id } }, ]); expect(mocks.asset.getWithout).toHaveBeenCalledWith({ skip: 0, take: 1000 }, WithoutProperty.SMART_SEARCH); - expect(mocks.search.deleteAllSearchEmbeddings).not.toHaveBeenCalled(); + expect(mocks.search.setDimensionSize).not.toHaveBeenCalled(); }); it('should queue all the assets', async () => { @@ -249,7 +184,7 @@ describe(SmartInfoService.name, () => { { name: JobName.SMART_SEARCH, data: { id: assetStub.image.id } }, ]); expect(mocks.asset.getAll).toHaveBeenCalled(); - expect(mocks.search.deleteAllSearchEmbeddings).toHaveBeenCalled(); + expect(mocks.search.setDimensionSize).toHaveBeenCalledExactlyOnceWith(512); }); }); diff --git a/server/src/services/smart-info.service.ts b/server/src/services/smart-info.service.ts index 411114eb17..42fefb60b9 100644 --- a/server/src/services/smart-info.service.ts +++ b/server/src/services/smart-info.service.ts @@ -50,12 +50,6 @@ export class SmartInfoService extends BaseService { return; } - const { isPaused } = await this.jobRepository.getQueueStatus(QueueName.SMART_SEARCH); - if (!isPaused) { - await this.jobRepository.pause(QueueName.SMART_SEARCH); - } - await this.jobRepository.waitForQueueCompletion(QueueName.SMART_SEARCH); - if (dimSizeChange) { this.logger.log( `Dimension size of model ${newConfig.machineLearning.clip.modelName} is ${dimSize}, but database expects ${dbDimSize}.`, @@ -67,9 +61,8 @@ export class SmartInfoService extends BaseService { await this.searchRepository.deleteAllSearchEmbeddings(); } - if (!isPaused) { - await this.jobRepository.resume(QueueName.SMART_SEARCH); - } + // TODO: A job to reindex all assets should be scheduled, though user + // confirmation should probably be requested before doing that. }); } @@ -81,7 +74,9 @@ export class SmartInfoService extends BaseService { } if (force) { - await this.searchRepository.deleteAllSearchEmbeddings(); + const { dimSize } = getCLIPModelInfo(machineLearning.clip.modelName); + // in addition to deleting embeddings, update the dimension size in case it failed earlier + await this.searchRepository.setDimensionSize(dimSize); } const assetPagination = usePagination(JOBS_ASSET_PAGINATION_SIZE, (pagination) => { @@ -126,6 +121,12 @@ export class SmartInfoService extends BaseService { await this.databaseRepository.wait(DatabaseLock.CLIPDimSize); } + const newConfig = await this.getConfig({ withCache: true }); + if (machineLearning.clip.modelName !== newConfig.machineLearning.clip.modelName) { + // Skip the job if the the model has changed since the embedding was generated. + return JobStatus.SKIPPED; + } + await this.searchRepository.upsert(asset.id, embedding); return JobStatus.SUCCESS; diff --git a/server/src/utils/database.ts b/server/src/utils/database.ts index 8f0b56597a..b44ea5da46 100644 --- a/server/src/utils/database.ts +++ b/server/src/utils/database.ts @@ -17,10 +17,10 @@ import { parse } from 'pg-connection-string'; import postgres, { Notice } from 'postgres'; import { columns, Exif, Person } from 'src/database'; import { DB } from 'src/db'; -import { AssetFileType } from 'src/enum'; +import { AssetFileType, DatabaseExtension } from 'src/enum'; import { TimeBucketSize } from 'src/repositories/asset.repository'; import { AssetSearchBuilderOptions } from 'src/repositories/search.repository'; -import { DatabaseConnectionParams } from 'src/types'; +import { DatabaseConnectionParams, VectorExtension } from 'src/types'; type Ssl = 'require' | 'allow' | 'prefer' | 'verify-full' | boolean | object; @@ -373,3 +373,28 @@ export function searchAssetBuilder(kysely: Kysely, options: AssetSearchBuild .$if(!!(options.withFaces || options.withPeople || options.personIds), (qb) => qb.select(withFacesAndPeople)) .$if(!options.withDeleted, (qb) => qb.where('assets.deletedAt', 'is', null)); } + +type VectorIndexOptions = { vectorExtension: VectorExtension; table: string; indexName: string }; + +export function vectorIndexQuery({ vectorExtension, table, indexName }: VectorIndexOptions): string { + switch (vectorExtension) { + case DatabaseExtension.VECTORS: { + return ` + CREATE INDEX IF NOT EXISTS ${indexName} ON ${table} + USING vectors (embedding vector_cos_ops) WITH (options = $$ + [indexing.hnsw] + m = 16 + ef_construction = 300 + $$)`; + } + case DatabaseExtension.VECTOR: { + return ` + CREATE INDEX IF NOT EXISTS ${indexName} ON ${table} + USING hnsw (embedding vector_cosine_ops) + WITH (ef_construction = 300, m = 16)`; + } + default: { + throw new Error(`Unsupported vector extension: '${vectorExtension}'`); + } + } +} diff --git a/server/test/medium.factory.ts b/server/test/medium.factory.ts index 3684837baa..388c4df96b 100644 --- a/server/test/medium.factory.ts +++ b/server/test/medium.factory.ts @@ -173,7 +173,7 @@ export const getRepository = (key: K, db: Kys } case 'search': { - return new SearchRepository(db); + return new SearchRepository(db, new ConfigRepository()); } case 'session': {