diff --git a/server/src/constants.ts b/server/src/constants.ts index 6c0319fcee..c043948712 100644 --- a/server/src/constants.ts +++ b/server/src/constants.ts @@ -1,9 +1,10 @@ import { Duration } from 'luxon'; import { readFileSync } from 'node:fs'; import { SemVer } from 'semver'; -import { DatabaseExtension, ExifOrientation } from 'src/enum'; +import { DatabaseExtension, ExifOrientation, VectorIndex } from 'src/enum'; export const POSTGRES_VERSION_RANGE = '>=14.0.0'; +export const VECTORCHORD_VERSION_RANGE = '>=0.2 <0.4'; export const VECTORS_VERSION_RANGE = '>=0.2 <0.4'; export const VECTOR_VERSION_RANGE = '>=0.5 <1'; @@ -20,8 +21,22 @@ export const EXTENSION_NAMES: Record = { earthdistance: 'earthdistance', vector: 'pgvector', vectors: 'pgvecto.rs', + vchord: 'VectorChord', } as const; +export const VECTOR_EXTENSIONS = [ + DatabaseExtension.VECTORCHORD, + DatabaseExtension.VECTORS, + DatabaseExtension.VECTOR, +] as const; + +export const VECTOR_INDEX_TABLES = { + [VectorIndex.CLIP]: 'smart_search', + [VectorIndex.FACE]: 'face_search', +} as const; + +export const VECTORCHORD_LIST_SLACK_FACTOR = 1.2; + export const SALT_ROUNDS = 10; export const IWorker = 'IWorker'; diff --git a/server/src/dtos/env.dto.ts b/server/src/dtos/env.dto.ts index 7f0df8abb9..99fd1d2149 100644 --- a/server/src/dtos/env.dto.ts +++ b/server/src/dtos/env.dto.ts @@ -154,9 +154,9 @@ export class EnvDto { @Optional() DB_USERNAME?: string; - @IsEnum(['pgvector', 'pgvecto.rs']) + @IsEnum(['pgvector', 'pgvecto.rs', 'vectorchord']) @Optional() - DB_VECTOR_EXTENSION?: 'pgvector' | 'pgvecto.rs'; + DB_VECTOR_EXTENSION?: 'pgvector' | 'pgvecto.rs' | 'vectorchord'; @IsString() @Optional() diff --git a/server/src/enum.ts b/server/src/enum.ts index f214593975..46ab44e65c 100644 --- a/server/src/enum.ts +++ b/server/src/enum.ts @@ -412,6 +412,7 @@ export enum DatabaseExtension { EARTH_DISTANCE = 'earthdistance', VECTOR = 'vector', VECTORS = 'vectors', + VECTORCHORD = 'vchord', } export enum BootstrapEventPriority { diff --git a/server/src/migrations/1700713871511-UsePgVectors.ts b/server/src/migrations/1700713871511-UsePgVectors.ts index e67c7275a7..4511e1001b 100644 --- a/server/src/migrations/1700713871511-UsePgVectors.ts +++ b/server/src/migrations/1700713871511-UsePgVectors.ts @@ -1,15 +1,13 @@ -import { ConfigRepository } from 'src/repositories/config.repository'; +import { getVectorExtension } from 'src/repositories/database.repository'; import { getCLIPModelInfo } from 'src/utils/misc'; import { MigrationInterface, QueryRunner } from 'typeorm'; -const vectorExtension = new ConfigRepository().getEnv().database.vectorExtension; - export class UsePgVectors1700713871511 implements MigrationInterface { name = 'UsePgVectors1700713871511'; public async up(queryRunner: QueryRunner): Promise { await queryRunner.query(`SET search_path TO "$user", public, vectors`); - await queryRunner.query(`CREATE EXTENSION IF NOT EXISTS ${vectorExtension}`); + await queryRunner.query(`CREATE EXTENSION IF NOT EXISTS ${await getVectorExtension(queryRunner)}`); const faceDimQuery = await queryRunner.query(` SELECT CARDINALITY(embedding::real[]) as dimsize FROM asset_faces diff --git a/server/src/migrations/1700713994428-AddCLIPEmbeddingIndex.ts b/server/src/migrations/1700713994428-AddCLIPEmbeddingIndex.ts index b5d47bb8cd..43809d6364 100644 --- a/server/src/migrations/1700713994428-AddCLIPEmbeddingIndex.ts +++ b/server/src/migrations/1700713994428-AddCLIPEmbeddingIndex.ts @@ -1,13 +1,12 @@ -import { ConfigRepository } from 'src/repositories/config.repository'; +import { getVectorExtension } from 'src/repositories/database.repository'; import { vectorIndexQuery } from 'src/utils/database'; import { MigrationInterface, QueryRunner } from 'typeorm'; -const vectorExtension = new ConfigRepository().getEnv().database.vectorExtension; - export class AddCLIPEmbeddingIndex1700713994428 implements MigrationInterface { name = 'AddCLIPEmbeddingIndex1700713994428'; public async up(queryRunner: QueryRunner): Promise { + const vectorExtension = await getVectorExtension(queryRunner); await queryRunner.query(`SET search_path TO "$user", public, vectors`); await queryRunner.query(vectorIndexQuery({ vectorExtension, table: 'smart_search', indexName: 'clip_index' })); diff --git a/server/src/migrations/1700714033632-AddFaceEmbeddingIndex.ts b/server/src/migrations/1700714033632-AddFaceEmbeddingIndex.ts index 2b41788fe4..5ee91afbcc 100644 --- a/server/src/migrations/1700714033632-AddFaceEmbeddingIndex.ts +++ b/server/src/migrations/1700714033632-AddFaceEmbeddingIndex.ts @@ -1,13 +1,12 @@ -import { ConfigRepository } from 'src/repositories/config.repository'; +import { getVectorExtension } from 'src/repositories/database.repository'; import { vectorIndexQuery } from 'src/utils/database'; import { MigrationInterface, QueryRunner } from 'typeorm'; -const vectorExtension = new ConfigRepository().getEnv().database.vectorExtension; - export class AddFaceEmbeddingIndex1700714033632 implements MigrationInterface { name = 'AddFaceEmbeddingIndex1700714033632'; public async up(queryRunner: QueryRunner): Promise { + const vectorExtension = await getVectorExtension(queryRunner); await queryRunner.query(`SET search_path TO "$user", public, vectors`); await queryRunner.query(vectorIndexQuery({ vectorExtension, table: 'asset_faces', indexName: 'face_index' })); diff --git a/server/src/migrations/1718486162779-AddFaceSearchRelation.ts b/server/src/migrations/1718486162779-AddFaceSearchRelation.ts index 64849708d2..68e1618775 100644 --- a/server/src/migrations/1718486162779-AddFaceSearchRelation.ts +++ b/server/src/migrations/1718486162779-AddFaceSearchRelation.ts @@ -1,12 +1,11 @@ import { DatabaseExtension } from 'src/enum'; -import { ConfigRepository } from 'src/repositories/config.repository'; +import { getVectorExtension } from 'src/repositories/database.repository'; import { vectorIndexQuery } from 'src/utils/database'; import { MigrationInterface, QueryRunner } from 'typeorm'; -const vectorExtension = new ConfigRepository().getEnv().database.vectorExtension; - export class AddFaceSearchRelation1718486162779 implements MigrationInterface { public async up(queryRunner: QueryRunner): Promise { + const vectorExtension = await getVectorExtension(queryRunner); if (vectorExtension === DatabaseExtension.VECTORS) { await queryRunner.query(`SET search_path TO "$user", public, vectors`); } @@ -48,11 +47,11 @@ export class AddFaceSearchRelation1718486162779 implements MigrationInterface { await queryRunner.query(`ALTER TABLE face_search ALTER COLUMN embedding SET DATA TYPE vector(512)`); await queryRunner.query(vectorIndexQuery({ vectorExtension, table: 'smart_search', indexName: 'clip_index' })); - await queryRunner.query(vectorIndexQuery({ vectorExtension, table: 'face_search', indexName: 'face_index' })); } public async down(queryRunner: QueryRunner): Promise { + const vectorExtension = await getVectorExtension(queryRunner); if (vectorExtension === DatabaseExtension.VECTORS) { await queryRunner.query(`SET search_path TO "$user", public, vectors`); } diff --git a/server/src/repositories/config.repository.spec.ts b/server/src/repositories/config.repository.spec.ts index 143892fdd0..238b48bcef 100644 --- a/server/src/repositories/config.repository.spec.ts +++ b/server/src/repositories/config.repository.spec.ts @@ -89,7 +89,7 @@ describe('getEnv', () => { password: 'postgres', }, skipMigrations: false, - vectorExtension: 'vectors', + vectorExtension: undefined, }); }); diff --git a/server/src/repositories/config.repository.ts b/server/src/repositories/config.repository.ts index 9b3e406437..9a0a24f70f 100644 --- a/server/src/repositories/config.repository.ts +++ b/server/src/repositories/config.repository.ts @@ -58,7 +58,7 @@ export interface EnvData { database: { config: DatabaseConnectionParams; skipMigrations: boolean; - vectorExtension: VectorExtension; + vectorExtension?: VectorExtension; }; licensePublicKey: { @@ -196,6 +196,22 @@ const getEnv = (): EnvData => { ssl: dto.DB_SSL_MODE || undefined, }; + let vectorExtension: VectorExtension | undefined; + switch (dto.DB_VECTOR_EXTENSION) { + case 'pgvector': { + vectorExtension = DatabaseExtension.VECTOR; + break; + } + case 'pgvecto.rs': { + vectorExtension = DatabaseExtension.VECTORS; + break; + } + case 'vectorchord': { + vectorExtension = DatabaseExtension.VECTORCHORD; + break; + } + } + return { host: dto.IMMICH_HOST, port: dto.IMMICH_PORT || 2283, @@ -251,7 +267,7 @@ const getEnv = (): EnvData => { database: { config: databaseConnection, skipMigrations: dto.DB_SKIP_MIGRATIONS ?? false, - vectorExtension: dto.DB_VECTOR_EXTENSION === 'pgvector' ? DatabaseExtension.VECTOR : DatabaseExtension.VECTORS, + vectorExtension, }, licensePublicKey: isProd ? productionKeys : stagingKeys, diff --git a/server/src/repositories/database.repository.ts b/server/src/repositories/database.repository.ts index addf6bcff0..5174b67481 100644 --- a/server/src/repositories/database.repository.ts +++ b/server/src/repositories/database.repository.ts @@ -5,7 +5,16 @@ import { InjectKysely } from 'nestjs-kysely'; import { readdir } from 'node:fs/promises'; import { join, resolve } from 'node:path'; import semver from 'semver'; -import { EXTENSION_NAMES, POSTGRES_VERSION_RANGE, VECTOR_VERSION_RANGE, VECTORS_VERSION_RANGE } from 'src/constants'; +import { + EXTENSION_NAMES, + POSTGRES_VERSION_RANGE, + VECTOR_EXTENSIONS, + VECTOR_INDEX_TABLES, + VECTOR_VERSION_RANGE, + VECTORCHORD_LIST_SLACK_FACTOR, + VECTORCHORD_VERSION_RANGE, + VECTORS_VERSION_RANGE, +} from 'src/constants'; import { DB } from 'src/db'; import { GenerateSql } from 'src/decorators'; import { DatabaseExtension, DatabaseLock, VectorIndex } from 'src/enum'; @@ -14,11 +23,35 @@ import { LoggingRepository } from 'src/repositories/logging.repository'; import { ExtensionVersion, VectorExtension, VectorUpdateResult } from 'src/types'; import { vectorIndexQuery } from 'src/utils/database'; import { isValidInteger } from 'src/validation'; -import { DataSource } from 'typeorm'; +import { DataSource, QueryRunner } from 'typeorm'; + +let cachedVectorExtension: VectorExtension | undefined; +export async function getVectorExtension(runner: Kysely | QueryRunner): Promise { + if (cachedVectorExtension) { + return cachedVectorExtension; + } + + cachedVectorExtension = new ConfigRepository().getEnv().database.vectorExtension; + if (!cachedVectorExtension) { + let availableExtensions: { name: VectorExtension }[]; + const query = `SELECT name FROM pg_available_extensions WHERE name IN (${VECTOR_EXTENSIONS.map((ext) => `'${ext}'`).join(', ')})`; + if (runner instanceof Kysely) { + const { rows } = await sql.raw<{ name: VectorExtension }>(query).execute(runner); + availableExtensions = rows; + } else { + availableExtensions = (await runner.query(query)) as { name: VectorExtension }[]; + } + const extensionNames = new Set(availableExtensions.map((row) => row.name)); + cachedVectorExtension = VECTOR_EXTENSIONS.find((ext) => extensionNames.has(ext)); + } + if (!cachedVectorExtension) { + throw new Error(`No vector extension found. Available extensions: ${VECTOR_EXTENSIONS.join(', ')}`); + } + return cachedVectorExtension; +} @Injectable() export class DatabaseRepository { - private vectorExtension: VectorExtension; private readonly asyncLock = new AsyncLock(); constructor( @@ -26,7 +59,6 @@ export class DatabaseRepository { private logger: LoggingRepository, private configRepository: ConfigRepository, ) { - this.vectorExtension = configRepository.getEnv().database.vectorExtension; this.logger.setContext(DatabaseRepository.name); } @@ -34,6 +66,10 @@ export class DatabaseRepository { await this.db.destroy(); } + getVectorExtension(): Promise { + return getVectorExtension(this.db); + } + @GenerateSql({ params: [DatabaseExtension.VECTORS] }) async getExtensionVersion(extension: DatabaseExtension): Promise { const { rows } = await sql` @@ -45,7 +81,20 @@ export class DatabaseRepository { } getExtensionVersionRange(extension: VectorExtension): string { - return extension === DatabaseExtension.VECTORS ? VECTORS_VERSION_RANGE : VECTOR_VERSION_RANGE; + switch (extension) { + case DatabaseExtension.VECTORCHORD: { + return VECTORCHORD_VERSION_RANGE; + } + case DatabaseExtension.VECTORS: { + return VECTORS_VERSION_RANGE; + } + case DatabaseExtension.VECTOR: { + return VECTOR_VERSION_RANGE; + } + default: { + throw new Error(`Unsupported vector extension: '${extension}'`); + } + } } @GenerateSql() @@ -59,7 +108,13 @@ export class DatabaseRepository { } async createExtension(extension: DatabaseExtension): Promise { - await sql`CREATE EXTENSION IF NOT EXISTS ${sql.raw(extension)}`.execute(this.db); + await sql`CREATE EXTENSION IF NOT EXISTS ${sql.raw(extension)} CASCADE`.execute(this.db); + if (extension === DatabaseExtension.VECTORCHORD) { + await sql`ALTER DATABASE immich SET vchordrq.prewarm_dim = '512,640,768,1024,1152,1536'`.execute(this.db); + await sql`SET vchordrq.prewarm_dim = '512,640,768,1024,1152,1536'`.execute(this.db); + await sql`ALTER DATABASE immich SET vchordrq.probes = 1`.execute(this.db); + await sql`SET vchordrq.probes = 1`.execute(this.db); + } } async updateVectorExtension(extension: VectorExtension, targetVersion?: string): Promise { @@ -78,102 +133,134 @@ export class DatabaseRepository { await this.db.transaction().execute(async (tx) => { await this.setSearchPath(tx); - if (isVectors && installedVersion === '0.1.1') { - await this.setExtVersion(tx, DatabaseExtension.VECTORS, '0.1.11'); - } - - const isSchemaUpgrade = semver.satisfies(installedVersion, '0.1.1 || 0.1.11'); - if (isSchemaUpgrade && isVectors) { - await this.updateVectorsSchema(tx); - } - await sql`ALTER EXTENSION ${sql.raw(extension)} UPDATE TO ${sql.lit(targetVersion)}`.execute(tx); const diff = semver.diff(installedVersion, targetVersion); - if (isVectors && diff && ['minor', 'major'].includes(diff)) { + if (isVectors && (diff === 'major' || diff === 'minor')) { await sql`SELECT pgvectors_upgrade()`.execute(tx); restartRequired = true; - } else { - await this.reindex(VectorIndex.CLIP); - await this.reindex(VectorIndex.FACE); + } else if (diff) { + await Promise.all([this.reindexVectors(VectorIndex.CLIP), this.reindexVectors(VectorIndex.FACE)]); } }); return { restartRequired }; } - async reindex(index: VectorIndex): Promise { - try { - await sql`REINDEX INDEX ${sql.raw(index)}`.execute(this.db); - } catch (error) { - if (this.vectorExtension !== DatabaseExtension.VECTORS) { - throw error; - } - this.logger.warn(`Could not reindex index ${index}. Attempting to auto-fix.`); + async prewarm(index: VectorIndex): Promise { + const vectorExtension = await getVectorExtension(this.db); + if (vectorExtension !== DatabaseExtension.VECTORCHORD) { + return; + } + this.logger.debug(`Prewarming ${index}`); + await sql`SELECT vchordrq_prewarm(${index})`.execute(this.db); + } - const table = await this.getIndexTable(index); - const dimSize = await this.getDimSize(table); - await this.db.transaction().execute(async (tx) => { - await this.setSearchPath(tx); - await sql`DROP INDEX IF EXISTS ${sql.raw(index)}`.execute(tx); - await sql`ALTER TABLE ${sql.raw(table)} ALTER COLUMN embedding SET DATA TYPE real[]`.execute(tx); - await sql`ALTER TABLE ${sql.raw(table)} ALTER COLUMN embedding SET DATA TYPE vector(${sql.raw(String(dimSize))})`.execute( - tx, - ); - await sql.raw(vectorIndexQuery({ vectorExtension: this.vectorExtension, table, indexName: index })).execute(tx); - }); + async reindexVectorsIfNeeded(names: VectorIndex[]): Promise { + const { rows } = await sql<{ + indexdef: string; + indexname: string; + }>`SELECT indexdef, indexname FROM pg_indexes WHERE indexname = ANY(ARRAY[${sql.join(names)}])`.execute(this.db); + + let keyword: string; + const vectorExtension = await getVectorExtension(this.db); + switch (vectorExtension) { + case DatabaseExtension.VECTOR: { + keyword = 'using hnsw'; + break; + } + case DatabaseExtension.VECTORCHORD: { + keyword = 'using vchordrq'; + break; + } + case DatabaseExtension.VECTORS: { + keyword = 'using vectors'; + break; + } + } + + const promises = []; + for (const indexName of names) { + const row = rows.find((index) => index.indexname === indexName); + const table = VECTOR_INDEX_TABLES[indexName]; + if (!row) { + promises.push(this.reindexVectors(indexName)); + continue; + } + + switch (vectorExtension) { + case DatabaseExtension.VECTOR: + case DatabaseExtension.VECTORS: { + if (!row.indexdef.toLowerCase().includes(keyword)) { + promises.push(this.reindexVectors(indexName)); + } + break; + } + case DatabaseExtension.VECTORCHORD: { + const matches = row.indexdef.match(/(?<=lists = \[)\d+/g); + const lists = matches && matches.length > 0 ? Number(matches[0]) : 1; + promises.push( + this.db + .selectFrom(this.db.dynamic.table(table).as('t')) + .select((eb) => eb.fn.countAll().as('count')) + .executeTakeFirstOrThrow() + .then(({ count }) => { + const targetLists = this.targetListCount(count); + this.logger.log(`targetLists=${targetLists}, current=${lists} for ${indexName} of ${count} rows`); + if ( + !row.indexdef.toLowerCase().includes(keyword) || + // slack factor is to avoid frequent reindexing if the count is borderline + (lists !== targetLists && lists !== this.targetListCount(count * VECTORCHORD_LIST_SLACK_FACTOR)) + ) { + return this.reindexVectors(indexName, { lists: targetLists }); + } + }), + ); + break; + } + } + } + + if (promises.length > 0) { + await Promise.all(promises); } } - @GenerateSql({ params: [VectorIndex.CLIP] }) - async shouldReindex(name: VectorIndex): Promise { - if (this.vectorExtension !== DatabaseExtension.VECTORS) { - return false; + private async reindexVectors(indexName: VectorIndex, { lists }: { lists?: number } = {}): Promise { + this.logger.log(`Reindexing ${indexName}`); + const table = VECTOR_INDEX_TABLES[indexName]; + const vectorExtension = await getVectorExtension(this.db); + const { rows } = await sql<{ + columnName: string; + }>`SELECT column_name as "columnName" FROM information_schema.columns WHERE table_name = ${table}`.execute(this.db); + if (rows.length === 0) { + this.logger.warn( + `Table ${table} does not exist, skipping reindexing. This is only normal if this is a new Immich instance.`, + ); + return; } - - try { - const { rows } = await sql<{ - idx_status: string; - }>`SELECT idx_status FROM pg_vector_index_stat WHERE indexname = ${name}`.execute(this.db); - return rows[0]?.idx_status === 'UPGRADE'; - } catch (error) { - const message: string = (error as any).message; - if (message.includes('index is not existing')) { - return true; - } else if (message.includes('relation "pg_vector_index_stat" does not exist')) { - return false; + const dimSize = await this.getDimSize(table); + await this.db.transaction().execute(async (tx) => { + await sql`DROP INDEX IF EXISTS ${sql.raw(indexName)}`.execute(tx); + if (!rows.some((row) => row.columnName === 'embedding')) { + this.logger.warn(`Column 'embedding' does not exist in table '${table}', truncating and adding column.`); + await sql`TRUNCATE TABLE ${sql.raw(table)}`.execute(tx); + await sql`ALTER TABLE ${sql.raw(table)} ADD COLUMN embedding real[] NOT NULL`.execute(tx); } - throw error; - } + await sql`ALTER TABLE ${sql.raw(table)} ALTER COLUMN embedding SET DATA TYPE real[]`.execute(tx); + const schema = vectorExtension === DatabaseExtension.VECTORS ? 'vectors.' : ''; + await sql` + ALTER TABLE ${sql.raw(table)} + ALTER COLUMN embedding + SET DATA TYPE ${sql.raw(schema)}vector(${sql.raw(String(dimSize))})`.execute(tx); + await sql.raw(vectorIndexQuery({ vectorExtension, table, indexName, lists })).execute(tx); + }); } private async setSearchPath(tx: Transaction): Promise { await sql`SET search_path TO "$user", public, vectors`.execute(tx); } - private async setExtVersion(tx: Transaction, extName: DatabaseExtension, version: string): Promise { - await sql`UPDATE pg_catalog.pg_extension SET extversion = ${version} WHERE extname = ${extName}`.execute(tx); - } - - private async getIndexTable(index: VectorIndex): Promise { - const { rows } = await sql<{ - relname: string | null; - }>`SELECT relname FROM pg_stat_all_indexes WHERE indexrelname = ${index}`.execute(this.db); - const table = rows[0]?.relname; - if (!table) { - throw new Error(`Could not find table for index ${index}`); - } - return table; - } - - private async updateVectorsSchema(tx: Transaction): Promise { - const extension = DatabaseExtension.VECTORS; - await sql`CREATE SCHEMA IF NOT EXISTS ${extension}`.execute(tx); - await sql`UPDATE pg_catalog.pg_extension SET extrelocatable = true WHERE extname = ${extension}`.execute(tx); - await sql`ALTER EXTENSION vectors SET SCHEMA vectors`.execute(tx); - await sql`UPDATE pg_catalog.pg_extension SET extrelocatable = false WHERE extname = ${extension}`.execute(tx); - } - private async getDimSize(table: string, column = 'embedding'): Promise { const { rows } = await sql<{ dimsize: number }>` SELECT atttypmod as dimsize @@ -181,17 +268,29 @@ export class DatabaseRepository { JOIN pg_class c ON c.oid = f.attrelid WHERE c.relkind = 'r'::char AND f.attnum > 0 - AND c.relname = ${table} - AND f.attname = '${column}' + AND c.relname = ${table}::text + AND f.attname = ${column}::text `.execute(this.db); const dimSize = rows[0]?.dimsize; if (!isValidInteger(dimSize, { min: 1, max: 2 ** 16 })) { - throw new Error(`Could not retrieve dimension size`); + this.logger.warn(`Could not retrieve dimension size of column '${column}' in table '${table}', assuming 512`); + return 512; } return dimSize; } + // TODO: set probes in queries + private targetListCount(count: number) { + if (count < 128_000) { + return 1; + } else if (count < 2_048_000) { + return 1 << (32 - Math.clz32(count / 1000)); + } else { + return 1 << (33 - Math.clz32(Math.sqrt(count))); + } + } + async runMigrations(options?: { transaction?: 'all' | 'none' | 'each' }): Promise { const { database } = this.configRepository.getEnv(); diff --git a/server/src/repositories/search.repository.ts b/server/src/repositories/search.repository.ts index 4e6b6e0fcf..2ba71db46a 100644 --- a/server/src/repositories/search.repository.ts +++ b/server/src/repositories/search.repository.ts @@ -5,8 +5,8 @@ import { randomUUID } from 'node:crypto'; import { DB, Exif } from 'src/db'; import { DummyValue, GenerateSql } from 'src/decorators'; import { MapAsset } from 'src/dtos/asset-response.dto'; -import { AssetStatus, AssetType, AssetVisibility } from 'src/enum'; -import { ConfigRepository } from 'src/repositories/config.repository'; +import { AssetStatus, AssetType, AssetVisibility, VectorIndex } from 'src/enum'; +import { getVectorExtension } from 'src/repositories/database.repository'; import { anyUuid, asUuid, searchAssetBuilder, vectorIndexQuery } from 'src/utils/database'; import { paginationHelper } from 'src/utils/pagination'; import { isValidInteger } from 'src/validation'; @@ -168,10 +168,7 @@ export interface GetCameraMakesOptions { @Injectable() export class SearchRepository { - constructor( - @InjectKysely() private db: Kysely, - private configRepository: ConfigRepository, - ) {} + constructor(@InjectKysely() private db: Kysely) {} @GenerateSql({ params: [ @@ -448,14 +445,16 @@ export class SearchRepository { ); }); - const vectorExtension = this.configRepository.getEnv().database.vectorExtension; + const vectorExtension = await getVectorExtension(this.db); await this.db.transaction().execute(async (trx) => { await sql`drop index if exists clip_index`.execute(trx); await trx.schema .alterTable('smart_search') .alterColumn('embedding', (col) => col.setDataType(sql.raw(`vector(${dimSize})`))) .execute(); - await sql.raw(vectorIndexQuery({ vectorExtension, table: 'smart_search', indexName: 'clip_index' })).execute(trx); + await sql + .raw(vectorIndexQuery({ vectorExtension, table: 'smart_search', indexName: VectorIndex.CLIP })) + .execute(trx); await trx.schema.alterTable('smart_search').dropConstraint('dim_size_constraint').ifExists().execute(); }); diff --git a/server/src/schema/migrations/1744910873969-InitialMigration.ts b/server/src/schema/migrations/1744910873969-InitialMigration.ts index ce4a37ae3b..4579ae2b08 100644 --- a/server/src/schema/migrations/1744910873969-InitialMigration.ts +++ b/server/src/schema/migrations/1744910873969-InitialMigration.ts @@ -1,10 +1,9 @@ import { Kysely, sql } from 'kysely'; import { DatabaseExtension } from 'src/enum'; -import { ConfigRepository } from 'src/repositories/config.repository'; +import { getVectorExtension } from 'src/repositories/database.repository'; import { LoggingRepository } from 'src/repositories/logging.repository'; import { vectorIndexQuery } from 'src/utils/database'; -const vectorExtension = new ConfigRepository().getEnv().database.vectorExtension; const lastMigrationSql = sql<{ name: string }>`SELECT "name" FROM "migrations" ORDER BY "timestamp" DESC LIMIT 1;`; const tableExists = sql<{ result: string | null }>`select to_regclass('migrations') as "result"`; const logger = LoggingRepository.create(); @@ -25,6 +24,8 @@ export async function up(db: Kysely): Promise { return; } + const vectorExtension = await getVectorExtension(db); + await sql`CREATE EXTENSION IF NOT EXISTS "uuid-ossp";`.execute(db); await sql`CREATE EXTENSION IF NOT EXISTS "unaccent";`.execute(db); await sql`CREATE EXTENSION IF NOT EXISTS "cube";`.execute(db); diff --git a/server/src/services/database.service.spec.ts b/server/src/services/database.service.spec.ts index e0ab4a624d..c7fe0f1676 100644 --- a/server/src/services/database.service.spec.ts +++ b/server/src/services/database.service.spec.ts @@ -1,5 +1,5 @@ import { EXTENSION_NAMES } from 'src/constants'; -import { DatabaseExtension } from 'src/enum'; +import { DatabaseExtension, VectorIndex } from 'src/enum'; import { DatabaseService } from 'src/services/database.service'; import { VectorExtension } from 'src/types'; import { mockEnvData } from 'test/repositories/config.repository.mock'; @@ -49,6 +49,7 @@ describe(DatabaseService.name, () => { { extension: DatabaseExtension.VECTORS, extensionName: EXTENSION_NAMES[DatabaseExtension.VECTORS] }, ])('should work with $extensionName', ({ extension, extensionName }) => { beforeEach(() => { + mocks.database.getVectorExtension.mockResolvedValue(extension); mocks.config.getEnv.mockReturnValue( mockEnvData({ database: { @@ -240,41 +241,32 @@ describe(DatabaseService.name, () => { }); it(`should reindex ${extension} indices if needed`, async () => { - mocks.database.shouldReindex.mockResolvedValue(true); - await expect(sut.onBootstrap()).resolves.toBeUndefined(); - expect(mocks.database.shouldReindex).toHaveBeenCalledTimes(2); - expect(mocks.database.reindex).toHaveBeenCalledTimes(2); + expect(mocks.database.reindexVectorsIfNeeded).toHaveBeenCalledExactlyOnceWith([ + VectorIndex.CLIP, + VectorIndex.FACE, + ]); + expect(mocks.database.reindexVectorsIfNeeded).toHaveBeenCalledTimes(1); expect(mocks.database.runMigrations).toHaveBeenCalledTimes(1); expect(mocks.logger.fatal).not.toHaveBeenCalled(); }); it(`should throw an error if reindexing fails`, async () => { - mocks.database.shouldReindex.mockResolvedValue(true); - mocks.database.reindex.mockRejectedValue(new Error('Error reindexing')); + mocks.database.reindexVectorsIfNeeded.mockRejectedValue(new Error('Error reindexing')); await expect(sut.onBootstrap()).rejects.toBeDefined(); - expect(mocks.database.shouldReindex).toHaveBeenCalledTimes(1); - expect(mocks.database.reindex).toHaveBeenCalledTimes(1); + expect(mocks.database.reindexVectorsIfNeeded).toHaveBeenCalledExactlyOnceWith([ + VectorIndex.CLIP, + VectorIndex.FACE, + ]); expect(mocks.database.runMigrations).not.toHaveBeenCalled(); expect(mocks.logger.fatal).not.toHaveBeenCalled(); expect(mocks.logger.warn).toHaveBeenCalledWith( expect.stringContaining('Could not run vector reindexing checks.'), ); }); - - it(`should not reindex ${extension} indices if not needed`, async () => { - mocks.database.shouldReindex.mockResolvedValue(false); - - await expect(sut.onBootstrap()).resolves.toBeUndefined(); - - expect(mocks.database.shouldReindex).toHaveBeenCalledTimes(2); - expect(mocks.database.reindex).toHaveBeenCalledTimes(0); - expect(mocks.database.runMigrations).toHaveBeenCalledTimes(1); - expect(mocks.logger.fatal).not.toHaveBeenCalled(); - }); }); it('should skip migrations if DB_SKIP_MIGRATIONS=true', async () => { @@ -328,7 +320,7 @@ describe(DatabaseService.name, () => { expect(mocks.logger.fatal).toHaveBeenCalledTimes(1); expect(mocks.logger.fatal.mock.calls[0][0]).toContain( - `Alternatively, if your Postgres instance has pgvecto.rs, you may use this instead`, + `Alternatively, if your Postgres instance has any of vector, vectors, vchord, you may use one of them instead by setting the environment variable 'DB_VECTOR_EXTENSION='`, ); expect(mocks.database.createExtension).toHaveBeenCalledTimes(1); expect(mocks.database.updateVectorExtension).not.toHaveBeenCalled(); @@ -347,7 +339,7 @@ describe(DatabaseService.name, () => { expect(mocks.logger.fatal).toHaveBeenCalledTimes(1); expect(mocks.logger.fatal.mock.calls[0][0]).toContain( - `Alternatively, if your Postgres instance has pgvector, you may use this instead`, + `Alternatively, if your Postgres instance has any of vector, vectors, vchord, you may use one of them instead by setting the environment variable 'DB_VECTOR_EXTENSION='`, ); expect(mocks.database.createExtension).toHaveBeenCalledTimes(1); expect(mocks.database.updateVectorExtension).not.toHaveBeenCalled(); diff --git a/server/src/services/database.service.ts b/server/src/services/database.service.ts index d71dc25104..0cb729a799 100644 --- a/server/src/services/database.service.ts +++ b/server/src/services/database.service.ts @@ -6,7 +6,7 @@ import { BootstrapEventPriority, DatabaseExtension, DatabaseLock, VectorIndex } import { BaseService } from 'src/services/base.service'; import { VectorExtension } from 'src/types'; -type CreateFailedArgs = { name: string; extension: string; otherName: string }; +type CreateFailedArgs = { name: string; extension: string; otherExtensions: string[] }; type UpdateFailedArgs = { name: string; extension: string; availableVersion: string }; type RestartRequiredArgs = { name: string; availableVersion: string }; type NightlyVersionArgs = { name: string; extension: string; version: string }; @@ -25,7 +25,7 @@ const messages = { outOfRange: ({ name, version, range }: OutOfRangeArgs) => `The ${name} extension version is ${version}, but Immich only supports ${range}. Please change ${name} to a compatible version in the Postgres instance.`, - createFailed: ({ name, extension, otherName }: CreateFailedArgs) => + createFailed: ({ name, extension, otherExtensions }: CreateFailedArgs) => `Failed to activate ${name} extension. Please ensure the Postgres instance has ${name} installed. @@ -33,7 +33,7 @@ const messages = { In this case, please run 'CREATE EXTENSION IF NOT EXISTS ${extension}' manually as a superuser. See https://immich.app/docs/guides/database-queries for how to query the database. - Alternatively, if your Postgres instance has ${otherName}, you may use this instead by setting the environment variable 'DB_VECTOR_EXTENSION=${otherName}'. + Alternatively, if your Postgres instance has any of ${otherExtensions.join(', ')}, you may use one of them instead by setting the environment variable 'DB_VECTOR_EXTENSION='. Note that switching between the two extensions after a successful startup is not supported. The exception is if your version of Immich prior to upgrading was 1.90.2 or earlier. In this case, you may set either extension now, but you will not be able to switch to the other extension following a successful startup.`, @@ -67,8 +67,7 @@ export class DatabaseService extends BaseService { } await this.databaseRepository.withLock(DatabaseLock.Migrations, async () => { - const envData = this.configRepository.getEnv(); - const extension = envData.database.vectorExtension; + const extension = await this.databaseRepository.getVectorExtension(); const name = EXTENSION_NAMES[extension]; const extensionRange = this.databaseRepository.getExtensionVersionRange(extension); @@ -97,12 +96,20 @@ export class DatabaseService extends BaseService { throw new Error(messages.invalidDowngrade({ name, extension, availableVersion, installedVersion })); } - await this.checkReindexing(); + try { + await this.databaseRepository.reindexVectorsIfNeeded([VectorIndex.CLIP, VectorIndex.FACE]); + } catch (error) { + this.logger.warn( + 'Could not run vector reindexing checks. If the extension was updated, please restart the Postgres instance.', + ); + throw error; + } const { database } = this.configRepository.getEnv(); if (!database.skipMigrations) { await this.databaseRepository.runMigrations(); } + await this.databaseRepository.prewarm(VectorIndex.CLIP); }); } @@ -110,10 +117,13 @@ export class DatabaseService extends BaseService { try { await this.databaseRepository.createExtension(extension); } catch (error) { - const otherExtension = - extension === DatabaseExtension.VECTORS ? DatabaseExtension.VECTOR : DatabaseExtension.VECTORS; + const otherExtensions = [ + DatabaseExtension.VECTOR, + DatabaseExtension.VECTORS, + DatabaseExtension.VECTORCHORD, + ].filter((ext) => ext !== extension); const name = EXTENSION_NAMES[extension]; - this.logger.fatal(messages.createFailed({ name, extension, otherName: EXTENSION_NAMES[otherExtension] })); + this.logger.fatal(messages.createFailed({ name, extension, otherExtensions })); throw error; } } @@ -130,21 +140,4 @@ export class DatabaseService extends BaseService { throw error; } } - - private async checkReindexing() { - try { - if (await this.databaseRepository.shouldReindex(VectorIndex.CLIP)) { - await this.databaseRepository.reindex(VectorIndex.CLIP); - } - - if (await this.databaseRepository.shouldReindex(VectorIndex.FACE)) { - await this.databaseRepository.reindex(VectorIndex.FACE); - } - } catch (error) { - this.logger.warn( - 'Could not run vector reindexing checks. If the extension was updated, please restart the Postgres instance.', - ); - throw error; - } - } } diff --git a/server/src/services/person.service.ts b/server/src/services/person.service.ts index e6161b8f9c..73963f6fb1 100644 --- a/server/src/services/person.service.ts +++ b/server/src/services/person.service.ts @@ -33,6 +33,7 @@ import { QueueName, SourceType, SystemMetadataKey, + VectorIndex, } from 'src/enum'; import { BoundingBox } from 'src/repositories/machine-learning.repository'; import { UpdateFacesData } from 'src/repositories/person.repository'; @@ -416,6 +417,8 @@ export class PersonService extends BaseService { return JobStatus.SKIPPED; } + await this.databaseRepository.prewarm(VectorIndex.FACE); + const lastRun = new Date().toISOString(); const facePagination = this.personRepository.getAllFaces( force ? undefined : { personId: null, sourceType: SourceType.MACHINE_LEARNING }, diff --git a/server/src/types.ts b/server/src/types.ts index 2f5bfad02c..a8393ab369 100644 --- a/server/src/types.ts +++ b/server/src/types.ts @@ -1,7 +1,7 @@ import { SystemConfig } from 'src/config'; +import { VECTOR_EXTENSIONS } from 'src/constants'; import { AssetType, - DatabaseExtension, DatabaseSslMode, ExifOrientation, ImageFormat, @@ -367,7 +367,7 @@ export type JobItem = | { name: JobName.MEMORIES_CLEANUP; data?: IBaseJob } | { name: JobName.MEMORIES_CREATE; data?: IBaseJob }; -export type VectorExtension = DatabaseExtension.VECTOR | DatabaseExtension.VECTORS; +export type VectorExtension = (typeof VECTOR_EXTENSIONS)[number]; export type DatabaseConnectionURL = { connectionType: 'url'; diff --git a/server/src/utils/database.ts b/server/src/utils/database.ts index bacdf06d67..132ab82109 100644 --- a/server/src/utils/database.ts +++ b/server/src/utils/database.ts @@ -383,10 +383,22 @@ export function searchAssetBuilder(kysely: Kysely, options: AssetSearchBuild .$if(!options.withDeleted, (qb) => qb.where('assets.deletedAt', 'is', null)); } -type VectorIndexOptions = { vectorExtension: VectorExtension; table: string; indexName: string }; +export type ReindexVectorIndexOptions = { indexName: string; lists?: number }; -export function vectorIndexQuery({ vectorExtension, table, indexName }: VectorIndexOptions): string { +type VectorIndexQueryOptions = { table: string; vectorExtension: VectorExtension } & ReindexVectorIndexOptions; + +export function vectorIndexQuery({ vectorExtension, table, indexName, lists }: VectorIndexQueryOptions): string { switch (vectorExtension) { + case DatabaseExtension.VECTORCHORD: { + return ` + CREATE INDEX IF NOT EXISTS ${indexName} ON ${table} USING vchordrq (embedding vector_cosine_ops) WITH (options = $$ + residual_quantization = false + [build.internal] + lists = [${lists ?? 1}] + spherical_centroids = true + build_threads = 4 + $$)`; + } case DatabaseExtension.VECTORS: { return ` CREATE INDEX IF NOT EXISTS ${indexName} ON ${table} diff --git a/server/test/repositories/database.repository.mock.ts b/server/test/repositories/database.repository.mock.ts index eeedf682de..7933bd8188 100644 --- a/server/test/repositories/database.repository.mock.ts +++ b/server/test/repositories/database.repository.mock.ts @@ -6,13 +6,14 @@ export const newDatabaseRepositoryMock = (): Mocked=14.0.0'), createExtension: vitest.fn().mockResolvedValue(void 0), updateVectorExtension: vitest.fn(), - reindex: vitest.fn(), - shouldReindex: vitest.fn(), + reindexVectorsIfNeeded: vitest.fn(), + prewarm: vitest.fn(), runMigrations: vitest.fn(), withLock: vitest.fn().mockImplementation((_, function_: () => Promise) => function_()), tryLock: vitest.fn(),