refactor: remove smart search entity (#17447)

refactor: smart search entity
This commit is contained in:
Jason Rasmussen 2025-04-08 09:56:45 -04:00 committed by GitHub
parent 2b131fe935
commit fdbe6d649f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 125 additions and 134 deletions

4
server/src/db.d.ts vendored
View File

@ -4,7 +4,7 @@
*/
import type { ColumnType } from 'kysely';
import { AssetType, MemoryType, Permission, SyncEntityType } from 'src/enum';
import { AssetFileType, AssetType, MemoryType, Permission, SyncEntityType } from 'src/enum';
import { UserTable } from 'src/schema/tables/user.table';
import { OnThisDayData } from 'src/types';
@ -106,7 +106,7 @@ export interface AssetFiles {
createdAt: Generated<Timestamp>;
id: Generated<string>;
path: string;
type: string;
type: AssetFileType;
updatedAt: Generated<Timestamp>;
updateId: Generated<string>;
}

View File

@ -7,7 +7,6 @@ import { AssetFileEntity } from 'src/entities/asset-files.entity';
import { AssetJobStatusEntity } from 'src/entities/asset-job-status.entity';
import { ExifEntity } from 'src/entities/exif.entity';
import { SharedLinkEntity } from 'src/entities/shared-link.entity';
import { SmartSearchEntity } from 'src/entities/smart-search.entity';
import { StackEntity } from 'src/entities/stack.entity';
import { TagEntity } from 'src/entities/tag.entity';
import { UserEntity } from 'src/entities/user.entity';
@ -50,7 +49,6 @@ export class AssetEntity {
originalFileName!: string;
sidecarPath!: string | null;
exifInfo?: ExifEntity;
smartSearch?: SmartSearchEntity;
tags!: TagEntity[];
sharedLinks!: SharedLinkEntity[];
albums?: AlbumEntity[];
@ -97,9 +95,9 @@ export function withFiles(eb: ExpressionBuilder<DB, 'assets'>, type?: AssetFileT
return jsonArrayFrom(
eb
.selectFrom('asset_files')
.selectAll()
.selectAll('asset_files')
.whereRef('asset_files.assetId', '=', 'assets.id')
.$if(!!type, (qb) => qb.where('type', '=', type!)),
.$if(!!type, (qb) => qb.where('asset_files.type', '=', type!)),
).as('files');
}

View File

@ -1,7 +0,0 @@
import { AssetEntity } from 'src/entities/asset.entity';
export class SmartSearchEntity {
asset?: AssetEntity;
assetId!: string;
embedding!: string;
}

View File

@ -179,6 +179,37 @@ from
where
"livePhotoVideoId" = $1::uuid
-- AssetRepository.getAssetForSearchDuplicatesJob
select
"id",
"type",
"ownerId",
"duplicateId",
"stackId",
"isVisible",
"smart_search"."embedding",
(
select
coalesce(json_agg(agg), '[]')
from
(
select
"asset_files".*
from
"asset_files"
where
"asset_files"."assetId" = "assets"."id"
and "asset_files"."type" = $1
) as agg
) as "files"
from
"assets"
left join "smart_search" on "assets"."id" = "smart_search"."assetId"
where
"assets"."id" = $2::uuid
limit
$3
-- AssetRepository.getById
select
"assets".*

View File

@ -475,6 +475,26 @@ export class AssetRepository {
return count as number;
}
/**
 * Loads the columns read by the duplicate-search job for a single asset.
 *
 * The row contains the asset's `id`, `type`, `ownerId`, `duplicateId`,
 * `stackId` and `isVisible`, the `embedding` column of the joined
 * `smart_search` row, and a `files` JSON array built by `withFiles`
 * restricted to `AssetFileType.PREVIEW`.
 *
 * @param id - Asset id; passed through `asUuid` before the comparison.
 * @returns The first matching row, or `undefined` when no asset has this id.
 */
@GenerateSql({ params: [DummyValue.UUID] })
getAssetForSearchDuplicatesJob(id: string) {
return this.db
.selectFrom('assets')
.where('assets.id', '=', asUuid(id))
// Left join so an asset without a smart_search row is still returned
// (its `embedding` is then null).
.leftJoin('smart_search', 'assets.id', 'smart_search.assetId')
.select((eb) => [
'id',
'type',
'ownerId',
'duplicateId',
'stackId',
'isVisible',
'smart_search.embedding',
// Correlated subquery: only the asset's PREVIEW files, aggregated as `files`.
withFiles(eb, AssetFileType.PREVIEW),
])
.limit(1)
.executeTakeFirst();
}
@GenerateSql({ params: [DummyValue.UUID] })
getById(
id: string,

View File

@ -1,4 +1,4 @@
import { JobName, JobStatus } from 'src/enum';
import { AssetFileType, AssetType, JobName, JobStatus } from 'src/enum';
import { WithoutProperty } from 'src/repositories/asset.repository';
import { DuplicateService } from 'src/services/duplicate.service';
import { SearchService } from 'src/services/search.service';
@ -9,6 +9,33 @@ import { beforeEach, vitest } from 'vitest';
vitest.useFakeTimers();
// Fixture: an asset row as resolved by the mocked
// `getAssetForSearchDuplicatesJob` in the tests below. It is a visible,
// unstacked image with one PREVIEW file, an embedding, and no duplicateId.
// Individual tests spread it and override a single field.
const hasEmbedding = {
id: 'asset-1',
ownerId: 'user-id',
// A single preview file entry.
files: [
{
assetId: 'asset-1',
createdAt: new Date(),
id: 'file-1',
path: 'preview.jpg',
type: AssetFileType.PREVIEW,
updatedAt: new Date(),
updateId: 'update-1',
},
],
isVisible: true,
stackId: null,
type: AssetType.IMAGE,
duplicateId: null,
// Embedding is stored as a string, not as a numeric array.
embedding: '[1, 2, 3, 4]',
};
// Fixture: same shape as `hasEmbedding`, but with a different id and a
// duplicateId already assigned.
const hasDupe = {
...hasEmbedding,
id: 'asset-2',
duplicateId: 'duplicate-id',
};
describe(SearchService.name, () => {
let sut: DuplicateService;
let mocks: ServiceMocks;
@ -25,16 +52,16 @@ describe(SearchService.name, () => {
it('should get duplicates', async () => {
mocks.asset.getDuplicates.mockResolvedValue([
{
duplicateId: assetStub.hasDupe.duplicateId!,
assets: [assetStub.hasDupe, assetStub.hasDupe],
duplicateId: 'duplicate-id',
assets: [assetStub.image, assetStub.image],
},
]);
await expect(sut.getDuplicates(authStub.admin)).resolves.toEqual([
{
duplicateId: assetStub.hasDupe.duplicateId,
duplicateId: 'duplicate-id',
assets: [
expect.objectContaining({ id: assetStub.hasDupe.id }),
expect.objectContaining({ id: assetStub.hasDupe.id }),
expect.objectContaining({ id: assetStub.image.id }),
expect.objectContaining({ id: assetStub.image.id }),
],
},
]);
@ -175,7 +202,7 @@ describe(SearchService.name, () => {
it('should skip if asset is part of stack', async () => {
const id = assetStub.primaryImage.id;
mocks.asset.getById.mockResolvedValue(assetStub.primaryImage);
mocks.asset.getAssetForSearchDuplicatesJob.mockResolvedValue({ ...hasEmbedding, stackId: 'stack-id' });
const result = await sut.handleSearchDuplicates({ id });
@ -185,7 +212,7 @@ describe(SearchService.name, () => {
it('should skip if asset is not visible', async () => {
const id = assetStub.livePhotoMotionAsset.id;
mocks.asset.getById.mockResolvedValue(assetStub.livePhotoMotionAsset);
mocks.asset.getAssetForSearchDuplicatesJob.mockResolvedValue({ ...hasEmbedding, isVisible: false });
const result = await sut.handleSearchDuplicates({ id });
@ -194,7 +221,7 @@ describe(SearchService.name, () => {
});
it('should fail if asset is missing preview image', async () => {
mocks.asset.getById.mockResolvedValue(assetStub.noResizePath);
mocks.asset.getAssetForSearchDuplicatesJob.mockResolvedValue({ ...hasEmbedding, files: [] });
const result = await sut.handleSearchDuplicates({ id: assetStub.noResizePath.id });
@ -203,7 +230,7 @@ describe(SearchService.name, () => {
});
it('should fail if asset is missing embedding', async () => {
mocks.asset.getById.mockResolvedValue(assetStub.image);
mocks.asset.getAssetForSearchDuplicatesJob.mockResolvedValue({ ...hasEmbedding, embedding: null });
const result = await sut.handleSearchDuplicates({ id: assetStub.image.id });
@ -212,21 +239,21 @@ describe(SearchService.name, () => {
});
it('should search for duplicates and update asset with duplicateId', async () => {
mocks.asset.getById.mockResolvedValue(assetStub.hasEmbedding);
mocks.asset.getAssetForSearchDuplicatesJob.mockResolvedValue(hasEmbedding);
mocks.search.searchDuplicates.mockResolvedValue([
{ assetId: assetStub.image.id, distance: 0.01, duplicateId: null },
]);
const expectedAssetIds = [assetStub.image.id, assetStub.hasEmbedding.id];
const expectedAssetIds = [assetStub.image.id, hasEmbedding.id];
const result = await sut.handleSearchDuplicates({ id: assetStub.hasEmbedding.id });
const result = await sut.handleSearchDuplicates({ id: hasEmbedding.id });
expect(result).toBe(JobStatus.SUCCESS);
expect(mocks.search.searchDuplicates).toHaveBeenCalledWith({
assetId: assetStub.hasEmbedding.id,
embedding: assetStub.hasEmbedding.smartSearch!.embedding,
assetId: hasEmbedding.id,
embedding: hasEmbedding.embedding,
maxDistance: 0.01,
type: assetStub.hasEmbedding.type,
userIds: [assetStub.hasEmbedding.ownerId],
type: hasEmbedding.type,
userIds: [hasEmbedding.ownerId],
});
expect(mocks.asset.updateDuplicates).toHaveBeenCalledWith({
assetIds: expectedAssetIds,
@ -239,24 +266,24 @@ describe(SearchService.name, () => {
});
it('should use existing duplicate ID among matched duplicates', async () => {
const duplicateId = assetStub.hasDupe.duplicateId;
mocks.asset.getById.mockResolvedValue(assetStub.hasEmbedding);
mocks.search.searchDuplicates.mockResolvedValue([{ assetId: assetStub.hasDupe.id, distance: 0.01, duplicateId }]);
const expectedAssetIds = [assetStub.hasEmbedding.id];
const duplicateId = hasDupe.duplicateId;
mocks.asset.getAssetForSearchDuplicatesJob.mockResolvedValue(hasEmbedding);
mocks.search.searchDuplicates.mockResolvedValue([{ assetId: hasDupe.id, distance: 0.01, duplicateId }]);
const expectedAssetIds = [hasEmbedding.id];
const result = await sut.handleSearchDuplicates({ id: assetStub.hasEmbedding.id });
const result = await sut.handleSearchDuplicates({ id: hasEmbedding.id });
expect(result).toBe(JobStatus.SUCCESS);
expect(mocks.search.searchDuplicates).toHaveBeenCalledWith({
assetId: assetStub.hasEmbedding.id,
embedding: assetStub.hasEmbedding.smartSearch!.embedding,
assetId: hasEmbedding.id,
embedding: hasEmbedding.embedding,
maxDistance: 0.01,
type: assetStub.hasEmbedding.type,
userIds: [assetStub.hasEmbedding.ownerId],
type: hasEmbedding.type,
userIds: [hasEmbedding.ownerId],
});
expect(mocks.asset.updateDuplicates).toHaveBeenCalledWith({
assetIds: expectedAssetIds,
targetDuplicateId: assetStub.hasDupe.duplicateId,
targetDuplicateId: duplicateId,
duplicateIds: [],
});
expect(mocks.asset.upsertJobStatus).toHaveBeenCalledWith(
@ -265,15 +292,15 @@ describe(SearchService.name, () => {
});
it('should remove duplicateId if no duplicates found and asset has duplicateId', async () => {
mocks.asset.getById.mockResolvedValue(assetStub.hasDupe);
mocks.asset.getAssetForSearchDuplicatesJob.mockResolvedValue(hasDupe);
mocks.search.searchDuplicates.mockResolvedValue([]);
const result = await sut.handleSearchDuplicates({ id: assetStub.hasDupe.id });
const result = await sut.handleSearchDuplicates({ id: hasDupe.id });
expect(result).toBe(JobStatus.SUCCESS);
expect(mocks.asset.update).toHaveBeenCalledWith({ id: assetStub.hasDupe.id, duplicateId: null });
expect(mocks.asset.update).toHaveBeenCalledWith({ id: hasDupe.id, duplicateId: null });
expect(mocks.asset.upsertJobStatus).toHaveBeenCalledWith({
assetId: assetStub.hasDupe.id,
assetId: hasDupe.id,
duplicatesDetectedAt: expect.any(Date),
});
});

View File

@ -4,7 +4,6 @@ import { OnJob } from 'src/decorators';
import { mapAsset } from 'src/dtos/asset-response.dto';
import { AuthDto } from 'src/dtos/auth.dto';
import { DuplicateResponseDto } from 'src/dtos/duplicate.dto';
import { AssetEntity } from 'src/entities/asset.entity';
import { AssetFileType, JobName, JobStatus, QueueName } from 'src/enum';
import { WithoutProperty } from 'src/repositories/asset.repository';
import { AssetDuplicateResult } from 'src/repositories/search.repository';
@ -53,7 +52,7 @@ export class DuplicateService extends BaseService {
return JobStatus.SKIPPED;
}
const asset = await this.assetRepository.getById(id, { files: true, smartSearch: true });
const asset = await this.assetRepository.getAssetForSearchDuplicatesJob(id);
if (!asset) {
this.logger.error(`Asset ${id} not found`);
return JobStatus.FAILED;
@ -75,14 +74,14 @@ export class DuplicateService extends BaseService {
return JobStatus.FAILED;
}
if (!asset.smartSearch?.embedding) {
if (!asset.embedding) {
this.logger.debug(`Asset ${id} is missing embedding`);
return JobStatus.FAILED;
}
const duplicateAssets = await this.searchRepository.searchDuplicates({
assetId: asset.id,
embedding: asset.smartSearch.embedding,
embedding: asset.embedding,
maxDistance: machineLearning.duplicateDetection.maxDistance,
type: asset.type,
userIds: [asset.ownerId],
@ -105,7 +104,10 @@ export class DuplicateService extends BaseService {
return JobStatus.SUCCESS;
}
private async updateDuplicates(asset: AssetEntity, duplicateAssets: AssetDuplicateResult[]): Promise<string[]> {
private async updateDuplicates(
asset: { id: string; duplicateId: string | null },
duplicateAssets: AssetDuplicateResult[],
): Promise<string[]> {
const duplicateIds = [
...new Set(
duplicateAssets

View File

@ -13,7 +13,10 @@ import { PartnerRepository } from 'src/repositories/partner.repository';
import { IBulkAsset, ImmichFile, UploadFile } from 'src/types';
import { checkAccess } from 'src/utils/access';
export const getAssetFile = (files: AssetFileEntity[], type: AssetFileType | GeneratedImageType) => {
export const getAssetFile = <T extends { type: AssetFileType }>(
files: T[],
type: AssetFileType | GeneratedImageType,
) => {
return (files || []).find((file) => file.type === type);
};

View File

@ -851,88 +851,4 @@ export const assetStub = {
duplicateId: null,
isOffline: false,
}),
hasEmbedding: Object.freeze<AssetEntity>({
id: 'asset-id-embedding',
status: AssetStatus.ACTIVE,
deviceAssetId: 'device-asset-id',
fileModifiedAt: new Date('2023-02-23T05:06:29.716Z'),
fileCreatedAt: new Date('2023-02-23T05:06:29.716Z'),
owner: userStub.user1,
ownerId: 'user-id',
deviceId: 'device-id',
originalPath: '/original/path.jpg',
checksum: Buffer.from('file hash', 'utf8'),
type: AssetType.IMAGE,
files,
thumbhash: Buffer.from('blablabla', 'base64'),
encodedVideoPath: null,
createdAt: new Date('2023-02-23T05:06:29.716Z'),
updatedAt: new Date('2023-02-23T05:06:29.716Z'),
localDateTime: new Date('2023-02-23T05:06:29.716Z'),
isFavorite: true,
isArchived: false,
duration: null,
isVisible: true,
isExternal: false,
livePhotoVideo: null,
livePhotoVideoId: null,
tags: [],
sharedLinks: [],
originalFileName: 'asset-id.jpg',
faces: [],
deletedAt: null,
sidecarPath: null,
exifInfo: {
fileSizeInByte: 5000,
} as ExifEntity,
duplicateId: null,
smartSearch: {
assetId: 'asset-id',
embedding: '[1, 2, 3, 4]',
},
isOffline: false,
}),
hasDupe: Object.freeze<AssetEntity>({
id: 'asset-id-dupe',
status: AssetStatus.ACTIVE,
deviceAssetId: 'device-asset-id',
fileModifiedAt: new Date('2023-02-23T05:06:29.716Z'),
fileCreatedAt: new Date('2023-02-23T05:06:29.716Z'),
owner: userStub.user1,
ownerId: 'user-id',
deviceId: 'device-id',
originalPath: '/original/path.jpg',
checksum: Buffer.from('file hash', 'utf8'),
type: AssetType.IMAGE,
files,
thumbhash: Buffer.from('blablabla', 'base64'),
encodedVideoPath: null,
createdAt: new Date('2023-02-23T05:06:29.716Z'),
updatedAt: new Date('2023-02-23T05:06:29.716Z'),
localDateTime: new Date('2023-02-23T05:06:29.716Z'),
isFavorite: true,
isArchived: false,
duration: null,
isVisible: true,
isExternal: false,
livePhotoVideo: null,
livePhotoVideoId: null,
tags: [],
sharedLinks: [],
originalFileName: 'asset-id.jpg',
faces: [],
deletedAt: null,
sidecarPath: null,
exifInfo: {
fileSizeInByte: 5000,
} as ExifEntity,
duplicateId: 'duplicate-id',
smartSearch: {
assetId: 'asset-id',
embedding: '[1, 2, 3, 4]',
},
isOffline: false,
}),
};

View File

@ -11,6 +11,7 @@ export const newAssetRepositoryMock = (): Mocked<RepositoryInterface<AssetReposi
upsertJobStatus: vitest.fn(),
getByDayOfYear: vitest.fn(),
getByIds: vitest.fn().mockResolvedValue([]),
getAssetForSearchDuplicatesJob: vitest.fn(),
getByIdsWithAllRelations: vitest.fn().mockResolvedValue([]),
getByAlbumId: vitest.fn(),
getByDeviceIds: vitest.fn(),