mirror of
				https://github.com/immich-app/immich.git
				synced 2025-10-30 18:22:37 -04:00 
			
		
		
		
	bulk dedupe server endpoints
This commit is contained in:
		
							parent
							
								
									adb17c4d58
								
							
						
					
					
						commit
						f16bdb2a01
					
				| @ -1,7 +1,8 @@ | ||||
| import { Controller, Get } from '@nestjs/common'; | ||||
| import { Body, Controller, Get, Post } from '@nestjs/common'; | ||||
| import { ApiTags } from '@nestjs/swagger'; | ||||
| import { AuthDto } from 'src/dtos/auth.dto'; | ||||
| import { DuplicateResponseDto } from 'src/dtos/duplicate.dto'; | ||||
| import { DeduplicateAllDto, DuplicateResponseDto } from 'src/dtos/duplicate.dto'; | ||||
| import { Permission } from 'src/enum'; | ||||
| import { Auth, Authenticated } from 'src/middleware/auth.guard'; | ||||
| import { DuplicateService } from 'src/services/duplicate.service'; | ||||
| 
 | ||||
| @ -15,4 +16,16 @@ export class DuplicateController { | ||||
|   getAssetDuplicates(@Auth() auth: AuthDto): Promise<DuplicateResponseDto[]> { | ||||
|     return this.service.getDuplicates(auth); | ||||
|   } | ||||
| 
 | ||||
|   // Bulk "keep all": delegates to the service, which clears the duplicate marker on the
 | ||||
|   // authenticated user's assets. The service result is awaited but deliberately not returned.
 | ||||
|   @Post('/bulk/keep') | ||||
|   @Authenticated({ permission: Permission.ASSET_UPDATE }) | ||||
|   async keepAll(@Auth() auth: AuthDto): Promise<void> { | ||||
|     await this.service.keepAll(auth); | ||||
|   } | ||||
| 
 | ||||
|   // Bulk "deduplicate all": the body lists the asset IDs to keep; the service resolves the
 | ||||
|   // remaining duplicates. The service result is awaited but deliberately not returned.
 | ||||
|   @Post('/bulk/deduplicate') | ||||
|   @Authenticated({ permission: Permission.ASSET_DELETE }) | ||||
|   async deduplicateAll(@Auth() auth: AuthDto, @Body() dto: DeduplicateAllDto): Promise<void> { | ||||
|     await this.service.deduplicateAll(auth, dto); | ||||
|   } | ||||
| } | ||||
|  | ||||
| @ -12,3 +12,9 @@ export class ResolveDuplicatesDto { | ||||
|   @ValidateUUID({ each: true }) | ||||
|   assetIds!: string[]; | ||||
| } | ||||
| 
 | ||||
| /** Request body of the bulk deduplicate endpoint: the IDs of the assets that should be kept. */ export class DeduplicateAllDto { | ||||
|   @IsNotEmpty() | ||||
|   @ValidateUUID({ each: true }) | ||||
|   assetIdsToKeep!: string[]; | ||||
| } | ||||
|  | ||||
| @ -146,10 +146,17 @@ export class AssetJobRepository { | ||||
| 
 | ||||
|   @GenerateSql({ params: [], stream: true }) | ||||
|   streamForSearchDuplicates(force?: boolean) { | ||||
|     return this.assetsWithPreviews() | ||||
|       .where((eb) => eb.not((eb) => eb.exists(eb.selectFrom('smart_search').whereRef('assetId', '=', 'assets.id')))) | ||||
|       .$if(!force, (qb) => qb.where('job_status.duplicatesDetectedAt', 'is', null)) | ||||
|     return this.db | ||||
|       .selectFrom('assets') | ||||
|       .select(['assets.id']) | ||||
|       .where('assets.visibility', '!=', AssetVisibility.HIDDEN) | ||||
|       .where('assets.deletedAt', 'is', null) | ||||
|       .innerJoin('smart_search', 'assets.id', 'smart_search.assetId') | ||||
|       .$if(!force, (qb) => | ||||
|         qb | ||||
|           // qualify the join column: smart_search is already joined and also exposes assetId
 | ||||
|           .innerJoin('asset_job_status as job_status', 'job_status.assetId', 'assets.id') | ||||
|           .where('job_status.duplicatesDetectedAt', 'is', null), | ||||
|       ) | ||||
|       .stream(); | ||||
|   } | ||||
| 
 | ||||
|  | ||||
| @ -632,28 +632,22 @@ export class AssetRepository { | ||||
| 
 | ||||
|   @GenerateSql({ params: [DummyValue.UUID] }) | ||||
|   getDuplicates(userId: string) { | ||||
|     return ( | ||||
|       this.db | ||||
|     return this.db | ||||
|       .with('duplicates', (qb) => | ||||
|         qb | ||||
|           .selectFrom('assets') | ||||
|           .innerJoin('exif', 'assets.id', 'exif.assetId') | ||||
|           .leftJoinLateral( | ||||
|             (qb) => | ||||
|               qb | ||||
|                   .selectFrom('exif') | ||||
|                 .selectFrom(sql`(select 1)`.as('dummy')) | ||||
|                 .selectAll('assets') | ||||
|                 .select((eb) => eb.table('exif').as('exifInfo')) | ||||
|                   .whereRef('exif.assetId', '=', 'assets.id') | ||||
|                 .as('asset'), | ||||
|             (join) => join.onTrue(), | ||||
|           ) | ||||
|           .select('assets.duplicateId') | ||||
|             .select((eb) => | ||||
|               eb | ||||
|                 .fn('jsonb_agg', [eb.table('asset')]) | ||||
|                 .$castTo<MapAsset[]>() | ||||
|                 .as('assets'), | ||||
|             ) | ||||
|           .select((eb) => eb.fn.jsonAgg('asset').$castTo<MapAsset[]>().as('assets')) | ||||
|           .where('assets.ownerId', '=', asUuid(userId)) | ||||
|           .where('assets.duplicateId', 'is not', null) | ||||
|           .$narrowType<{ duplicateId: NotNull }>() | ||||
| @ -666,7 +660,7 @@ export class AssetRepository { | ||||
|         qb | ||||
|           .selectFrom('duplicates') | ||||
|           .select('duplicateId') | ||||
|             .where((eb) => eb(eb.fn('jsonb_array_length', ['assets']), '=', 1)), | ||||
|           .where((eb) => eb(eb.fn('json_array_length', ['assets']), '=', 1)), | ||||
|       ) | ||||
|       .with('removed_unique', (qb) => | ||||
|         qb | ||||
| @ -677,12 +671,61 @@ export class AssetRepository { | ||||
|       ) | ||||
|       .selectFrom('duplicates') | ||||
|       .selectAll() | ||||
|         // TODO: compare with filtering by jsonb_array_length > 1
 | ||||
|       .where(({ not, exists }) => | ||||
|         not(exists((eb) => eb.selectFrom('unique').whereRef('unique.duplicateId', '=', 'duplicates.duplicateId'))), | ||||
|       ) | ||||
|         .execute() | ||||
|     ); | ||||
|       .execute(); | ||||
|   } | ||||
| 
 | ||||
|   // Streams one row per duplicate group of the given user: the group's duplicateId together with
 | ||||
|   // a JSON aggregate of its assets (each carrying its exif row as exifInfo). Only assets that are
 | ||||
|   // not deleted, not hidden and not stacked are considered. Unlike getDuplicates, groups that
 | ||||
|   // contain a single asset are not filtered out by this query.
 | ||||
|   @GenerateSql({ params: [DummyValue.UUID] }) | ||||
|   streamDuplicates(userId: string) { | ||||
|     return this.db | ||||
|       .selectFrom('assets') | ||||
|       .innerJoin('exif', 'assets.id', 'exif.assetId') | ||||
|       .innerJoinLateral( | ||||
|         (lateral) => | ||||
|           lateral | ||||
|             .selectFrom(sql`(select 1)`.as('dummy')) | ||||
|             .selectAll('assets') | ||||
|             .select((expr) => expr.table('exif').as('exifInfo')) | ||||
|             .as('asset'), | ||||
|         (on) => on.onTrue(), | ||||
|       ) | ||||
|       .select('assets.duplicateId') | ||||
|       .select((expr) => expr.fn.jsonAgg('asset').as('assets')) | ||||
|       .where('assets.ownerId', '=', asUuid(userId)) | ||||
|       .where('assets.duplicateId', 'is not', null) | ||||
|       .$narrowType<{ duplicateId: NotNull }>() | ||||
|       .where('assets.deletedAt', 'is', null) | ||||
|       .where('assets.visibility', '!=', AssetVisibility.HIDDEN) | ||||
|       .where('assets.stackId', 'is', null) | ||||
|       .groupBy('assets.duplicateId') | ||||
|       .stream(); | ||||
|   } | ||||
| 
 | ||||
|   // Clears duplicateId on every asset owned by the given user that currently has one.
 | ||||
|   @GenerateSql({ params: [DummyValue.UUID] }) | ||||
|   keepAllDuplicates(userId: string) { | ||||
|     const clearDuplicateId = this.db.updateTable('assets').set({ duplicateId: null }); | ||||
|     return clearDuplicateId | ||||
|       .where('duplicateId', 'is not', null) | ||||
|       .where('ownerId', '=', userId) | ||||
|       .execute(); | ||||
|   } | ||||
| 
 | ||||
|   // Resolves all of a user's duplicates in a single statement. Assets listed in keptAssetIds
 | ||||
|   // (and owned by the user) only lose their duplicateId; every other asset of the user that
 | ||||
|   // still has a duplicateId loses it too and is moved to deduplicatedStatus.
 | ||||
|   // NOTE(review): only status is changed for the removed assets; confirm whether deletedAt
 | ||||
|   // must be set as well for them to show up as trashed/deleted elsewhere.
 | ||||
|   deduplicateAll(userId: string, keptAssetIds: string[], deduplicatedStatus: AssetStatus) { | ||||
|     return this.db | ||||
|       .with('kept', (qb) => | ||||
|         // anyUuid ensures the array is passed as a single parameter, so no need to chunk
 | ||||
|         qb | ||||
|           .updateTable('assets') | ||||
|           .set({ duplicateId: null }) | ||||
|           .where('id', '=', anyUuid(keptAssetIds)) | ||||
|           // only the caller's own assets can be kept; IDs belonging to other users are ignored
 | ||||
|           .where('ownerId', '=', userId) | ||||
|           .returning('id'), | ||||
|       ) | ||||
|       .updateTable('assets') | ||||
|       .set({ duplicateId: null, status: deduplicatedStatus }) | ||||
|       // anti-join on kept: joining with `id != kept.id` matches every asset (kept ones included)
 | ||||
|       // as soon as two or more assets are kept, and leaves `id` ambiguous
 | ||||
|       .where(({ not, exists }) => | ||||
|         not(exists((eb) => eb.selectFrom('kept').whereRef('kept.id', '=', 'assets.id'))), | ||||
|       ) | ||||
|       // when no asset was actually kept, remove nothing (same outcome as joining an empty kept set)
 | ||||
|       .where(({ exists }) => exists((eb) => eb.selectFrom('kept'))) | ||||
|       .where('assets.duplicateId', 'is not', null) | ||||
|       .where('assets.ownerId', '=', userId) | ||||
|       .execute(); | ||||
|   } | ||||
| 
 | ||||
|   @GenerateSql({ params: [DummyValue.UUID, { minAssetsPerField: 5, maxFields: 12 }] }) | ||||
|  | ||||
| @ -3,8 +3,8 @@ import { JOBS_ASSET_PAGINATION_SIZE } from 'src/constants'; | ||||
| import { OnJob } from 'src/decorators'; | ||||
| import { mapAsset } from 'src/dtos/asset-response.dto'; | ||||
| import { AuthDto } from 'src/dtos/auth.dto'; | ||||
| import { DuplicateResponseDto } from 'src/dtos/duplicate.dto'; | ||||
| import { AssetFileType, AssetVisibility, JobName, JobStatus, QueueName } from 'src/enum'; | ||||
| import { DeduplicateAllDto, DuplicateResponseDto } from 'src/dtos/duplicate.dto'; | ||||
| import { AssetFileType, AssetStatus, AssetVisibility, JobName, JobStatus, QueueName } from 'src/enum'; | ||||
| import { AssetDuplicateResult } from 'src/repositories/search.repository'; | ||||
| import { BaseService } from 'src/services/base.service'; | ||||
| import { JobItem, JobOf } from 'src/types'; | ||||
| @ -21,6 +21,20 @@ export class DuplicateService extends BaseService { | ||||
|     })); | ||||
|   } | ||||
| 
 | ||||
|   // Keeps every duplicate of the authenticated user by clearing their duplicate markers.
 | ||||
|   keepAll(auth: AuthDto) { | ||||
|     const ownerId = auth.user.id; | ||||
|     return this.assetRepository.keepAllDuplicates(ownerId); | ||||
|   } | ||||
| 
 | ||||
|   // Resolves all duplicates of the authenticated user, keeping only dto.assetIdsToKeep.
 | ||||
|   // Does nothing when no IDs are given. Removed assets are trashed when the trash feature is
 | ||||
|   // enabled in the config and marked deleted otherwise.
 | ||||
|   async deduplicateAll(auth: AuthDto, dto: DeduplicateAllDto) { | ||||
|     const { assetIdsToKeep } = dto; | ||||
|     if (assetIdsToKeep.length === 0) { | ||||
|       return; | ||||
|     } | ||||
|     const config = await this.getConfig({ withCache: false }); | ||||
|     let deduplicatedStatus = AssetStatus.DELETED; | ||||
|     if (config.trash.enabled) { | ||||
|       deduplicatedStatus = AssetStatus.TRASHED; | ||||
|     } | ||||
|     return this.assetRepository.deduplicateAll(auth.user.id, assetIdsToKeep, deduplicatedStatus); | ||||
|   } | ||||
| 
 | ||||
|   @OnJob({ name: JobName.QUEUE_DUPLICATE_DETECTION, queue: QueueName.DUPLICATE_DETECTION }) | ||||
|   async handleQueueSearchDuplicates({ force }: JobOf<JobName.QUEUE_DUPLICATE_DETECTION>): Promise<JobStatus> { | ||||
|     const { machineLearning } = await this.getConfig({ withCache: false }); | ||||
| @ -29,20 +43,16 @@ export class DuplicateService extends BaseService { | ||||
|     } | ||||
| 
 | ||||
|     let jobs: JobItem[] = []; | ||||
|     const queueAll = async () => { | ||||
|       await this.jobRepository.queueAll(jobs); | ||||
|       jobs = []; | ||||
|     }; | ||||
| 
 | ||||
|     const assets = this.assetJobRepository.streamForSearchDuplicates(force); | ||||
|     for await (const asset of assets) { | ||||
|       jobs.push({ name: JobName.DUPLICATE_DETECTION, data: { id: asset.id } }); | ||||
|       if (jobs.length >= JOBS_ASSET_PAGINATION_SIZE) { | ||||
|         await queueAll(); | ||||
|         await this.jobRepository.queueAll(jobs); | ||||
|         jobs = []; | ||||
|       } | ||||
|     } | ||||
| 
 | ||||
|     await queueAll(); | ||||
|     await this.jobRepository.queueAll(jobs); | ||||
| 
 | ||||
|     return JobStatus.SUCCESS; | ||||
|   } | ||||
|  | ||||
| @ -15,7 +15,7 @@ | ||||
|   import { suggestDuplicate } from '$lib/utils/duplicate-utils'; | ||||
|   import { handleError } from '$lib/utils/handle-error'; | ||||
|   import type { AssetResponseDto } from '@immich/sdk'; | ||||
|   import { deleteAssets, updateAssets } from '@immich/sdk'; | ||||
|   import { deduplicateAll, deleteAssets, keepAll, updateAssets } from '@immich/sdk'; | ||||
|   import { Button, HStack, IconButton, Text } from '@immich/ui'; | ||||
|   import { mdiCheckOutline, mdiInformationOutline, mdiKeyboard, mdiTrashCanOutline } from '@mdi/js'; | ||||
|   import { t } from 'svelte-i18n'; | ||||
| @ -101,33 +101,30 @@ | ||||
|   }; | ||||
| 
 | ||||
|   const handleDeduplicateAll = async () => { | ||||
|     const idsToKeep = duplicates.map((group) => suggestDuplicate(group.assets)).map((asset) => asset?.id); | ||||
|     const idsToDelete = duplicates.flatMap((group, i) => | ||||
|       group.assets.map((asset) => asset.id).filter((asset) => asset !== idsToKeep[i]), | ||||
|     ); | ||||
|     // Collect each group's suggested asset exactly once while counting all assets, so the IDs sent
 | ||||
|     // to the server are not duplicated and the removed-asset count shown to the user is correct.
 | ||||
|     let assetCount = 0; | ||||
|     const assetIdsToKeep: string[] = []; | ||||
|     for (const group of duplicates) { | ||||
|       assetCount += group.assets.length; | ||||
|       // suggestDuplicate may yield no asset; skip the group instead of asserting non-null
 | ||||
|       const assetToKeep = suggestDuplicate(group.assets); | ||||
|       if (assetToKeep) { | ||||
|         assetIdsToKeep.push(assetToKeep.id); | ||||
|       } | ||||
|     } | ||||
|     const dedupedAssetCount = assetCount - assetIdsToKeep.length; | ||||
| 
 | ||||
|     let prompt, confirmText; | ||||
|     if ($featureFlags.trash) { | ||||
|       prompt = $t('bulk_trash_duplicates_confirmation', { values: { count: idsToDelete.length } }); | ||||
|       prompt = $t('bulk_trash_duplicates_confirmation', { values: { count: dedupedAssetCount } }); | ||||
|       confirmText = $t('confirm'); | ||||
|     } else { | ||||
|       prompt = $t('bulk_delete_duplicates_confirmation', { values: { count: idsToDelete.length } }); | ||||
|       prompt = $t('bulk_delete_duplicates_confirmation', { values: { count: dedupedAssetCount } }); | ||||
|       confirmText = $t('permanently_delete'); | ||||
|     } | ||||
| 
 | ||||
|     return withConfirmation( | ||||
|       async () => { | ||||
|         await deleteAssets({ assetBulkDeleteDto: { ids: idsToDelete, force: !$featureFlags.trash } }); | ||||
|         await updateAssets({ | ||||
|           assetBulkUpdateDto: { | ||||
|             ids: [...idsToDelete, ...idsToKeep.filter((id): id is string => !!id)], | ||||
|             duplicateId: null, | ||||
|           }, | ||||
|         }); | ||||
|         await deduplicateAll({ deduplicateAllDto: { assetIdsToKeep } }); | ||||
| 
 | ||||
|         duplicates = []; | ||||
| 
 | ||||
|         deletedNotification(idsToDelete.length); | ||||
|         deletedNotification(dedupedAssetCount); | ||||
|       }, | ||||
|       prompt, | ||||
|       confirmText, | ||||
| @ -135,10 +132,10 @@ | ||||
|   }; | ||||
| 
 | ||||
|   const handleKeepAll = async () => { | ||||
|     const ids = duplicates.flatMap((group) => group.assets.map((asset) => asset.id)); | ||||
|     const assetCount = duplicates.reduce((acc, cur) => acc + cur.assets.length, 0); | ||||
|     return withConfirmation( | ||||
|       async () => { | ||||
|         await updateAssets({ assetBulkUpdateDto: { ids, duplicateId: null } }); | ||||
|         await keepAll(); | ||||
| 
 | ||||
|         duplicates = []; | ||||
| 
 | ||||
| @ -147,7 +144,7 @@ | ||||
|           type: NotificationType.Info, | ||||
|         }); | ||||
|       }, | ||||
|       $t('bulk_keep_duplicates_confirmation', { values: { count: ids.length } }), | ||||
|       $t('bulk_keep_duplicates_confirmation', { values: { count: assetCount } }), | ||||
|       $t('confirm'), | ||||
|     ); | ||||
|   }; | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user