diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 4ae464ae96..b269c16f18 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -343,6 +343,9 @@ importers: '@extism/extism': specifier: 2.0.0-rc13 version: 2.0.0-rc13 + '@immich/walkrs': + specifier: file:../../walkrs + version: file:../walkrs '@nestjs/bullmq': specifier: ^11.0.1 version: 11.0.4(@nestjs/common@11.1.12(class-transformer@0.5.1)(class-validator@0.14.3)(reflect-metadata@0.2.2)(rxjs@7.8.2))(@nestjs/core@11.1.12)(bullmq@5.66.5) @@ -3136,6 +3139,10 @@ packages: peerDependencies: svelte: ^5.0.0 + '@immich/walkrs@file:../walkrs': + resolution: {directory: ../walkrs, type: directory} + engines: {pnpm: '>=10.0.0'} + '@inquirer/ansi@1.0.2': resolution: {integrity: sha512-S8qNSZiYzFd0wAcyG5AXCvUHC5Sr7xpZ9wZ2py9XR88jUz8wooStVx5M6dRzczbBWjic9NP7+rY0Xi7qqK/aMQ==} engines: {node: '>=18'} @@ -15779,6 +15786,8 @@ snapshots: transitivePeerDependencies: - '@sveltejs/kit' + '@immich/walkrs@file:../walkrs': {} + '@inquirer/ansi@1.0.2': {} '@inquirer/ansi@2.0.3': {} diff --git a/server/Dockerfile b/server/Dockerfile index a609bd8bf6..a8a8b04713 100644 --- a/server/Dockerfile +++ b/server/Dockerfile @@ -73,12 +73,6 @@ RUN --mount=type=cache,id=pnpm-plugins,target=/buildcache/pnpm-store \ FROM ghcr.io/immich-app/base-server-prod:202601131104@sha256:c649c5838b6348836d27db6d49cadbbc6157feae7a1a237180c3dec03577ba8f -RUN apt-get update && \ - apt-get install -y fd-find && \ - ln -s /usr/bin/fdfind /usr/local/bin/fd && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - WORKDIR /usr/src/app ENV NODE_ENV=production \ NVIDIA_DRIVER_CAPABILITIES=all \ diff --git a/server/Dockerfile.dev b/server/Dockerfile.dev index 485fc8f995..b4bf7ac6f3 100644 --- a/server/Dockerfile.dev +++ b/server/Dockerfile.dev @@ -14,16 +14,13 @@ COPY ./package* ./pnpm* .pnpmfile.cjs /tmp/create-dep-cache/ COPY ./web/package* ./web/pnpm* /tmp/create-dep-cache/web/ COPY ./server/package* ./server/pnpm* /tmp/create-dep-cache/server/ COPY 
./open-api/typescript-sdk/package* ./open-api/typescript-sdk/pnpm* /tmp/create-dep-cache/open-api/typescript-sdk/ +COPY --from=walkrs ./package*.json /tmp/walkrs/ +COPY --from=walkrs ./Cargo.toml /tmp/walkrs/ +COPY --from=walkrs ./src /tmp/walkrs/src/ WORKDIR /tmp/create-dep-cache RUN pnpm fetch && rm -rf /tmp/create-dep-cache && chmod -R o+rw /buildcache WORKDIR /usr/src/app -RUN apt-get update && \ - apt-get install -y fd-find && \ - ln -s /usr/bin/fdfind /usr/local/bin/fd && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - ENV PATH="${PATH}:/usr/src/app/server/bin:/usr/src/app/web/bin" \ IMMICH_ENV=development \ NVIDIA_DRIVER_CAPABILITIES=all \ diff --git a/server/package.json b/server/package.json index 31da11af3f..48d5a1e92c 100644 --- a/server/package.json +++ b/server/package.json @@ -35,6 +35,7 @@ }, "dependencies": { "@extism/extism": "2.0.0-rc13", + "@immich/walkrs": "file:../../walkrs", "@nestjs/bullmq": "^11.0.1", "@nestjs/common": "^11.0.4", "@nestjs/core": "^11.0.4", diff --git a/server/src/dtos/library.dto.ts b/server/src/dtos/library.dto.ts index af45fc9ec2..21a2b8cbd0 100644 --- a/server/src/dtos/library.dto.ts +++ b/server/src/dtos/library.dto.ts @@ -54,11 +54,10 @@ export class UpdateLibraryDto { exclusionPatterns?: string[]; } -export interface CrawlOptionsDto { - pathsToCrawl: string[]; +export interface WalkOptionsDto { + pathsToWalk: string[]; includeHidden?: boolean; exclusionPatterns?: string[]; - take?: number; } export class ValidateLibraryDto { diff --git a/server/src/repositories/storage.repository.spec.ts b/server/src/repositories/storage.repository.spec.ts index aaf875d3b3..1583ced990 100644 --- a/server/src/repositories/storage.repository.spec.ts +++ b/server/src/repositories/storage.repository.spec.ts @@ -1,12 +1,12 @@ import mockfs from 'mock-fs'; -import { CrawlOptionsDto } from 'src/dtos/library.dto'; +import { WalkOptionsDto } from 'src/dtos/library.dto'; import { LoggingRepository } from 
'src/repositories/logging.repository'; import { StorageRepository } from 'src/repositories/storage.repository'; import { automock } from 'test/utils'; interface Test { test: string; - options: CrawlOptionsDto; + options: WalkOptionsDto; files: Record<string, boolean>; } @@ -16,14 +16,14 @@ const tests: Test[] = [ { test: 'should return empty when crawling an empty path list', options: { - pathsToCrawl: [], + pathsToWalk: [], }, files: {}, }, { test: 'should crawl a single path', options: { - pathsToCrawl: ['/photos/'], + pathsToWalk: ['/photos/'], }, files: { '/photos/image.jpg': true, @@ -32,7 +32,7 @@ const tests: Test[] = [ { test: 'should exclude by file extension', options: { - pathsToCrawl: ['/photos/'], + pathsToWalk: ['/photos/'], exclusionPatterns: ['**/*.tif'], }, files: { @@ -43,7 +43,7 @@ const tests: Test[] = [ { test: 'should exclude by file extension without case sensitivity', options: { - pathsToCrawl: ['/photos/'], + pathsToWalk: ['/photos/'], exclusionPatterns: ['**/*.TIF'], }, files: { @@ -54,7 +54,7 @@ const tests: Test[] = [ { test: 'should exclude by folder', options: { - pathsToCrawl: ['/photos/'], + pathsToWalk: ['/photos/'], exclusionPatterns: ['**/raw/**'], }, files: { @@ -68,7 +68,7 @@ const tests: Test[] = [ { test: 'should crawl multiple paths', options: { - pathsToCrawl: ['/photos/', '/images/', '/albums/'], + pathsToWalk: ['/photos/', '/images/', '/albums/'], }, files: { '/photos/image1.jpg': true, @@ -79,7 +79,7 @@ const tests: Test[] = [ { test: 'should crawl a single path without trailing slash', options: { - pathsToCrawl: ['/photos'], + pathsToWalk: ['/photos'], }, files: { '/photos/image.jpg': true, @@ -88,7 +88,7 @@ const tests: Test[] = [ { test: 'should crawl a single path', options: { - pathsToCrawl: ['/photos/'], + pathsToWalk: ['/photos/'], }, files: { '/photos/image.jpg': true, @@ -100,7 +100,7 @@ const tests: Test[] = [ { test: 'should filter file extensions', options: { - pathsToCrawl: ['/photos/'], + pathsToWalk: ['/photos/'], }, files:
{ '/photos/image.jpg': true, @@ -111,7 +111,7 @@ const tests: Test[] = [ { test: 'should include photo and video extensions', options: { - pathsToCrawl: ['/photos/', '/videos/'], + pathsToWalk: ['/photos/', '/videos/'], }, files: { '/photos/image.jpg': true, @@ -133,7 +133,7 @@ const tests: Test[] = [ { test: 'should check file extensions without case sensitivity', options: { - pathsToCrawl: ['/photos/'], + pathsToWalk: ['/photos/'], }, files: { '/photos/image.jpg': true, @@ -150,7 +150,7 @@ const tests: Test[] = [ { test: 'should normalize the path', options: { - pathsToCrawl: ['/photos/1/../2'], + pathsToWalk: ['/photos/1/../2'], }, files: { '/photos/1/image.jpg': false, @@ -160,7 +160,7 @@ const tests: Test[] = [ { test: 'should return absolute paths', options: { - pathsToCrawl: ['photos'], + pathsToWalk: ['photos'], }, files: { [`${cwd}/photos/1.jpg`]: true, @@ -171,7 +171,7 @@ const tests: Test[] = [ { test: 'should support special characters in paths', options: { - pathsToCrawl: ['/photos (new)'], + pathsToWalk: ['/photos (new)'], }, files: { ['/photos (new)/1.jpg']: true, @@ -196,7 +196,7 @@ describe(StorageRepository.name, () => { it(test, async () => { mockfs(Object.fromEntries(Object.keys(files).map((file) => [file, '']))); - const actual = await sut.crawl(options); + const actual = await sut.walk(options); const expected = Object.entries(files) .filter((entry) => entry[1]) .map(([file]) => file); diff --git a/server/src/repositories/storage.repository.ts b/server/src/repositories/storage.repository.ts index 5b81d3594f..5e26287ff5 100644 --- a/server/src/repositories/storage.repository.ts +++ b/server/src/repositories/storage.repository.ts @@ -1,13 +1,13 @@ +import { walk } from '@immich/walkrs'; import { Injectable } from '@nestjs/common'; import archiver from 'archiver'; import chokidar, { ChokidarOptions } from 'chokidar'; -import { spawn } from 'node:child_process'; import { constants, createReadStream, createWriteStream, existsSync, mkdirSync, 
ReadOptionsWithBuffer } from 'node:fs'; import fs from 'node:fs/promises'; import path from 'node:path'; import { PassThrough, Readable, Writable } from 'node:stream'; import { createGunzip, createGzip } from 'node:zlib'; -import { CrawlOptionsDto } from 'src/dtos/library.dto'; +import { WalkOptionsDto } from 'src/dtos/library.dto'; import { LoggingRepository } from 'src/repositories/logging.repository'; import { mimeTypes } from 'src/utils/mime-types'; @@ -198,86 +198,19 @@ export class StorageRepository { }; } - async crawl(crawlOptions: CrawlOptionsDto): Promise<string[]> { - const { pathsToCrawl, exclusionPatterns, includeHidden } = crawlOptions; - if (pathsToCrawl.length === 0) { + async walk(walkOptions: WalkOptionsDto): Promise<string[]> { + const { pathsToWalk, exclusionPatterns, includeHidden } = walkOptions; + if (pathsToWalk.length === 0) { return []; } - return new Promise((resolve, reject) => { - const args: string[] = [ - '-t', - 'f', // File type: only files - '-a', // Absolute paths - '-i', // Case insensitive - '.', // Search pattern: match all files - ]; + const extensions = mimeTypes.getSupportedFileExtensions().map((ext) => ext.toLowerCase()); - if (includeHidden) { - args.push('-H'); - } - - for (const pattern of exclusionPatterns ??
[]) { - args.push('-E', pattern); - } - - const extensions = mimeTypes.getSupportedFileExtensions(); - for (const ext of extensions) { - // fd expects extensions without the dot - args.push('-e', ext.replace(/^\./, '')); - } - - args.push(...pathsToCrawl); - - const fdfind = spawn('fdfind', args); - - const files: string[] = []; - let buffer = ''; - let stderr = ''; - - fdfind.stdout.on('data', (data) => { - buffer += data.toString(); - const lines = buffer.split('\n'); - // Keep the last partial line in the buffer - buffer = lines.pop() || ''; - - for (const line of lines) { - const trimmed = line.trim(); - if (trimmed.length > 0) { - files.push(trimmed); - } - } - }); - - fdfind.stderr.on('data', (data) => { - stderr += data.toString(); - }); - - fdfind.on('close', (code) => { - // Process any remaining data in the buffer - if (buffer.length > 0) { - const trimmed = buffer.trim(); - if (trimmed.length > 0) { - files.push(trimmed); - } - } - - if (code === 0) { - resolve(files); - } else { - reject(new Error(`fdfind process exited with code ${code}: ${stderr}`)); - } - }); - - fdfind.on('error', (error) => { - if ((error as NodeJS.ErrnoException).code === 'ENOENT') { - reject( - new Error('fdfind command not found. Please install fd-find: https://github.com/sharkdp/fd#installation'), - ); - } else { - reject(new Error(`Failed to spawn fdfind: ${error.message}`)); - } - }); + return await walk({ + paths: pathsToWalk.map((p) => path.resolve(p)), + includeHidden: includeHidden ?? 
false, + exclusionPatterns, + extensions, }); } diff --git a/server/src/services/library.service.spec.ts b/server/src/services/library.service.spec.ts index 52e01846bf..eb8709057b 100644 --- a/server/src/services/library.service.spec.ts +++ b/server/src/services/library.service.spec.ts @@ -160,7 +160,7 @@ describe(LibraryService.name, () => { const library = factory.library({ importPaths: ['/foo', '/bar'] }); mocks.library.get.mockResolvedValue(library); - mocks.storage.crawl.mockResolvedValue(['/data/user1/photo.jpg']); + mocks.storage.walk.mockResolvedValue(['/data/user1/photo.jpg']); mocks.storage.stat.mockResolvedValue({ isDirectory: () => true } as Stats); mocks.storage.checkFileExists.mockResolvedValue(true); mocks.asset.filterNewExternalAssetPaths.mockResolvedValue(['/data/user1/photo.jpg']); @@ -201,7 +201,7 @@ describe(LibraryService.name, () => { await sut.handleQueueSyncFiles({ id: library.id }); - expect(mocks.storage.crawl).toHaveBeenCalledWith({ + expect(mocks.storage.walk).toHaveBeenCalledWith({ - pathsToCrawl: [library.importPaths[1]], + pathsToWalk: [library.importPaths[1]], exclusionPatterns: [], includeHidden: false, @@ -214,7 +214,7 @@ describe(LibraryService.name, () => { const library = factory.library({ importPaths: ['/foo', '/bar'] }); mocks.library.get.mockResolvedValue(library); - mocks.storage.crawl.mockResolvedValue(['/data/user1/photo.jpg']); + mocks.storage.walk.mockResolvedValue(['/data/user1/photo.jpg']); mocks.storage.stat.mockResolvedValue({ isDirectory: () => true } as Stats); mocks.storage.checkFileExists.mockResolvedValue(true); mocks.asset.filterNewExternalAssetPaths.mockResolvedValue(['/data/user1/photo.jpg']); @@ -256,7 +256,7 @@ describe(LibraryService.name, () => { await sut.handleQueueSyncFiles({ id: library.id }); - expect(mocks.storage.crawl).toHaveBeenCalledWith({ + expect(mocks.storage.walk).toHaveBeenCalledWith({ - pathsToCrawl: [library.importPaths[1]], + pathsToWalk: [library.importPaths[1]], exclusionPatterns: [], includeHidden: false, @@ -269,7 +269,7 @@ describe(LibraryService.name, () => { const
library = factory.library(); mocks.library.get.mockResolvedValue(library); - mocks.storage.crawl.mockResolvedValue([]); + mocks.storage.walk.mockResolvedValue([]); mocks.asset.getLibraryAssetCount.mockResolvedValue(1); mocks.asset.detectOfflineExternalAssets.mockResolvedValue({ numUpdatedRows: 1n }); @@ -287,7 +287,7 @@ describe(LibraryService.name, () => { const library = factory.library(); mocks.library.get.mockResolvedValue(library); - mocks.storage.crawl.mockResolvedValue([]); + mocks.storage.walk.mockResolvedValue([]); mocks.asset.getLibraryAssetCount.mockResolvedValue(0); mocks.asset.detectOfflineExternalAssets.mockResolvedValue({ numUpdatedRows: 1n }); @@ -301,7 +301,7 @@ describe(LibraryService.name, () => { const library = factory.library({ importPaths: ['/foo', '/bar'] }); mocks.library.get.mockResolvedValue(library); - mocks.storage.crawl.mockResolvedValue([]); + mocks.storage.walk.mockResolvedValue([]); mocks.library.streamAssetIds.mockReturnValue(makeStream([assetStub.external])); mocks.asset.getLibraryAssetCount.mockResolvedValue(1); mocks.asset.detectOfflineExternalAssets.mockResolvedValue({ numUpdatedRows: 0n }); diff --git a/server/src/services/library.service.ts b/server/src/services/library.service.ts index 5c33dd8979..1d1b0ed5c7 100644 --- a/server/src/services/library.service.ts +++ b/server/src/services/library.service.ts @@ -649,18 +649,18 @@ export class LibraryService extends BaseService { const crawlStart = Date.now(); - const pathsOnDisk = await this.storageRepository.crawl({ - pathsToCrawl: validImportPaths, + const pathsOnDisk = await this.storageRepository.walk({ + pathsToWalk: validImportPaths, includeHidden: false, exclusionPatterns: library.exclusionPatterns, }); - let importCount = 0; - this.logger.log( `Found ${pathsOnDisk.length} file(s) on disk in ${((Date.now() - crawlStart) / 1000).toFixed(2)}s, queuing for import...`, ); + let importCount = 0; + for (let i = 0; i < pathsOnDisk.length; i += JOBS_LIBRARY_PAGINATION_SIZE) { 
const pathChunk = pathsOnDisk.slice(i, i + JOBS_LIBRARY_PAGINATION_SIZE); const paths = await this.assetRepository.filterNewExternalAssetPaths(library.id, pathChunk); diff --git a/server/test/repositories/storage.repository.mock.ts b/server/test/repositories/storage.repository.mock.ts index 85c72b6c10..14106e346c 100644 --- a/server/test/repositories/storage.repository.mock.ts +++ b/server/test/repositories/storage.repository.mock.ts @@ -68,8 +68,7 @@ export const newStorageRepositoryMock = (): Mocked Promise.resolve(filepath)), stat: vitest.fn(), - crawl: vitest.fn(), - walk: vitest.fn().mockImplementation(async function* () {}), + walk: vitest.fn(), rename: vitest.fn(), copyFile: vitest.fn(), utimes: vitest.fn(),