feat: crawl using ignore

This commit is contained in:
Jonathan Jogenfors
2026-02-12 00:30:16 +01:00
parent 0a055d0fc7
commit 8937fe0133
10 changed files with 55 additions and 123 deletions
+9
View File
@@ -343,6 +343,9 @@ importers:
'@extism/extism':
specifier: 2.0.0-rc13
version: 2.0.0-rc13
'@immich/walkrs':
specifier: file:../../walkrs
version: file:../walkrs
'@nestjs/bullmq':
specifier: ^11.0.1
version: 11.0.4(@nestjs/common@11.1.12(class-transformer@0.5.1)(class-validator@0.14.3)(reflect-metadata@0.2.2)(rxjs@7.8.2))(@nestjs/core@11.1.12)(bullmq@5.66.5)
@@ -3136,6 +3139,10 @@ packages:
peerDependencies:
svelte: ^5.0.0
'@immich/walkrs@file:../walkrs':
resolution: {directory: ../walkrs, type: directory}
engines: {pnpm: '>=10.0.0'}
'@inquirer/ansi@1.0.2':
resolution: {integrity: sha512-S8qNSZiYzFd0wAcyG5AXCvUHC5Sr7xpZ9wZ2py9XR88jUz8wooStVx5M6dRzczbBWjic9NP7+rY0Xi7qqK/aMQ==}
engines: {node: '>=18'}
@@ -15779,6 +15786,8 @@ snapshots:
transitivePeerDependencies:
- '@sveltejs/kit'
'@immich/walkrs@file:../walkrs': {}
'@inquirer/ansi@1.0.2': {}
'@inquirer/ansi@2.0.3': {}
-6
View File
@@ -73,12 +73,6 @@ RUN --mount=type=cache,id=pnpm-plugins,target=/buildcache/pnpm-store \
FROM ghcr.io/immich-app/base-server-prod:202601131104@sha256:c649c5838b6348836d27db6d49cadbbc6157feae7a1a237180c3dec03577ba8f
RUN apt-get update && \
apt-get install -y fd-find && \
ln -s /usr/bin/fdfind /usr/local/bin/fd && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
WORKDIR /usr/src/app
ENV NODE_ENV=production \
NVIDIA_DRIVER_CAPABILITIES=all \
+3 -6
View File
@@ -14,16 +14,13 @@ COPY ./package* ./pnpm* .pnpmfile.cjs /tmp/create-dep-cache/
COPY ./web/package* ./web/pnpm* /tmp/create-dep-cache/web/
COPY ./server/package* ./server/pnpm* /tmp/create-dep-cache/server/
COPY ./open-api/typescript-sdk/package* ./open-api/typescript-sdk/pnpm* /tmp/create-dep-cache/open-api/typescript-sdk/
COPY --from=walkrs ./package*.json /tmp/walkrs/
COPY --from=walkrs ./Cargo.toml /tmp/walkrs/
COPY --from=walkrs ./src /tmp/walkrs/src/
WORKDIR /tmp/create-dep-cache
RUN pnpm fetch && rm -rf /tmp/create-dep-cache && chmod -R o+rw /buildcache
WORKDIR /usr/src/app
RUN apt-get update && \
apt-get install -y fd-find && \
ln -s /usr/bin/fdfind /usr/local/bin/fd && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
ENV PATH="${PATH}:/usr/src/app/server/bin:/usr/src/app/web/bin" \
IMMICH_ENV=development \
NVIDIA_DRIVER_CAPABILITIES=all \
+1
View File
@@ -35,6 +35,7 @@
},
"dependencies": {
"@extism/extism": "2.0.0-rc13",
"@immich/walkrs": "file:../../walkrs",
"@nestjs/bullmq": "^11.0.1",
"@nestjs/common": "^11.0.4",
"@nestjs/core": "^11.0.4",
+2 -3
View File
@@ -54,11 +54,10 @@ export class UpdateLibraryDto {
exclusionPatterns?: string[];
}
export interface CrawlOptionsDto {
pathsToCrawl: string[];
export interface WalkOptionsDto {
pathsToWalk: string[];
includeHidden?: boolean;
exclusionPatterns?: string[];
take?: number;
}
export class ValidateLibraryDto {
@@ -1,12 +1,12 @@
import mockfs from 'mock-fs';
import { CrawlOptionsDto } from 'src/dtos/library.dto';
import { WalkOptionsDto } from 'src/dtos/library.dto';
import { LoggingRepository } from 'src/repositories/logging.repository';
import { StorageRepository } from 'src/repositories/storage.repository';
import { automock } from 'test/utils';
interface Test {
test: string;
options: CrawlOptionsDto;
options: WalkOptionsDto;
files: Record<string, boolean>;
}
@@ -16,14 +16,14 @@ const tests: Test[] = [
{
test: 'should return empty when crawling an empty path list',
options: {
pathsToCrawl: [],
pathsToWalk: [],
},
files: {},
},
{
test: 'should crawl a single path',
options: {
pathsToCrawl: ['/photos/'],
pathsToWalk: ['/photos/'],
},
files: {
'/photos/image.jpg': true,
@@ -32,7 +32,7 @@ const tests: Test[] = [
{
test: 'should exclude by file extension',
options: {
pathsToCrawl: ['/photos/'],
pathsToWalk: ['/photos/'],
exclusionPatterns: ['**/*.tif'],
},
files: {
@@ -43,7 +43,7 @@ const tests: Test[] = [
{
test: 'should exclude by file extension without case sensitivity',
options: {
pathsToCrawl: ['/photos/'],
pathsToWalk: ['/photos/'],
exclusionPatterns: ['**/*.TIF'],
},
files: {
@@ -54,7 +54,7 @@ const tests: Test[] = [
{
test: 'should exclude by folder',
options: {
pathsToCrawl: ['/photos/'],
pathsToWalk: ['/photos/'],
exclusionPatterns: ['**/raw/**'],
},
files: {
@@ -68,7 +68,7 @@ const tests: Test[] = [
{
test: 'should crawl multiple paths',
options: {
pathsToCrawl: ['/photos/', '/images/', '/albums/'],
pathsToWalk: ['/photos/', '/images/', '/albums/'],
},
files: {
'/photos/image1.jpg': true,
@@ -79,7 +79,7 @@ const tests: Test[] = [
{
test: 'should crawl a single path without trailing slash',
options: {
pathsToCrawl: ['/photos'],
pathsToWalk: ['/photos'],
},
files: {
'/photos/image.jpg': true,
@@ -88,7 +88,7 @@ const tests: Test[] = [
{
test: 'should crawl a single path',
options: {
pathsToCrawl: ['/photos/'],
pathsToWalk: ['/photos/'],
},
files: {
'/photos/image.jpg': true,
@@ -100,7 +100,7 @@ const tests: Test[] = [
{
test: 'should filter file extensions',
options: {
pathsToCrawl: ['/photos/'],
pathsToWalk: ['/photos/'],
},
files: {
'/photos/image.jpg': true,
@@ -111,7 +111,7 @@ const tests: Test[] = [
{
test: 'should include photo and video extensions',
options: {
pathsToCrawl: ['/photos/', '/videos/'],
pathsToWalk: ['/photos/', '/videos/'],
},
files: {
'/photos/image.jpg': true,
@@ -133,7 +133,7 @@ const tests: Test[] = [
{
test: 'should check file extensions without case sensitivity',
options: {
pathsToCrawl: ['/photos/'],
pathsToWalk: ['/photos/'],
},
files: {
'/photos/image.jpg': true,
@@ -150,7 +150,7 @@ const tests: Test[] = [
{
test: 'should normalize the path',
options: {
pathsToCrawl: ['/photos/1/../2'],
pathsToWalk: ['/photos/1/../2'],
},
files: {
'/photos/1/image.jpg': false,
@@ -160,7 +160,7 @@ const tests: Test[] = [
{
test: 'should return absolute paths',
options: {
pathsToCrawl: ['photos'],
pathsToWalk: ['photos'],
},
files: {
[`${cwd}/photos/1.jpg`]: true,
@@ -171,7 +171,7 @@ const tests: Test[] = [
{
test: 'should support special characters in paths',
options: {
pathsToCrawl: ['/photos (new)'],
pathsToWalk: ['/photos (new)'],
},
files: {
['/photos (new)/1.jpg']: true,
@@ -196,7 +196,7 @@ describe(StorageRepository.name, () => {
it(test, async () => {
mockfs(Object.fromEntries(Object.keys(files).map((file) => [file, ''])));
const actual = await sut.crawl(options);
const actual = await sut.walk(options);
const expected = Object.entries(files)
.filter((entry) => entry[1])
.map(([file]) => file);
+11 -78
View File
@@ -1,13 +1,13 @@
import { walk } from '@immich/walkrs';
import { Injectable } from '@nestjs/common';
import archiver from 'archiver';
import chokidar, { ChokidarOptions } from 'chokidar';
import { spawn } from 'node:child_process';
import { constants, createReadStream, createWriteStream, existsSync, mkdirSync, ReadOptionsWithBuffer } from 'node:fs';
import fs from 'node:fs/promises';
import path from 'node:path';
import { PassThrough, Readable, Writable } from 'node:stream';
import { createGunzip, createGzip } from 'node:zlib';
import { CrawlOptionsDto } from 'src/dtos/library.dto';
import { WalkOptionsDto } from 'src/dtos/library.dto';
import { LoggingRepository } from 'src/repositories/logging.repository';
import { mimeTypes } from 'src/utils/mime-types';
@@ -198,86 +198,19 @@ export class StorageRepository {
};
}
async crawl(crawlOptions: CrawlOptionsDto): Promise<string[]> {
const { pathsToCrawl, exclusionPatterns, includeHidden } = crawlOptions;
if (pathsToCrawl.length === 0) {
async walk(walkOptions: WalkOptionsDto): Promise<string[]> {
const { pathsToWalk, exclusionPatterns, includeHidden } = walkOptions;
if (pathsToWalk.length === 0) {
return [];
}
return new Promise((resolve, reject) => {
const args: string[] = [
'-t',
'f', // File type: only files
'-a', // Absolute paths
'-i', // Case insensitive
'.', // Search pattern: match all files
];
const extensions = mimeTypes.getSupportedFileExtensions().map((ext) => ext.toLowerCase());
if (includeHidden) {
args.push('-H');
}
for (const pattern of exclusionPatterns ?? []) {
args.push('-E', pattern);
}
const extensions = mimeTypes.getSupportedFileExtensions();
for (const ext of extensions) {
// fd expects extensions without the dot
args.push('-e', ext.replace(/^\./, ''));
}
args.push(...pathsToCrawl);
const fdfind = spawn('fdfind', args);
const files: string[] = [];
let buffer = '';
let stderr = '';
fdfind.stdout.on('data', (data) => {
buffer += data.toString();
const lines = buffer.split('\n');
// Keep the last partial line in the buffer
buffer = lines.pop() || '';
for (const line of lines) {
const trimmed = line.trim();
if (trimmed.length > 0) {
files.push(trimmed);
}
}
});
fdfind.stderr.on('data', (data) => {
stderr += data.toString();
});
fdfind.on('close', (code) => {
// Process any remaining data in the buffer
if (buffer.length > 0) {
const trimmed = buffer.trim();
if (trimmed.length > 0) {
files.push(trimmed);
}
}
if (code === 0) {
resolve(files);
} else {
reject(new Error(`fdfind process exited with code ${code}: ${stderr}`));
}
});
fdfind.on('error', (error) => {
if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
reject(
new Error('fdfind command not found. Please install fd-find: https://github.com/sharkdp/fd#installation'),
);
} else {
reject(new Error(`Failed to spawn fdfind: ${error.message}`));
}
});
return await walk({
paths: pathsToWalk.map((p) => path.resolve(p)),
includeHidden: includeHidden ?? false,
exclusionPatterns,
extensions,
});
}
+7 -7
View File
@@ -160,7 +160,7 @@ describe(LibraryService.name, () => {
const library = factory.library({ importPaths: ['/foo', '/bar'] });
mocks.library.get.mockResolvedValue(library);
mocks.storage.crawl.mockResolvedValue(['/data/user1/photo.jpg']);
mocks.storage.walk.mockResolvedValue(['/data/user1/photo.jpg']);
mocks.storage.stat.mockResolvedValue({ isDirectory: () => true } as Stats);
mocks.storage.checkFileExists.mockResolvedValue(true);
mocks.asset.filterNewExternalAssetPaths.mockResolvedValue(['/data/user1/photo.jpg']);
@@ -201,7 +201,7 @@ describe(LibraryService.name, () => {
await sut.handleQueueSyncFiles({ id: library.id });
expect(mocks.storage.crawl).toHaveBeenCalledWith({
expect(mocks.storage.walk).toHaveBeenCalledWith({
// Must mirror the WalkOptionsDto argument that LibraryService passes to storageRepository.walk().
pathsToWalk: [library.importPaths[1]],
exclusionPatterns: [],
includeHidden: false,
@@ -214,7 +214,7 @@ describe(LibraryService.name, () => {
const library = factory.library({ importPaths: ['/foo', '/bar'] });
mocks.library.get.mockResolvedValue(library);
mocks.storage.crawl.mockResolvedValue(['/data/user1/photo.jpg']);
mocks.storage.walk.mockResolvedValue(['/data/user1/photo.jpg']);
mocks.storage.stat.mockResolvedValue({ isDirectory: () => true } as Stats);
mocks.storage.checkFileExists.mockResolvedValue(true);
mocks.asset.filterNewExternalAssetPaths.mockResolvedValue(['/data/user1/photo.jpg']);
@@ -256,7 +256,7 @@ describe(LibraryService.name, () => {
await sut.handleQueueSyncFiles({ id: library.id });
expect(mocks.storage.crawl).toHaveBeenCalledWith({
expect(mocks.storage.walk).toHaveBeenCalledWith({
// Must mirror the WalkOptionsDto argument that LibraryService passes to storageRepository.walk().
pathsToWalk: [library.importPaths[1]],
exclusionPatterns: [],
includeHidden: false,
@@ -269,7 +269,7 @@ describe(LibraryService.name, () => {
const library = factory.library();
mocks.library.get.mockResolvedValue(library);
mocks.storage.crawl.mockResolvedValue([]);
mocks.storage.walk.mockResolvedValue([]);
mocks.asset.getLibraryAssetCount.mockResolvedValue(1);
mocks.asset.detectOfflineExternalAssets.mockResolvedValue({ numUpdatedRows: 1n });
@@ -287,7 +287,7 @@ describe(LibraryService.name, () => {
const library = factory.library();
mocks.library.get.mockResolvedValue(library);
mocks.storage.crawl.mockResolvedValue([]);
mocks.storage.walk.mockResolvedValue([]);
mocks.asset.getLibraryAssetCount.mockResolvedValue(0);
mocks.asset.detectOfflineExternalAssets.mockResolvedValue({ numUpdatedRows: 1n });
@@ -301,7 +301,7 @@ describe(LibraryService.name, () => {
const library = factory.library({ importPaths: ['/foo', '/bar'] });
mocks.library.get.mockResolvedValue(library);
mocks.storage.crawl.mockResolvedValue([]);
mocks.storage.walk.mockResolvedValue([]);
mocks.library.streamAssetIds.mockReturnValue(makeStream([assetStub.external]));
mocks.asset.getLibraryAssetCount.mockResolvedValue(1);
mocks.asset.detectOfflineExternalAssets.mockResolvedValue({ numUpdatedRows: 0n });
+4 -4
View File
@@ -649,18 +649,18 @@ export class LibraryService extends BaseService {
const crawlStart = Date.now();
const pathsOnDisk = await this.storageRepository.crawl({
pathsToCrawl: validImportPaths,
const pathsOnDisk = await this.storageRepository.walk({
pathsToWalk: validImportPaths,
includeHidden: false,
exclusionPatterns: library.exclusionPatterns,
});
let importCount = 0;
this.logger.log(
`Found ${pathsOnDisk.length} file(s) on disk in ${((Date.now() - crawlStart) / 1000).toFixed(2)}s, queuing for import...`,
);
let importCount = 0;
for (let i = 0; i < pathsOnDisk.length; i += JOBS_LIBRARY_PAGINATION_SIZE) {
const pathChunk = pathsOnDisk.slice(i, i + JOBS_LIBRARY_PAGINATION_SIZE);
const paths = await this.assetRepository.filterNewExternalAssetPaths(library.id, pathChunk);
@@ -68,8 +68,7 @@ export const newStorageRepositoryMock = (): Mocked<RepositoryInterface<StorageRe
readdir: vitest.fn(),
realpath: vitest.fn().mockImplementation((filepath: string) => Promise.resolve(filepath)),
stat: vitest.fn(),
crawl: vitest.fn(),
walk: vitest.fn().mockImplementation(async function* () {}),
walk: vitest.fn(),
rename: vitest.fn(),
copyFile: vitest.fn(),
utimes: vitest.fn(),