mirror of
https://github.com/immich-app/immich.git
synced 2026-05-25 00:12:33 -04:00
feat: crawl using ignore
This commit is contained in:
Generated
+9
@@ -343,6 +343,9 @@ importers:
|
||||
'@extism/extism':
|
||||
specifier: 2.0.0-rc13
|
||||
version: 2.0.0-rc13
|
||||
'@immich/walkrs':
|
||||
specifier: file:../../walkrs
|
||||
version: file:../walkrs
|
||||
'@nestjs/bullmq':
|
||||
specifier: ^11.0.1
|
||||
version: 11.0.4(@nestjs/common@11.1.12(class-transformer@0.5.1)(class-validator@0.14.3)(reflect-metadata@0.2.2)(rxjs@7.8.2))(@nestjs/core@11.1.12)(bullmq@5.66.5)
|
||||
@@ -3136,6 +3139,10 @@ packages:
|
||||
peerDependencies:
|
||||
svelte: ^5.0.0
|
||||
|
||||
'@immich/walkrs@file:../walkrs':
|
||||
resolution: {directory: ../walkrs, type: directory}
|
||||
engines: {pnpm: '>=10.0.0'}
|
||||
|
||||
'@inquirer/ansi@1.0.2':
|
||||
resolution: {integrity: sha512-S8qNSZiYzFd0wAcyG5AXCvUHC5Sr7xpZ9wZ2py9XR88jUz8wooStVx5M6dRzczbBWjic9NP7+rY0Xi7qqK/aMQ==}
|
||||
engines: {node: '>=18'}
|
||||
@@ -15779,6 +15786,8 @@ snapshots:
|
||||
transitivePeerDependencies:
|
||||
- '@sveltejs/kit'
|
||||
|
||||
'@immich/walkrs@file:../walkrs': {}
|
||||
|
||||
'@inquirer/ansi@1.0.2': {}
|
||||
|
||||
'@inquirer/ansi@2.0.3': {}
|
||||
|
||||
@@ -73,12 +73,6 @@ RUN --mount=type=cache,id=pnpm-plugins,target=/buildcache/pnpm-store \
|
||||
|
||||
FROM ghcr.io/immich-app/base-server-prod:202601131104@sha256:c649c5838b6348836d27db6d49cadbbc6157feae7a1a237180c3dec03577ba8f
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y fd-find && \
|
||||
ln -s /usr/bin/fdfind /usr/local/bin/fd && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /usr/src/app
|
||||
ENV NODE_ENV=production \
|
||||
NVIDIA_DRIVER_CAPABILITIES=all \
|
||||
|
||||
@@ -14,16 +14,13 @@ COPY ./package* ./pnpm* .pnpmfile.cjs /tmp/create-dep-cache/
|
||||
COPY ./web/package* ./web/pnpm* /tmp/create-dep-cache/web/
|
||||
COPY ./server/package* ./server/pnpm* /tmp/create-dep-cache/server/
|
||||
COPY ./open-api/typescript-sdk/package* ./open-api/typescript-sdk/pnpm* /tmp/create-dep-cache/open-api/typescript-sdk/
|
||||
COPY --from=walkrs ./package*.json /tmp/walkrs/
|
||||
COPY --from=walkrs ./Cargo.toml /tmp/walkrs/
|
||||
COPY --from=walkrs ./src /tmp/walkrs/src/
|
||||
WORKDIR /tmp/create-dep-cache
|
||||
RUN pnpm fetch && rm -rf /tmp/create-dep-cache && chmod -R o+rw /buildcache
|
||||
WORKDIR /usr/src/app
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y fd-find && \
|
||||
ln -s /usr/bin/fdfind /usr/local/bin/fd && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ENV PATH="${PATH}:/usr/src/app/server/bin:/usr/src/app/web/bin" \
|
||||
IMMICH_ENV=development \
|
||||
NVIDIA_DRIVER_CAPABILITIES=all \
|
||||
|
||||
@@ -35,6 +35,7 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"@extism/extism": "2.0.0-rc13",
|
||||
"@immich/walkrs": "file:../../walkrs",
|
||||
"@nestjs/bullmq": "^11.0.1",
|
||||
"@nestjs/common": "^11.0.4",
|
||||
"@nestjs/core": "^11.0.4",
|
||||
|
||||
@@ -54,11 +54,10 @@ export class UpdateLibraryDto {
|
||||
exclusionPatterns?: string[];
|
||||
}
|
||||
|
||||
export interface CrawlOptionsDto {
|
||||
pathsToCrawl: string[];
|
||||
export interface WalkOptionsDto {
|
||||
pathsToWalk: string[];
|
||||
includeHidden?: boolean;
|
||||
exclusionPatterns?: string[];
|
||||
take?: number;
|
||||
}
|
||||
|
||||
export class ValidateLibraryDto {
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
import mockfs from 'mock-fs';
|
||||
import { CrawlOptionsDto } from 'src/dtos/library.dto';
|
||||
import { WalkOptionsDto } from 'src/dtos/library.dto';
|
||||
import { LoggingRepository } from 'src/repositories/logging.repository';
|
||||
import { StorageRepository } from 'src/repositories/storage.repository';
|
||||
import { automock } from 'test/utils';
|
||||
|
||||
interface Test {
|
||||
test: string;
|
||||
options: CrawlOptionsDto;
|
||||
options: WalkOptionsDto;
|
||||
files: Record<string, boolean>;
|
||||
}
|
||||
|
||||
@@ -16,14 +16,14 @@ const tests: Test[] = [
|
||||
{
|
||||
test: 'should return empty when crawling an empty path list',
|
||||
options: {
|
||||
pathsToCrawl: [],
|
||||
pathsToWalk: [],
|
||||
},
|
||||
files: {},
|
||||
},
|
||||
{
|
||||
test: 'should crawl a single path',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos/'],
|
||||
pathsToWalk: ['/photos/'],
|
||||
},
|
||||
files: {
|
||||
'/photos/image.jpg': true,
|
||||
@@ -32,7 +32,7 @@ const tests: Test[] = [
|
||||
{
|
||||
test: 'should exclude by file extension',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos/'],
|
||||
pathsToWalk: ['/photos/'],
|
||||
exclusionPatterns: ['**/*.tif'],
|
||||
},
|
||||
files: {
|
||||
@@ -43,7 +43,7 @@ const tests: Test[] = [
|
||||
{
|
||||
test: 'should exclude by file extension without case sensitivity',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos/'],
|
||||
pathsToWalk: ['/photos/'],
|
||||
exclusionPatterns: ['**/*.TIF'],
|
||||
},
|
||||
files: {
|
||||
@@ -54,7 +54,7 @@ const tests: Test[] = [
|
||||
{
|
||||
test: 'should exclude by folder',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos/'],
|
||||
pathsToWalk: ['/photos/'],
|
||||
exclusionPatterns: ['**/raw/**'],
|
||||
},
|
||||
files: {
|
||||
@@ -68,7 +68,7 @@ const tests: Test[] = [
|
||||
{
|
||||
test: 'should crawl multiple paths',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos/', '/images/', '/albums/'],
|
||||
pathsToWalk: ['/photos/', '/images/', '/albums/'],
|
||||
},
|
||||
files: {
|
||||
'/photos/image1.jpg': true,
|
||||
@@ -79,7 +79,7 @@ const tests: Test[] = [
|
||||
{
|
||||
test: 'should crawl a single path without trailing slash',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos'],
|
||||
pathsToWalk: ['/photos'],
|
||||
},
|
||||
files: {
|
||||
'/photos/image.jpg': true,
|
||||
@@ -88,7 +88,7 @@ const tests: Test[] = [
|
||||
{
|
||||
test: 'should crawl a single path',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos/'],
|
||||
pathsToWalk: ['/photos/'],
|
||||
},
|
||||
files: {
|
||||
'/photos/image.jpg': true,
|
||||
@@ -100,7 +100,7 @@ const tests: Test[] = [
|
||||
{
|
||||
test: 'should filter file extensions',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos/'],
|
||||
pathsToWalk: ['/photos/'],
|
||||
},
|
||||
files: {
|
||||
'/photos/image.jpg': true,
|
||||
@@ -111,7 +111,7 @@ const tests: Test[] = [
|
||||
{
|
||||
test: 'should include photo and video extensions',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos/', '/videos/'],
|
||||
pathsToWalk: ['/photos/', '/videos/'],
|
||||
},
|
||||
files: {
|
||||
'/photos/image.jpg': true,
|
||||
@@ -133,7 +133,7 @@ const tests: Test[] = [
|
||||
{
|
||||
test: 'should check file extensions without case sensitivity',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos/'],
|
||||
pathsToWalk: ['/photos/'],
|
||||
},
|
||||
files: {
|
||||
'/photos/image.jpg': true,
|
||||
@@ -150,7 +150,7 @@ const tests: Test[] = [
|
||||
{
|
||||
test: 'should normalize the path',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos/1/../2'],
|
||||
pathsToWalk: ['/photos/1/../2'],
|
||||
},
|
||||
files: {
|
||||
'/photos/1/image.jpg': false,
|
||||
@@ -160,7 +160,7 @@ const tests: Test[] = [
|
||||
{
|
||||
test: 'should return absolute paths',
|
||||
options: {
|
||||
pathsToCrawl: ['photos'],
|
||||
pathsToWalk: ['photos'],
|
||||
},
|
||||
files: {
|
||||
[`${cwd}/photos/1.jpg`]: true,
|
||||
@@ -171,7 +171,7 @@ const tests: Test[] = [
|
||||
{
|
||||
test: 'should support special characters in paths',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos (new)'],
|
||||
pathsToWalk: ['/photos (new)'],
|
||||
},
|
||||
files: {
|
||||
['/photos (new)/1.jpg']: true,
|
||||
@@ -196,7 +196,7 @@ describe(StorageRepository.name, () => {
|
||||
it(test, async () => {
|
||||
mockfs(Object.fromEntries(Object.keys(files).map((file) => [file, ''])));
|
||||
|
||||
const actual = await sut.crawl(options);
|
||||
const actual = await sut.walk(options);
|
||||
const expected = Object.entries(files)
|
||||
.filter((entry) => entry[1])
|
||||
.map(([file]) => file);
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
import { walk } from '@immich/walkrs';
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import archiver from 'archiver';
|
||||
import chokidar, { ChokidarOptions } from 'chokidar';
|
||||
import { spawn } from 'node:child_process';
|
||||
import { constants, createReadStream, createWriteStream, existsSync, mkdirSync, ReadOptionsWithBuffer } from 'node:fs';
|
||||
import fs from 'node:fs/promises';
|
||||
import path from 'node:path';
|
||||
import { PassThrough, Readable, Writable } from 'node:stream';
|
||||
import { createGunzip, createGzip } from 'node:zlib';
|
||||
import { CrawlOptionsDto } from 'src/dtos/library.dto';
|
||||
import { WalkOptionsDto } from 'src/dtos/library.dto';
|
||||
import { LoggingRepository } from 'src/repositories/logging.repository';
|
||||
import { mimeTypes } from 'src/utils/mime-types';
|
||||
|
||||
@@ -198,86 +198,19 @@ export class StorageRepository {
|
||||
};
|
||||
}
|
||||
|
||||
async crawl(crawlOptions: CrawlOptionsDto): Promise<string[]> {
|
||||
const { pathsToCrawl, exclusionPatterns, includeHidden } = crawlOptions;
|
||||
if (pathsToCrawl.length === 0) {
|
||||
async walk(walkOptions: WalkOptionsDto): Promise<string[]> {
|
||||
const { pathsToWalk, exclusionPatterns, includeHidden } = walkOptions;
|
||||
if (pathsToWalk.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
return new Promise((resolve, reject) => {
|
||||
const args: string[] = [
|
||||
'-t',
|
||||
'f', // File type: only files
|
||||
'-a', // Absolute paths
|
||||
'-i', // Case insensitive
|
||||
'.', // Search pattern: match all files
|
||||
];
|
||||
const extensions = mimeTypes.getSupportedFileExtensions().map((ext) => ext.toLowerCase());
|
||||
|
||||
if (includeHidden) {
|
||||
args.push('-H');
|
||||
}
|
||||
|
||||
for (const pattern of exclusionPatterns ?? []) {
|
||||
args.push('-E', pattern);
|
||||
}
|
||||
|
||||
const extensions = mimeTypes.getSupportedFileExtensions();
|
||||
for (const ext of extensions) {
|
||||
// fd expects extensions without the dot
|
||||
args.push('-e', ext.replace(/^\./, ''));
|
||||
}
|
||||
|
||||
args.push(...pathsToCrawl);
|
||||
|
||||
const fdfind = spawn('fdfind', args);
|
||||
|
||||
const files: string[] = [];
|
||||
let buffer = '';
|
||||
let stderr = '';
|
||||
|
||||
fdfind.stdout.on('data', (data) => {
|
||||
buffer += data.toString();
|
||||
const lines = buffer.split('\n');
|
||||
// Keep the last partial line in the buffer
|
||||
buffer = lines.pop() || '';
|
||||
|
||||
for (const line of lines) {
|
||||
const trimmed = line.trim();
|
||||
if (trimmed.length > 0) {
|
||||
files.push(trimmed);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
fdfind.stderr.on('data', (data) => {
|
||||
stderr += data.toString();
|
||||
});
|
||||
|
||||
fdfind.on('close', (code) => {
|
||||
// Process any remaining data in the buffer
|
||||
if (buffer.length > 0) {
|
||||
const trimmed = buffer.trim();
|
||||
if (trimmed.length > 0) {
|
||||
files.push(trimmed);
|
||||
}
|
||||
}
|
||||
|
||||
if (code === 0) {
|
||||
resolve(files);
|
||||
} else {
|
||||
reject(new Error(`fdfind process exited with code ${code}: ${stderr}`));
|
||||
}
|
||||
});
|
||||
|
||||
fdfind.on('error', (error) => {
|
||||
if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
|
||||
reject(
|
||||
new Error('fdfind command not found. Please install fd-find: https://github.com/sharkdp/fd#installation'),
|
||||
);
|
||||
} else {
|
||||
reject(new Error(`Failed to spawn fdfind: ${error.message}`));
|
||||
}
|
||||
});
|
||||
return await walk({
|
||||
paths: pathsToWalk.map((p) => path.resolve(p)),
|
||||
includeHidden: includeHidden ?? false,
|
||||
exclusionPatterns,
|
||||
extensions,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -160,7 +160,7 @@ describe(LibraryService.name, () => {
|
||||
const library = factory.library({ importPaths: ['/foo', '/bar'] });
|
||||
|
||||
mocks.library.get.mockResolvedValue(library);
|
||||
mocks.storage.crawl.mockResolvedValue(['/data/user1/photo.jpg']);
|
||||
mocks.storage.walk.mockResolvedValue(['/data/user1/photo.jpg']);
|
||||
mocks.storage.stat.mockResolvedValue({ isDirectory: () => true } as Stats);
|
||||
mocks.storage.checkFileExists.mockResolvedValue(true);
|
||||
mocks.asset.filterNewExternalAssetPaths.mockResolvedValue(['/data/user1/photo.jpg']);
|
||||
@@ -201,7 +201,7 @@ describe(LibraryService.name, () => {
|
||||
|
||||
await sut.handleQueueSyncFiles({ id: library.id });
|
||||
|
||||
expect(mocks.storage.crawl).toHaveBeenCalledWith({
|
||||
expect(mocks.storage.walk).toHaveBeenCalledWith({
|
||||
pathsToCrawl: [library.importPaths[1]],
|
||||
exclusionPatterns: [],
|
||||
includeHidden: false,
|
||||
@@ -214,7 +214,7 @@ describe(LibraryService.name, () => {
|
||||
const library = factory.library({ importPaths: ['/foo', '/bar'] });
|
||||
|
||||
mocks.library.get.mockResolvedValue(library);
|
||||
mocks.storage.crawl.mockResolvedValue(['/data/user1/photo.jpg']);
|
||||
mocks.storage.walk.mockResolvedValue(['/data/user1/photo.jpg']);
|
||||
mocks.storage.stat.mockResolvedValue({ isDirectory: () => true } as Stats);
|
||||
mocks.storage.checkFileExists.mockResolvedValue(true);
|
||||
mocks.asset.filterNewExternalAssetPaths.mockResolvedValue(['/data/user1/photo.jpg']);
|
||||
@@ -256,7 +256,7 @@ describe(LibraryService.name, () => {
|
||||
|
||||
await sut.handleQueueSyncFiles({ id: library.id });
|
||||
|
||||
expect(mocks.storage.crawl).toHaveBeenCalledWith({
|
||||
expect(mocks.storage.walk).toHaveBeenCalledWith({
|
||||
pathsToCrawl: [library.importPaths[1]],
|
||||
exclusionPatterns: [],
|
||||
includeHidden: false,
|
||||
@@ -269,7 +269,7 @@ describe(LibraryService.name, () => {
|
||||
const library = factory.library();
|
||||
|
||||
mocks.library.get.mockResolvedValue(library);
|
||||
mocks.storage.crawl.mockResolvedValue([]);
|
||||
mocks.storage.walk.mockResolvedValue([]);
|
||||
mocks.asset.getLibraryAssetCount.mockResolvedValue(1);
|
||||
mocks.asset.detectOfflineExternalAssets.mockResolvedValue({ numUpdatedRows: 1n });
|
||||
|
||||
@@ -287,7 +287,7 @@ describe(LibraryService.name, () => {
|
||||
const library = factory.library();
|
||||
|
||||
mocks.library.get.mockResolvedValue(library);
|
||||
mocks.storage.crawl.mockResolvedValue([]);
|
||||
mocks.storage.walk.mockResolvedValue([]);
|
||||
mocks.asset.getLibraryAssetCount.mockResolvedValue(0);
|
||||
mocks.asset.detectOfflineExternalAssets.mockResolvedValue({ numUpdatedRows: 1n });
|
||||
|
||||
@@ -301,7 +301,7 @@ describe(LibraryService.name, () => {
|
||||
const library = factory.library({ importPaths: ['/foo', '/bar'] });
|
||||
|
||||
mocks.library.get.mockResolvedValue(library);
|
||||
mocks.storage.crawl.mockResolvedValue([]);
|
||||
mocks.storage.walk.mockResolvedValue([]);
|
||||
mocks.library.streamAssetIds.mockReturnValue(makeStream([assetStub.external]));
|
||||
mocks.asset.getLibraryAssetCount.mockResolvedValue(1);
|
||||
mocks.asset.detectOfflineExternalAssets.mockResolvedValue({ numUpdatedRows: 0n });
|
||||
|
||||
@@ -649,18 +649,18 @@ export class LibraryService extends BaseService {
|
||||
|
||||
const crawlStart = Date.now();
|
||||
|
||||
const pathsOnDisk = await this.storageRepository.crawl({
|
||||
pathsToCrawl: validImportPaths,
|
||||
const pathsOnDisk = await this.storageRepository.walk({
|
||||
pathsToWalk: validImportPaths,
|
||||
includeHidden: false,
|
||||
exclusionPatterns: library.exclusionPatterns,
|
||||
});
|
||||
|
||||
let importCount = 0;
|
||||
|
||||
this.logger.log(
|
||||
`Found ${pathsOnDisk.length} file(s) on disk in ${((Date.now() - crawlStart) / 1000).toFixed(2)}s, queuing for import...`,
|
||||
);
|
||||
|
||||
let importCount = 0;
|
||||
|
||||
for (let i = 0; i < pathsOnDisk.length; i += JOBS_LIBRARY_PAGINATION_SIZE) {
|
||||
const pathChunk = pathsOnDisk.slice(i, i + JOBS_LIBRARY_PAGINATION_SIZE);
|
||||
const paths = await this.assetRepository.filterNewExternalAssetPaths(library.id, pathChunk);
|
||||
|
||||
@@ -68,8 +68,7 @@ export const newStorageRepositoryMock = (): Mocked<RepositoryInterface<StorageRe
|
||||
readdir: vitest.fn(),
|
||||
realpath: vitest.fn().mockImplementation((filepath: string) => Promise.resolve(filepath)),
|
||||
stat: vitest.fn(),
|
||||
crawl: vitest.fn(),
|
||||
walk: vitest.fn().mockImplementation(async function* () {}),
|
||||
walk: vitest.fn(),
|
||||
rename: vitest.fn(),
|
||||
copyFile: vitest.fn(),
|
||||
utimes: vitest.fn(),
|
||||
|
||||
Reference in New Issue
Block a user