mirror of
				https://github.com/immich-app/immich.git
				synced 2025-11-03 19:29:32 -05:00 
			
		
		
		
	refactor(server): filesystem crawl (#4395)
Co-authored-by: Jonathan Jogenfors <jonathan@jogenfors.se>
This commit is contained in:
		
							parent
							
								
									9070a361bc
								
							
						
					
					
						commit
						9033e7f179
					
				@ -2,208 +2,204 @@ import { CrawlOptionsDto } from '@app/domain';
 | 
			
		||||
import mockfs from 'mock-fs';
 | 
			
		||||
import { FilesystemProvider } from './filesystem.provider';
 | 
			
		||||
 | 
			
		||||
/** One table-driven scenario for the `crawl` test suite. */
interface Test {
  /** Human-readable title passed to `it(...)`. */
  test: string;
  /** Options handed to `crawl` unchanged. */
  options: CrawlOptionsDto;
  /**
   * Every path to create in the mock filesystem, mapped to whether the crawl
   * is expected to return it (`true`) or to leave it out (`false`).
   */
  files: Record<string, boolean>;
}
 | 
			
		||||
 | 
			
		||||
// Working directory captured once; used to build the absolute paths that a
// relative `pathsToCrawl` entry is expected to resolve to.
const cwd = process.cwd();

/**
 * Scenario table for the `crawl` suite. Each entry lists the files to create
 * and flags which of them the crawl must return. Titles must stay unique so a
 * failing case can be identified (and filtered) unambiguously.
 */
const tests: Test[] = [
  {
    test: 'should return empty when crawling an empty path list',
    options: {
      pathsToCrawl: [],
    },
    files: {},
  },
  {
    test: 'should crawl a single path',
    options: {
      pathsToCrawl: ['/photos/'],
    },
    files: {
      '/photos/image.jpg': true,
    },
  },
  {
    test: 'should exclude by file extension',
    options: {
      pathsToCrawl: ['/photos/'],
      exclusionPatterns: ['**/*.tif'],
    },
    files: {
      '/photos/image.jpg': true,
      '/photos/image.tif': false,
    },
  },
  {
    test: 'should exclude by file extension without case sensitivity',
    options: {
      pathsToCrawl: ['/photos/'],
      exclusionPatterns: ['**/*.TIF'],
    },
    files: {
      '/photos/image.jpg': true,
      '/photos/image.tif': false,
    },
  },
  {
    test: 'should exclude by folder',
    options: {
      pathsToCrawl: ['/photos/'],
      exclusionPatterns: ['**/raw/**'],
    },
    files: {
      '/photos/image.jpg': true,
      '/photos/raw/image.jpg': false,
      // Only the exact folder name `raw` is excluded, not names containing it.
      '/photos/raw2/image.jpg': true,
      '/photos/folder/raw/image.jpg': false,
      '/photos/crawl/image.jpg': true,
    },
  },
  {
    test: 'should crawl multiple paths',
    options: {
      pathsToCrawl: ['/photos/', '/images/', '/albums/'],
    },
    files: {
      '/photos/image1.jpg': true,
      '/images/image2.jpg': true,
      '/albums/image3.jpg': true,
    },
  },
  {
    test: 'should support globbing paths',
    options: {
      pathsToCrawl: ['/photos*'],
    },
    files: {
      '/photos1/image1.jpg': true,
      '/photos2/image2.jpg': true,
      '/images/image3.jpg': false,
    },
  },
  {
    test: 'should crawl a single path without trailing slash',
    options: {
      pathsToCrawl: ['/photos'],
    },
    files: {
      '/photos/image.jpg': true,
    },
  },
  {
    // Previously shared its title with the flat single-path case above.
    test: 'should crawl a single path recursively',
    options: {
      pathsToCrawl: ['/photos/'],
    },
    files: {
      '/photos/image.jpg': true,
      '/photos/subfolder/image1.jpg': true,
      '/photos/subfolder/image2.jpg': true,
      '/image1.jpg': false,
    },
  },
  {
    test: 'should filter file extensions',
    options: {
      pathsToCrawl: ['/photos/'],
    },
    files: {
      '/photos/image.jpg': true,
      '/photos/image.txt': false,
      '/photos/1': false,
    },
  },
  {
    test: 'should include photo and video extensions',
    options: {
      pathsToCrawl: ['/photos/', '/videos/'],
    },
    files: {
      '/photos/image.jpg': true,
      '/photos/image.jpeg': true,
      '/photos/image.heic': true,
      '/photos/image.heif': true,
      '/photos/image.png': true,
      '/photos/image.gif': true,
      '/photos/image.tif': true,
      '/photos/image.tiff': true,
      '/photos/image.webp': true,
      '/photos/image.dng': true,
      '/photos/image.nef': true,
      '/videos/video.mp4': true,
      '/videos/video.mov': true,
      '/videos/video.webm': true,
    },
  },
  {
    test: 'should check file extensions without case sensitivity',
    options: {
      pathsToCrawl: ['/photos/'],
    },
    files: {
      '/photos/image.jpg': true,
      '/photos/image.Jpg': true,
      '/photos/image.jpG': true,
      '/photos/image.JPG': true,
      '/photos/image.jpEg': true,
      '/photos/image.TIFF': true,
      '/photos/image.tif': true,
      '/photos/image.dng': true,
      '/photos/image.NEF': true,
    },
  },
  {
    test: 'should normalize the path',
    options: {
      pathsToCrawl: ['/photos/1/../2'],
    },
    files: {
      '/photos/1/image.jpg': false,
      '/photos/2/image.jpg': true,
    },
  },
  {
    test: 'should return absolute paths',
    options: {
      // Relative on purpose: results must come back anchored at `cwd`.
      pathsToCrawl: ['photos'],
    },
    files: {
      [`${cwd}/photos/1.jpg`]: true,
      [`${cwd}/photos/2.jpg`]: true,
      '/photos/3.jpg': false,
    },
  },
];
 | 
			
		||||
 | 
			
		||||
/**
 * Table-driven suite for `FilesystemProvider`.
 *
 * Each entry of `tests` becomes one `it(...)` case: all listed files are
 * created in a mock filesystem, `crawl` is run with the entry's options, and
 * the result must equal exactly the files flagged `true`.
 */
describe(FilesystemProvider.name, () => {
  const sut = new FilesystemProvider();

  // Unmount the mock filesystem after every case so files cannot leak between cases.
  afterEach(() => {
    mockfs.restore();
  });

  describe('crawl', () => {
    // TODO: test for hidden paths (not yet implemented)

    for (const { test, options, files } of tests) {
      it(test, async () => {
        // Create every listed path as an empty file, expected or not.
        mockfs(Object.fromEntries(Object.keys(files).map((file) => [file, ''])));

        const actual = await sut.crawl(options);
        const expected = Object.entries(files)
          .filter((entry) => entry[1])
          .map(([file]) => file);

        // Crawl order is not part of the contract; compare sorted lists.
        expect(actual.sort()).toEqual(expected.sort());
      });
    }
  });
});
 | 
			
		||||
 | 
			
		||||
@ -111,24 +111,21 @@ export class FilesystemProvider implements IStorageRepository {
 | 
			
		||||
    };
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /**
   * Finds files with a supported extension beneath the configured paths.
   *
   * @param crawlOptions - `pathsToCrawl` lists the roots to search (the spec
   *   for this method also passes glob patterns such as `/photos*`);
   *   `exclusionPatterns` is forwarded to glob as its `ignore` option.
   * @returns Matching file paths. Resolves to an empty list when there is
   *   nothing to crawl.
   */
  async crawl(crawlOptions: CrawlOptionsDto): Promise<string[]> {
    const { pathsToCrawl, exclusionPatterns } = crawlOptions;

    // An empty list would otherwise produce the pattern `{}/**/…`; skip the
    // filesystem walk entirely and report no matches.
    if (!pathsToCrawl || pathsToCrawl.length === 0) {
      return [];
    }

    // Several roots are combined into a single brace-expansion pattern.
    const base = pathsToCrawl.length === 1 ? pathsToCrawl[0] : `{${pathsToCrawl.join(',')}}`;
    const extensions = `*{${mimeTypes.getSupportedFileExtensions().join(',')}}`;

    return glob(`${base}/**/${extensions}`, {
      absolute: true, // relative roots still yield absolute result paths
      nocase: true, // match extensions regardless of case
      nodir: true, // files only
      ignore: exclusionPatterns,
    });
  }
 | 
			
		||||
 | 
			
		||||
  // Assigns the outer-scope `readdir` binding to a member of the same name.
  // NOTE(review): presumably the `readdir` imported at the top of the file, which is not visible in this excerpt — confirm.
  readdir = readdir;
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user