refactor(server): use exiftool for file date metadata (#16453)

* use exiftool for file date metadata

* handle tag not existing in exifinfo (?)

* update medium tests

* fix typo

* set file size too

* set file size only if undefined
This commit is contained in:
Mert 2025-03-06 11:47:12 -05:00 committed by GitHub
parent d01b7a0d67
commit deb399ea15
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 85 additions and 52 deletions

View File

@ -73,7 +73,7 @@ export class MetadataRepository {
inferTimezoneFromDatestamps: true, inferTimezoneFromDatestamps: true,
inferTimezoneFromTimeStamp: true, inferTimezoneFromTimeStamp: true,
useMWG: true, useMWG: true,
numericTags: [...DefaultReadTaskOptions.numericTags, 'FocalLength'], numericTags: [...DefaultReadTaskOptions.numericTags, 'FocalLength', 'FileSize'],
/* eslint unicorn/no-array-callback-reference: off, unicorn/no-array-method-this-argument: off */ /* eslint unicorn/no-array-callback-reference: off, unicorn/no-array-method-this-argument: off */
geoTz: (lat, lon) => geotz.find(lat, lon)[0], geoTz: (lat, lon) => geotz.find(lat, lon)[0],
// Enable exiftool LFS to parse metadata for files larger than 2GB. // Enable exiftool LFS to parse metadata for files larger than 2GB.

View File

@ -1,6 +1,5 @@
import { BinaryField, ExifDateTime } from 'exiftool-vendored'; import { BinaryField, ExifDateTime } from 'exiftool-vendored';
import { randomBytes } from 'node:crypto'; import { randomBytes } from 'node:crypto';
import { Stats } from 'node:fs';
import { constants } from 'node:fs/promises'; import { constants } from 'node:fs/promises';
import { defaults } from 'src/config'; import { defaults } from 'src/config';
import { AssetEntity } from 'src/entities/asset.entity'; import { AssetEntity } from 'src/entities/asset.entity';
@ -22,8 +21,14 @@ describe(MetadataService.name, () => {
let mocks: ServiceMocks; let mocks: ServiceMocks;
const mockReadTags = (exifData?: Partial<ImmichTags>, sidecarData?: Partial<ImmichTags>) => { const mockReadTags = (exifData?: Partial<ImmichTags>, sidecarData?: Partial<ImmichTags>) => {
exifData = {
FileSize: '123456',
FileCreateDate: '2024-01-01T00:00:00.000Z',
FileModifyDate: '2024-01-01T00:00:00.000Z',
...exifData,
};
mocks.metadata.readTags.mockReset(); mocks.metadata.readTags.mockReset();
mocks.metadata.readTags.mockResolvedValueOnce(exifData ?? {}); mocks.metadata.readTags.mockResolvedValueOnce(exifData);
mocks.metadata.readTags.mockResolvedValueOnce(sidecarData ?? {}); mocks.metadata.readTags.mockResolvedValueOnce(sidecarData ?? {});
}; };
@ -105,10 +110,6 @@ describe(MetadataService.name, () => {
}); });
describe('handleMetadataExtraction', () => { describe('handleMetadataExtraction', () => {
beforeEach(() => {
mocks.storage.stat.mockResolvedValue({ size: 123_456 } as Stats);
});
it('should handle an asset that could not be found', async () => { it('should handle an asset that could not be found', async () => {
await expect(sut.handleMetadataExtraction({ id: assetStub.image.id })).resolves.toBe(JobStatus.FAILED); await expect(sut.handleMetadataExtraction({ id: assetStub.image.id })).resolves.toBe(JobStatus.FAILED);
@ -126,19 +127,24 @@ describe(MetadataService.name, () => {
await sut.handleMetadataExtraction({ id: assetStub.image.id }); await sut.handleMetadataExtraction({ id: assetStub.image.id });
expect(mocks.asset.getByIds).toHaveBeenCalledWith([assetStub.sidecar.id], { faces: { person: false } }); expect(mocks.asset.getByIds).toHaveBeenCalledWith([assetStub.sidecar.id], { faces: { person: false } });
expect(mocks.asset.upsertExif).toHaveBeenCalledWith(expect.objectContaining({ dateTimeOriginal: sidecarDate })); expect(mocks.asset.upsertExif).toHaveBeenCalledWith(expect.objectContaining({ dateTimeOriginal: sidecarDate }));
expect(mocks.asset.update).toHaveBeenCalledWith({ expect(mocks.asset.update).toHaveBeenCalledWith(
id: assetStub.image.id, expect.objectContaining({
duration: null, id: assetStub.image.id,
fileCreatedAt: sidecarDate, duration: null,
localDateTime: sidecarDate, fileCreatedAt: sidecarDate,
}); localDateTime: sidecarDate,
}),
);
}); });
it('should take the file modification date when missing exif and earliest than creation date', async () => { it('should take the file modification date when missing exif and earlier than creation date', async () => {
const fileCreatedAt = new Date('2022-01-01T00:00:00.000Z'); const fileCreatedAt = new Date('2022-01-01T00:00:00.000Z');
const fileModifiedAt = new Date('2021-01-01T00:00:00.000Z'); const fileModifiedAt = new Date('2021-01-01T00:00:00.000Z');
mocks.asset.getByIds.mockResolvedValue([{ ...assetStub.image, fileCreatedAt, fileModifiedAt }]); mocks.asset.getByIds.mockResolvedValue([assetStub.image]);
mockReadTags(); mockReadTags({
FileCreateDate: fileCreatedAt.toISOString(),
FileModifyDate: fileModifiedAt.toISOString(),
});
await sut.handleMetadataExtraction({ id: assetStub.image.id }); await sut.handleMetadataExtraction({ id: assetStub.image.id });
expect(mocks.asset.getByIds).toHaveBeenCalledWith([assetStub.image.id], { faces: { person: false } }); expect(mocks.asset.getByIds).toHaveBeenCalledWith([assetStub.image.id], { faces: { person: false } });
@ -149,15 +155,19 @@ describe(MetadataService.name, () => {
id: assetStub.image.id, id: assetStub.image.id,
duration: null, duration: null,
fileCreatedAt: fileModifiedAt, fileCreatedAt: fileModifiedAt,
fileModifiedAt,
localDateTime: fileModifiedAt, localDateTime: fileModifiedAt,
}); });
}); });
it('should take the file creation date when missing exif and earliest than modification date', async () => { it('should take the file creation date when missing exif and earlier than modification date', async () => {
const fileCreatedAt = new Date('2021-01-01T00:00:00.000Z'); const fileCreatedAt = new Date('2021-01-01T00:00:00.000Z');
const fileModifiedAt = new Date('2022-01-01T00:00:00.000Z'); const fileModifiedAt = new Date('2022-01-01T00:00:00.000Z');
mocks.asset.getByIds.mockResolvedValue([{ ...assetStub.image, fileCreatedAt, fileModifiedAt }]); mocks.asset.getByIds.mockResolvedValue([assetStub.image]);
mockReadTags(); mockReadTags({
FileCreateDate: fileCreatedAt.toISOString(),
FileModifyDate: fileModifiedAt.toISOString(),
});
await sut.handleMetadataExtraction({ id: assetStub.image.id }); await sut.handleMetadataExtraction({ id: assetStub.image.id });
expect(mocks.asset.getByIds).toHaveBeenCalledWith([assetStub.image.id], { faces: { person: false } }); expect(mocks.asset.getByIds).toHaveBeenCalledWith([assetStub.image.id], { faces: { person: false } });
@ -166,6 +176,7 @@ describe(MetadataService.name, () => {
id: assetStub.image.id, id: assetStub.image.id,
duration: null, duration: null,
fileCreatedAt, fileCreatedAt,
fileModifiedAt,
localDateTime: fileCreatedAt, localDateTime: fileCreatedAt,
}); });
}); });
@ -191,7 +202,11 @@ describe(MetadataService.name, () => {
it('should handle lists of numbers', async () => { it('should handle lists of numbers', async () => {
mocks.asset.getByIds.mockResolvedValue([assetStub.image]); mocks.asset.getByIds.mockResolvedValue([assetStub.image]);
mockReadTags({ ISO: [160] }); mockReadTags({
ISO: [160],
FileCreateDate: assetStub.image.fileCreatedAt.toISOString(),
FileModifyDate: assetStub.image.fileModifiedAt.toISOString(),
});
await sut.handleMetadataExtraction({ id: assetStub.image.id }); await sut.handleMetadataExtraction({ id: assetStub.image.id });
expect(mocks.asset.getByIds).toHaveBeenCalledWith([assetStub.image.id], { faces: { person: false } }); expect(mocks.asset.getByIds).toHaveBeenCalledWith([assetStub.image.id], { faces: { person: false } });
@ -200,6 +215,7 @@ describe(MetadataService.name, () => {
id: assetStub.image.id, id: assetStub.image.id,
duration: null, duration: null,
fileCreatedAt: assetStub.image.fileCreatedAt, fileCreatedAt: assetStub.image.fileCreatedAt,
fileModifiedAt: assetStub.image.fileCreatedAt,
localDateTime: assetStub.image.fileCreatedAt, localDateTime: assetStub.image.fileCreatedAt,
}); });
}); });
@ -211,6 +227,8 @@ describe(MetadataService.name, () => {
mockReadTags({ mockReadTags({
GPSLatitude: assetStub.withLocation.exifInfo!.latitude!, GPSLatitude: assetStub.withLocation.exifInfo!.latitude!,
GPSLongitude: assetStub.withLocation.exifInfo!.longitude!, GPSLongitude: assetStub.withLocation.exifInfo!.longitude!,
FileCreateDate: assetStub.withLocation.fileCreatedAt.toISOString(),
FileModifyDate: assetStub.withLocation.fileModifiedAt.toISOString(),
}); });
await sut.handleMetadataExtraction({ id: assetStub.image.id }); await sut.handleMetadataExtraction({ id: assetStub.image.id });
@ -221,7 +239,8 @@ describe(MetadataService.name, () => {
expect(mocks.asset.update).toHaveBeenCalledWith({ expect(mocks.asset.update).toHaveBeenCalledWith({
id: assetStub.withLocation.id, id: assetStub.withLocation.id,
duration: null, duration: null,
fileCreatedAt: assetStub.withLocation.createdAt, fileCreatedAt: assetStub.withLocation.fileCreatedAt,
fileModifiedAt: assetStub.withLocation.fileModifiedAt,
localDateTime: new Date('2023-02-22T05:06:29.716Z'), localDateTime: new Date('2023-02-22T05:06:29.716Z'),
}); });
}); });
@ -460,6 +479,8 @@ describe(MetadataService.name, () => {
// instead of the EmbeddedVideoFile, since HEIC MotionPhotos include both // instead of the EmbeddedVideoFile, since HEIC MotionPhotos include both
EmbeddedVideoFile: new BinaryField(0, ''), EmbeddedVideoFile: new BinaryField(0, ''),
EmbeddedVideoType: 'MotionPhoto_Data', EmbeddedVideoType: 'MotionPhoto_Data',
FileCreateDate: assetStub.livePhotoWithOriginalFileName.fileCreatedAt.toISOString(),
FileModifyDate: assetStub.livePhotoWithOriginalFileName.fileModifiedAt.toISOString(),
}); });
mocks.crypto.hashSha1.mockReturnValue(randomBytes(512)); mocks.crypto.hashSha1.mockReturnValue(randomBytes(512));
mocks.asset.create.mockResolvedValue(assetStub.livePhotoMotionAsset); mocks.asset.create.mockResolvedValue(assetStub.livePhotoMotionAsset);
@ -506,6 +527,8 @@ describe(MetadataService.name, () => {
EmbeddedVideoFile: new BinaryField(0, ''), EmbeddedVideoFile: new BinaryField(0, ''),
EmbeddedVideoType: 'MotionPhoto_Data', EmbeddedVideoType: 'MotionPhoto_Data',
MotionPhoto: 1, MotionPhoto: 1,
FileCreateDate: assetStub.livePhotoWithOriginalFileName.fileCreatedAt.toISOString(),
FileModifyDate: assetStub.livePhotoWithOriginalFileName.fileModifiedAt.toISOString(),
}); });
mocks.crypto.hashSha1.mockReturnValue(randomBytes(512)); mocks.crypto.hashSha1.mockReturnValue(randomBytes(512));
mocks.asset.create.mockResolvedValue(assetStub.livePhotoMotionAsset); mocks.asset.create.mockResolvedValue(assetStub.livePhotoMotionAsset);
@ -552,6 +575,8 @@ describe(MetadataService.name, () => {
MotionPhoto: 1, MotionPhoto: 1,
MicroVideo: 1, MicroVideo: 1,
MicroVideoOffset: 1, MicroVideoOffset: 1,
FileCreateDate: assetStub.livePhotoWithOriginalFileName.fileCreatedAt.toISOString(),
FileModifyDate: assetStub.livePhotoWithOriginalFileName.fileModifiedAt.toISOString(),
}); });
mocks.crypto.hashSha1.mockReturnValue(randomBytes(512)); mocks.crypto.hashSha1.mockReturnValue(randomBytes(512));
mocks.asset.create.mockResolvedValue(assetStub.livePhotoMotionAsset); mocks.asset.create.mockResolvedValue(assetStub.livePhotoMotionAsset);
@ -745,12 +770,14 @@ describe(MetadataService.name, () => {
state: null, state: null,
city: null, city: null,
}); });
expect(mocks.asset.update).toHaveBeenCalledWith({ expect(mocks.asset.update).toHaveBeenCalledWith(
id: assetStub.image.id, expect.objectContaining({
duration: null, id: assetStub.image.id,
fileCreatedAt: dateForTest, duration: null,
localDateTime: dateForTest, fileCreatedAt: dateForTest,
}); localDateTime: dateForTest,
}),
);
}); });
it('should extract +00:00 timezone from raw value', async () => { it('should extract +00:00 timezone from raw value', async () => {

View File

@ -171,21 +171,17 @@ export class MetadataService extends BaseService {
return JobStatus.FAILED; return JobStatus.FAILED;
} }
const [stats, exifTags] = await Promise.all([ const exifTags = await this.getExifTags(asset);
this.storageRepository.stat(asset.originalPath), if (!exifTags.FileCreateDate || !exifTags.FileModifyDate || exifTags.FileSize === undefined) {
this.getExifTags(asset), this.logger.warn(`Missing file creation or modification date for asset ${asset.id}: ${asset.originalPath}`);
]); const stat = await this.storageRepository.stat(asset.originalPath);
exifTags.FileCreateDate = stat.ctime.toISOString();
exifTags.FileModifyDate = stat.mtime.toISOString();
exifTags.FileSize = stat.size.toString();
}
this.logger.verbose('Exif Tags', exifTags); this.logger.verbose('Exif Tags', exifTags);
if (!asset.fileCreatedAt) {
asset.fileCreatedAt = stats.mtime;
}
if (!asset.fileModifiedAt) {
asset.fileModifiedAt = stats.mtime;
}
const { dateTimeOriginal, localDateTime, timeZone, modifyDate } = this.getDates(asset, exifTags); const { dateTimeOriginal, localDateTime, timeZone, modifyDate } = this.getDates(asset, exifTags);
const { width, height } = this.getImageDimensions(exifTags); const { width, height } = this.getImageDimensions(exifTags);
@ -216,7 +212,7 @@ export class MetadataService extends BaseService {
city: geo.city, city: geo.city,
// image/file // image/file
fileSizeInByte: stats.size, fileSizeInByte: Number.parseInt(exifTags.FileSize!),
exifImageHeight: validate(height), exifImageHeight: validate(height),
exifImageWidth: validate(width), exifImageWidth: validate(width),
orientation: validate(exifTags.Orientation)?.toString() ?? null, orientation: validate(exifTags.Orientation)?.toString() ?? null,
@ -251,13 +247,13 @@ export class MetadataService extends BaseService {
duration: exifTags.Duration?.toString() ?? null, duration: exifTags.Duration?.toString() ?? null,
localDateTime, localDateTime,
fileCreatedAt: exifData.dateTimeOriginal ?? undefined, fileCreatedAt: exifData.dateTimeOriginal ?? undefined,
fileModifiedAt: stats.mtime, fileModifiedAt: exifData.modifyDate ?? undefined,
}), }),
this.applyTagList(asset, exifTags), this.applyTagList(asset, exifTags),
]; ];
if (this.isMotionPhoto(asset, exifTags)) { if (this.isMotionPhoto(asset, exifTags)) {
promises.push(this.applyMotionPhotos(asset, exifTags)); promises.push(this.applyMotionPhotos(asset, exifTags, exifData.fileSizeInByte!));
} }
if (isFaceImportEnabled(metadata) && this.hasTaggedFaces(exifTags)) { if (isFaceImportEnabled(metadata) && this.hasTaggedFaces(exifTags)) {
@ -436,7 +432,7 @@ export class MetadataService extends BaseService {
return asset.type === AssetType.IMAGE && !!(tags.MotionPhoto || tags.MicroVideo); return asset.type === AssetType.IMAGE && !!(tags.MotionPhoto || tags.MicroVideo);
} }
private async applyMotionPhotos(asset: AssetEntity, tags: ImmichTags) { private async applyMotionPhotos(asset: AssetEntity, tags: ImmichTags, fileSize: number) {
const isMotionPhoto = tags.MotionPhoto; const isMotionPhoto = tags.MotionPhoto;
const isMicroVideo = tags.MicroVideo; const isMicroVideo = tags.MicroVideo;
const videoOffset = tags.MicroVideoOffset; const videoOffset = tags.MicroVideoOffset;
@ -470,8 +466,7 @@ export class MetadataService extends BaseService {
this.logger.debug(`Starting motion photo video extraction for asset ${asset.id}: ${asset.originalPath}`); this.logger.debug(`Starting motion photo video extraction for asset ${asset.id}: ${asset.originalPath}`);
try { try {
const stat = await this.storageRepository.stat(asset.originalPath); const position = fileSize - length - padding;
const position = stat.size - length - padding;
let video: Buffer; let video: Buffer;
// Samsung MotionPhoto video extraction // Samsung MotionPhoto video extraction
// HEIC-encoded // HEIC-encoded
@ -659,10 +654,12 @@ export class MetadataService extends BaseService {
this.logger.debug(`No timezone information found for asset ${asset.id}: ${asset.originalPath}`); this.logger.debug(`No timezone information found for asset ${asset.id}: ${asset.originalPath}`);
} }
const modifyDate = this.toDate(exifTags.FileModifyDate!);
let dateTimeOriginal = dateTime?.toDate(); let dateTimeOriginal = dateTime?.toDate();
let localDateTime = dateTime?.toDateTime().setZone('UTC', { keepLocalTime: true }).toJSDate(); let localDateTime = dateTime?.toDateTime().setZone('UTC', { keepLocalTime: true }).toJSDate();
if (!localDateTime || !dateTimeOriginal) { if (!localDateTime || !dateTimeOriginal) {
const earliestDate = this.earliestDate(asset.fileModifiedAt, asset.fileCreatedAt); const fileCreatedAt = this.toDate(exifTags.FileCreateDate!);
const earliestDate = this.earliestDate(fileCreatedAt, modifyDate);
this.logger.debug( this.logger.debug(
`No exif date time found, falling back on ${earliestDate.toISOString()}, earliest of file creation and modification for assset ${asset.id}: ${asset.originalPath}`, `No exif date time found, falling back on ${earliestDate.toISOString()}, earliest of file creation and modification for assset ${asset.id}: ${asset.originalPath}`,
); );
@ -674,11 +671,6 @@ export class MetadataService extends BaseService {
`Found local date time ${localDateTime.toISOString()} for asset ${asset.id}: ${asset.originalPath}`, `Found local date time ${localDateTime.toISOString()} for asset ${asset.id}: ${asset.originalPath}`,
); );
let modifyDate = asset.fileModifiedAt;
try {
modifyDate = (exifTags.ModifyDate as ExifDateTime)?.toDate() ?? modifyDate;
} catch {}
return { return {
dateTimeOriginal, dateTimeOriginal,
timeZone, timeZone,
@ -687,6 +679,10 @@ export class MetadataService extends BaseService {
}; };
} }
/**
 * Normalizes a date tag value to a native `Date`.
 *
 * @param date - Either a date string or an `ExifDateTime` instance.
 * @returns The result of `new Date(date)` for strings, otherwise the value's own `toDate()` result.
 *
 * NOTE(review): string input is handed to the `Date` constructor unchanged, so a string it
 * cannot parse yields an invalid `Date` rather than an error — confirm callers only pass
 * parseable strings.
 */
private toDate(date: string | ExifDateTime): Date {
  // Guard clause: plain strings go straight to the built-in parser.
  if (typeof date === 'string') {
    return new Date(date);
  }

  // Anything else is an ExifDateTime, which knows how to convert itself.
  return date.toDate();
}
private earliestDate(a: Date, b: Date) { private earliestDate(a: Date, b: Date) {
return new Date(Math.min(a.valueOf(), b.valueOf())); return new Date(Math.min(a.valueOf(), b.valueOf()));
} }

View File

@ -36,7 +36,7 @@ describe(MetadataService.name, () => {
beforeEach(() => { beforeEach(() => {
({ sut, mocks } = newTestService(MetadataService, { metadata: metadataRepository })); ({ sut, mocks } = newTestService(MetadataService, { metadata: metadataRepository }));
mocks.storage.stat.mockResolvedValue({ size: 123_456 } as Stats); mocks.storage.stat.mockResolvedValue({ size: 123_456, ctime: new Date(), mtime: new Date() } as Stats);
delete process.env.TZ; delete process.env.TZ;
}); });
@ -51,6 +51,8 @@ describe(MetadataService.name, () => {
description: 'should handle no time zone information', description: 'should handle no time zone information',
exifData: { exifData: {
DateTimeOriginal: '2022:01:01 00:00:00', DateTimeOriginal: '2022:01:01 00:00:00',
FileCreateDate: '2022:01:01 00:00:00',
FileModifyDate: '2022:01:01 00:00:00',
}, },
expected: { expected: {
localDateTime: '2022-01-01T00:00:00.000Z', localDateTime: '2022-01-01T00:00:00.000Z',
@ -63,6 +65,8 @@ describe(MetadataService.name, () => {
serverTimeZone: 'America/Los_Angeles', serverTimeZone: 'America/Los_Angeles',
exifData: { exifData: {
DateTimeOriginal: '2022:01:01 00:00:00', DateTimeOriginal: '2022:01:01 00:00:00',
FileCreateDate: '2022:01:01 00:00:00',
FileModifyDate: '2022:01:01 00:00:00',
}, },
expected: { expected: {
localDateTime: '2022-01-01T00:00:00.000Z', localDateTime: '2022-01-01T00:00:00.000Z',
@ -75,6 +79,8 @@ describe(MetadataService.name, () => {
serverTimeZone: 'Europe/Brussels', serverTimeZone: 'Europe/Brussels',
exifData: { exifData: {
DateTimeOriginal: '2022:01:01 00:00:00', DateTimeOriginal: '2022:01:01 00:00:00',
FileCreateDate: '2022:01:01 00:00:00',
FileModifyDate: '2022:01:01 00:00:00',
}, },
expected: { expected: {
localDateTime: '2022-01-01T00:00:00.000Z', localDateTime: '2022-01-01T00:00:00.000Z',
@ -87,6 +93,8 @@ describe(MetadataService.name, () => {
serverTimeZone: 'Europe/Brussels', serverTimeZone: 'Europe/Brussels',
exifData: { exifData: {
DateTimeOriginal: '2022:06:01 00:00:00', DateTimeOriginal: '2022:06:01 00:00:00',
FileCreateDate: '2022:06:01 00:00:00',
FileModifyDate: '2022:06:01 00:00:00',
}, },
expected: { expected: {
localDateTime: '2022-06-01T00:00:00.000Z', localDateTime: '2022-06-01T00:00:00.000Z',
@ -98,6 +106,8 @@ describe(MetadataService.name, () => {
description: 'should handle a +13:00 time zone', description: 'should handle a +13:00 time zone',
exifData: { exifData: {
DateTimeOriginal: '2022:01:01 00:00:00+13:00', DateTimeOriginal: '2022:01:01 00:00:00+13:00',
FileCreateDate: '2022:01:01 00:00:00+13:00',
FileModifyDate: '2022:01:01 00:00:00+13:00',
}, },
expected: { expected: {
localDateTime: '2022-01-01T00:00:00.000Z', localDateTime: '2022-01-01T00:00:00.000Z',