From e73686bd76b6147a4e1ccbce4eab23a4ece8ebd9 Mon Sep 17 00:00:00 2001 From: Luis Nachtigall <31982496+LeLunZ@users.noreply.github.com> Date: Sat, 7 Mar 2026 16:41:26 +0100 Subject: [PATCH 01/26] feat(android): enhance playback style detection using MIME type, reducing glide exposure (#26747) * feat(android): enhance playback style detection using MIME type * feat(android): improve playback style detection for GIF and WebP formats * fix(android): make playback style detection faster * refactor(android): simplify XMP reading logic for API 29 and below * update playback style detection documentation * use DefaultImageHeaderParser instead of all available ones for webp playbackStyle type detection --- .../alextran/immich/sync/MessagesImplBase.kt | 82 +++++++++++-------- 1 file changed, 49 insertions(+), 33 deletions(-) diff --git a/mobile/android/app/src/main/kotlin/app/alextran/immich/sync/MessagesImplBase.kt b/mobile/android/app/src/main/kotlin/app/alextran/immich/sync/MessagesImplBase.kt index 0cc642c862..949720325e 100644 --- a/mobile/android/app/src/main/kotlin/app/alextran/immich/sync/MessagesImplBase.kt +++ b/mobile/android/app/src/main/kotlin/app/alextran/immich/sync/MessagesImplBase.kt @@ -16,6 +16,7 @@ import app.alextran.immich.core.ImmichPlugin import com.bumptech.glide.Glide import com.bumptech.glide.load.ImageHeaderParser import com.bumptech.glide.load.ImageHeaderParserUtils +import com.bumptech.glide.load.resource.bitmap.DefaultImageHeaderParser import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Job @@ -81,10 +82,13 @@ open class NativeSyncApiImplBase(context: Context) : ImmichPlugin() { } if (hasSpecialFormatColumn()) { add(SPECIAL_FORMAT_COLUMN) - } else if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.R) { - // Fallback: read XMP from MediaStore to detect Motion Photos - // only needed if SPECIAL_FORMAT column isn't available - add(MediaStore.MediaColumns.XMP) + } else { + // fallback to mimetype 
and xmp for playback style detection on older Android versions + // both only needed if special format column is not available + add(MediaStore.MediaColumns.MIME_TYPE) + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.R) { + add(MediaStore.MediaColumns.XMP) + } } }.toTypedArray() @@ -131,6 +135,7 @@ open class NativeSyncApiImplBase(context: Context) : ImmichPlugin() { val dateAddedColumn = c.getColumnIndexOrThrow(MediaStore.MediaColumns.DATE_ADDED) val dateModifiedColumn = c.getColumnIndexOrThrow(MediaStore.MediaColumns.DATE_MODIFIED) val mediaTypeColumn = c.getColumnIndexOrThrow(MediaStore.Files.FileColumns.MEDIA_TYPE) + val mimeTypeColumn = c.getColumnIndex(MediaStore.MediaColumns.MIME_TYPE) val bucketIdColumn = c.getColumnIndexOrThrow(MediaStore.MediaColumns.BUCKET_ID) val widthColumn = c.getColumnIndexOrThrow(MediaStore.MediaColumns.WIDTH) val heightColumn = c.getColumnIndexOrThrow(MediaStore.MediaColumns.HEIGHT) @@ -177,7 +182,7 @@ open class NativeSyncApiImplBase(context: Context) : ImmichPlugin() { val isFavorite = if (favoriteColumn == -1) false else c.getInt(favoriteColumn) != 0 val playbackStyle = detectPlaybackStyle( - numericId, rawMediaType, specialFormatColumn, xmpColumn, c + numericId, rawMediaType, mimeTypeColumn, specialFormatColumn, xmpColumn, c ) val asset = PlatformAsset( @@ -200,13 +205,14 @@ open class NativeSyncApiImplBase(context: Context) : ImmichPlugin() { } /** - * Detects the playback style for an asset using _special_format (API 33+) - * or XMP / MIME / RIFF header fallbacks (pre-33). + * Detects the playback style for an asset using _special_format (SDK Extension 21+) + * or XMP / MIME / RIFF header fallbacks. 
*/ @SuppressLint("NewApi") private fun detectPlaybackStyle( assetId: Long, rawMediaType: Int, + mimeTypeColumn: Int, specialFormatColumn: Int, xmpColumn: Int, cursor: Cursor @@ -231,46 +237,56 @@ open class NativeSyncApiImplBase(context: Context) : ImmichPlugin() { return PlatformAssetPlaybackStyle.UNKNOWN } - // Pre-API 33 fallback + val mimeType = if (mimeTypeColumn != -1) cursor.getString(mimeTypeColumn) else null + + // GIFs are always animated and cannot be motion photos; no I/O needed + if (mimeType == "image/gif") { + return PlatformAssetPlaybackStyle.IMAGE_ANIMATED + } + val uri = ContentUris.withAppendedId( MediaStore.Files.getContentUri(MediaStore.VOLUME_EXTERNAL), assetId ) - // Read XMP from cursor (API 30+) or ExifInterface stream (pre-30) + // Only WebP needs a stream check to distinguish static vs animated; + // WebP files are not used as motion photos, so skip XMP detection + if (mimeType == "image/webp") { + try { + val glide = Glide.get(ctx) + ctx.contentResolver.openInputStream(uri)?.use { stream -> + val type = ImageHeaderParserUtils.getType( + listOf(DefaultImageHeaderParser()), + stream, + glide.arrayPool + ) + // Also check for GIF just in case MIME type is incorrect; Doesn't hurt performance + if (type == ImageHeaderParser.ImageType.ANIMATED_WEBP || type == ImageHeaderParser.ImageType.GIF) { + return PlatformAssetPlaybackStyle.IMAGE_ANIMATED + } + } + } catch (e: Exception) { + Log.w(TAG, "Failed to parse image header for asset $assetId", e) + } + // if mimeType is webp but not animated, its just an image. + return PlatformAssetPlaybackStyle.IMAGE + } + + + // Read XMP from cursor (API 30+) val xmp: String? 
= if (xmpColumn != -1) { cursor.getBlob(xmpColumn)?.toString(Charsets.UTF_8) } else { - try { - ctx.contentResolver.openInputStream(uri)?.use { stream -> - ExifInterface(stream).getAttribute(ExifInterface.TAG_XMP) - } - } catch (e: Exception) { - Log.w(TAG, "Failed to read XMP for asset $assetId", e) - null - } + // if xmp column is not available, we are on API 29 or below + // theoretically there were motion photos but the Camera:MotionPhoto xmp tag + // was only added in Android 11, so we should not have to worry about parsing XMP on older versions + null } if (xmp != null && "Camera:MotionPhoto" in xmp) { return PlatformAssetPlaybackStyle.LIVE_PHOTO } - try { - ctx.contentResolver.openInputStream(uri)?.use { stream -> - val glide = Glide.get(ctx) - val type = ImageHeaderParserUtils.getType( - glide.registry.imageHeaderParsers, - stream, - glide.arrayPool - ) - if (type == ImageHeaderParser.ImageType.GIF || type == ImageHeaderParser.ImageType.ANIMATED_WEBP) { - return PlatformAssetPlaybackStyle.IMAGE_ANIMATED - } - } - } catch (e: Exception) { - Log.w(TAG, "Failed to parse image header for asset $assetId", e) - } - return PlatformAssetPlaybackStyle.IMAGE } From dd72ec2621c9cc686997d26e20f1d2761c716c90 Mon Sep 17 00:00:00 2001 From: Thomas <9749173+uhthomas@users.noreply.github.com> Date: Sat, 7 Mar 2026 18:07:34 +0000 Subject: [PATCH 02/26] fix(mobile): correct local asset dimensions (#26677) * fix(mobile): correct local asset dimensions We are constraining the size of videos so that they play nicely with hero animations, and don't stretch in weird ways. This however caused a regression as we are not accounting for local assets on Android which have un-oriented dimensions. 
* post-orientation width and height in local sync * migration * no need to handle it in asset viewer --------- Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com> Co-authored-by: Alex --- .../alextran/immich/sync/MessagesImplBase.kt | 7 ++++--- mobile/lib/utils/migration.dart | 18 +++++++++++++++++- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/mobile/android/app/src/main/kotlin/app/alextran/immich/sync/MessagesImplBase.kt b/mobile/android/app/src/main/kotlin/app/alextran/immich/sync/MessagesImplBase.kt index 949720325e..05671579ae 100644 --- a/mobile/android/app/src/main/kotlin/app/alextran/immich/sync/MessagesImplBase.kt +++ b/mobile/android/app/src/main/kotlin/app/alextran/immich/sync/MessagesImplBase.kt @@ -185,16 +185,17 @@ open class NativeSyncApiImplBase(context: Context) : ImmichPlugin() { numericId, rawMediaType, mimeTypeColumn, specialFormatColumn, xmpColumn, c ) + val isFlipped = orientation == 90 || orientation == 270 val asset = PlatformAsset( id, name, assetType, createdAt, modifiedAt, - width, - height, + if (isFlipped) height else width, + if (isFlipped) width else height, duration, - orientation.toLong(), + 0L, isFavorite, playbackStyle = playbackStyle, ) diff --git a/mobile/lib/utils/migration.dart b/mobile/lib/utils/migration.dart index aeed9f616e..efb4d60369 100644 --- a/mobile/lib/utils/migration.dart +++ b/mobile/lib/utils/migration.dart @@ -35,7 +35,7 @@ import 'package:isar/isar.dart'; // ignore: import_rule_photo_manager import 'package:photo_manager/photo_manager.dart'; -const int targetVersion = 23; +const int targetVersion = 24; Future migrateDatabaseIfNeeded(Isar db, Drift drift) async { final hasVersion = Store.tryGet(StoreKey.version) != null; @@ -105,6 +105,10 @@ Future migrateDatabaseIfNeeded(Isar db, Drift drift) async { await _populateLocalAssetPlaybackStyle(drift); } + if (version < 24 && Store.isBetaTimelineEnabled) { + await _applyLocalAssetOrientation(drift); + } + if (version < 22 && 
!Store.isBetaTimelineEnabled) { await Store.put(StoreKey.needBetaMigration, true); } @@ -436,6 +440,18 @@ Future _populateLocalAssetPlaybackStyle(Drift db) async { } } +Future _applyLocalAssetOrientation(Drift db) { + final query = db.localAssetEntity.update() + ..where((filter) => (filter.orientation.equals(90) | (filter.orientation.equals(270)))); + return query.write( + LocalAssetEntityCompanion.custom( + width: db.localAssetEntity.height, + height: db.localAssetEntity.width, + orientation: const Variable(0), + ), + ); +} + AssetPlaybackStyle _toPlaybackStyle(PlatformAssetPlaybackStyle style) => switch (style) { PlatformAssetPlaybackStyle.unknown => AssetPlaybackStyle.unknown, PlatformAssetPlaybackStyle.image => AssetPlaybackStyle.image, From 4a384bca86c8e8d461e520e92a94a2fc67036996 Mon Sep 17 00:00:00 2001 From: Sergey Katsubo Date: Sat, 7 Mar 2026 21:08:42 +0300 Subject: [PATCH 03/26] fix(server): opus handling as accepted audio codec in transcode policy (#26736) * Fix opus handling as accepted audio codec in transcode policy Fix the issue when opus is among accepted audio codecs in transcode policy (which is default) but it still triggers transcoding because the codec name from ffprobe (opus) does not match `libopus` literal in Immich. Make a distinction between a codec name and encoder: - codec name: switch to `opus` as the audio codec name. This matches what ffprobe returns for a media file with opus audio. - encoder: continue using the `libopus` encoder in ffmpeg. 
* Add unit tests for accepted audio codecs and for libopus encoder * Add db migration for ffmpeg.targetAudioCodec opus * backward compatibility * tweak * noisy logs * full mapping * make check happy * mark deprecated * update api * indexOf --------- Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com> --- docs/docs/administration/system-settings.md | 2 +- docs/docs/install/config-file.md | 2 +- mobile/openapi/lib/model/audio_codec.dart | 3 + open-api/immich-openapi-specs.json | 1 + open-api/typescript-sdk/src/fetch-client.ts | 1 + server/src/config.ts | 2 +- server/src/constants.ts | 10 ++- server/src/dtos/system-config.dto.ts | 12 +++- server/src/enum.ts | 4 +- .../1772609167000-UpdateOpusCodecName.ts | 65 +++++++++++++++++++ server/src/services/media.service.spec.ts | 44 +++++++++++++ .../services/system-config.service.spec.ts | 2 +- server/src/utils/media.ts | 7 +- server/test/fixtures/media.stub.ts | 8 +++ .../admin-settings/FFmpegSettings.svelte | 4 +- 15 files changed, 155 insertions(+), 12 deletions(-) create mode 100644 server/src/schema/migrations/1772609167000-UpdateOpusCodecName.ts diff --git a/docs/docs/administration/system-settings.md b/docs/docs/administration/system-settings.md index fdfdad29ea..7dc9c08db3 100644 --- a/docs/docs/administration/system-settings.md +++ b/docs/docs/administration/system-settings.md @@ -230,7 +230,7 @@ The default value is `ultrafast`. ### Audio codec (`ffmpeg.targetAudioCodec`) {#ffmpeg.targetAudioCodec} -Which audio codec to use when the audio stream is being transcoded. Can be one of `mp3`, `aac`, `libopus`. +Which audio codec to use when the audio stream is being transcoded. Can be one of `mp3`, `aac`, `opus`. The default value is `aac`. 
diff --git a/docs/docs/install/config-file.md b/docs/docs/install/config-file.md index bf815521ef..3355750603 100644 --- a/docs/docs/install/config-file.md +++ b/docs/docs/install/config-file.md @@ -27,7 +27,7 @@ The default configuration looks like this: "ffmpeg": { "accel": "disabled", "accelDecode": false, - "acceptedAudioCodecs": ["aac", "mp3", "libopus"], + "acceptedAudioCodecs": ["aac", "mp3", "opus"], "acceptedContainers": ["mov", "ogg", "webm"], "acceptedVideoCodecs": ["h264"], "bframes": -1, diff --git a/mobile/openapi/lib/model/audio_codec.dart b/mobile/openapi/lib/model/audio_codec.dart index 095c616995..be1ff0dcb9 100644 --- a/mobile/openapi/lib/model/audio_codec.dart +++ b/mobile/openapi/lib/model/audio_codec.dart @@ -26,6 +26,7 @@ class AudioCodec { static const mp3 = AudioCodec._(r'mp3'); static const aac = AudioCodec._(r'aac'); static const libopus = AudioCodec._(r'libopus'); + static const opus = AudioCodec._(r'opus'); static const pcmS16le = AudioCodec._(r'pcm_s16le'); /// List of all possible values in this [enum][AudioCodec]. 
@@ -33,6 +34,7 @@ class AudioCodec { mp3, aac, libopus, + opus, pcmS16le, ]; @@ -75,6 +77,7 @@ class AudioCodecTypeTransformer { case r'mp3': return AudioCodec.mp3; case r'aac': return AudioCodec.aac; case r'libopus': return AudioCodec.libopus; + case r'opus': return AudioCodec.opus; case r'pcm_s16le': return AudioCodec.pcmS16le; default: if (!allowNull) { diff --git a/open-api/immich-openapi-specs.json b/open-api/immich-openapi-specs.json index 38e1fe8e01..d2eb322009 100644 --- a/open-api/immich-openapi-specs.json +++ b/open-api/immich-openapi-specs.json @@ -17260,6 +17260,7 @@ "mp3", "aac", "libopus", + "opus", "pcm_s16le" ], "type": "string" diff --git a/open-api/typescript-sdk/src/fetch-client.ts b/open-api/typescript-sdk/src/fetch-client.ts index 1ae12cd091..5c8ac6dbc1 100644 --- a/open-api/typescript-sdk/src/fetch-client.ts +++ b/open-api/typescript-sdk/src/fetch-client.ts @@ -7324,6 +7324,7 @@ export enum AudioCodec { Mp3 = "mp3", Aac = "aac", Libopus = "libopus", + Opus = "opus", PcmS16Le = "pcm_s16le" } export enum VideoContainer { diff --git a/server/src/config.ts b/server/src/config.ts index 2a43b51187..e6134df477 100644 --- a/server/src/config.ts +++ b/server/src/config.ts @@ -206,7 +206,7 @@ export const defaults = Object.freeze({ targetVideoCodec: VideoCodec.H264, acceptedVideoCodecs: [VideoCodec.H264], targetAudioCodec: AudioCodec.Aac, - acceptedAudioCodecs: [AudioCodec.Aac, AudioCodec.Mp3, AudioCodec.LibOpus], + acceptedAudioCodecs: [AudioCodec.Aac, AudioCodec.Mp3, AudioCodec.Opus], acceptedContainers: [VideoContainer.Mov, VideoContainer.Ogg, VideoContainer.Webm], targetResolution: '720', maxBitrate: '0', diff --git a/server/src/constants.ts b/server/src/constants.ts index 9ea5e134b6..e24057beba 100644 --- a/server/src/constants.ts +++ b/server/src/constants.ts @@ -2,7 +2,7 @@ import { Duration } from 'luxon'; import { readFileSync } from 'node:fs'; import { dirname, join } from 'node:path'; import { SemVer } from 'semver'; -import { ApiTag, 
DatabaseExtension, ExifOrientation, VectorIndex } from 'src/enum'; +import { ApiTag, AudioCodec, DatabaseExtension, ExifOrientation, VectorIndex } from 'src/enum'; export const ErrorMessages = { InconsistentMediaLocation: @@ -201,3 +201,11 @@ export const endpointTags: Record = { [ApiTag.Workflows]: 'A workflow is a set of actions that run whenever a triggering event occurs. Workflows also can include filters to further limit execution.', }; + +export const AUDIO_ENCODER: Record = { + [AudioCodec.Aac]: 'aac', + [AudioCodec.Mp3]: 'mp3', + [AudioCodec.Libopus]: 'libopus', + [AudioCodec.Opus]: 'libopus', + [AudioCodec.PcmS16le]: 'pcm_s16le', +}; diff --git a/server/src/dtos/system-config.dto.ts b/server/src/dtos/system-config.dto.ts index 7a0dcb6f3a..a214dbc467 100644 --- a/server/src/dtos/system-config.dto.ts +++ b/server/src/dtos/system-config.dto.ts @@ -1,5 +1,5 @@ import { ApiProperty } from '@nestjs/swagger'; -import { Type } from 'class-transformer'; +import { Transform, Type } from 'class-transformer'; import { ArrayMinSize, IsInt, @@ -92,6 +92,16 @@ export class SystemConfigFFmpegDto { targetAudioCodec!: AudioCodec; @ValidateEnum({ enum: AudioCodec, name: 'AudioCodec', each: true, description: 'Accepted audio codecs' }) + @Transform(({ value }) => { + if (Array.isArray(value)) { + const libopusIndex = value.indexOf('libopus'); + if (libopusIndex !== -1) { + value[libopusIndex] = 'opus'; + } + } + + return value; + }) acceptedAudioCodecs!: AudioCodec[]; @ValidateEnum({ enum: VideoContainer, name: 'VideoContainer', each: true, description: 'Accepted containers' }) diff --git a/server/src/enum.ts b/server/src/enum.ts index 2aa9bd2aa6..887c8fa93c 100644 --- a/server/src/enum.ts +++ b/server/src/enum.ts @@ -409,7 +409,9 @@ export enum VideoCodec { export enum AudioCodec { Mp3 = 'mp3', Aac = 'aac', - LibOpus = 'libopus', + /** @deprecated Use `Opus` instead */ + Libopus = 'libopus', + Opus = 'opus', PcmS16le = 'pcm_s16le', } diff --git 
a/server/src/schema/migrations/1772609167000-UpdateOpusCodecName.ts b/server/src/schema/migrations/1772609167000-UpdateOpusCodecName.ts new file mode 100644 index 0000000000..9fa5f7d788 --- /dev/null +++ b/server/src/schema/migrations/1772609167000-UpdateOpusCodecName.ts @@ -0,0 +1,65 @@ +import { Kysely, sql } from 'kysely'; + +export async function up(db: Kysely): Promise { + await sql` + UPDATE system_metadata + SET value = jsonb_set( + value, + '{ffmpeg,acceptedAudioCodecs}', + ( + SELECT jsonb_agg( + CASE + WHEN elem = 'libopus' THEN 'opus' + ELSE elem + END + ) + FROM jsonb_array_elements_text(value->'ffmpeg'->'acceptedAudioCodecs') elem + ) + ) + WHERE key = 'system-config' + AND value->'ffmpeg'->'acceptedAudioCodecs' ? 'libopus'; + `.execute(db); + + await sql` + UPDATE system_metadata + SET value = jsonb_set( + value, + '{ffmpeg,targetAudioCodec}', + '"opus"'::jsonb + ) + WHERE key = 'system-config' + AND value->'ffmpeg'->>'targetAudioCodec' = 'libopus'; + `.execute(db); +} + +export async function down(db: Kysely): Promise { + await sql` + UPDATE system_metadata + SET value = jsonb_set( + value, + '{ffmpeg,acceptedAudioCodecs}', + ( + SELECT jsonb_agg( + CASE + WHEN elem = 'opus' THEN 'libopus' + ELSE elem + END + ) + FROM jsonb_array_elements_text(value->'ffmpeg'->'acceptedAudioCodecs') elem + ) + ) + WHERE key = 'system-config' + AND value->'ffmpeg'->'acceptedAudioCodecs' ? 
'opus'; + `.execute(db); + + await sql` + UPDATE system_metadata + SET value = jsonb_set( + value, + '{ffmpeg,targetAudioCodec}', + '"libopus"'::jsonb + ) + WHERE key = 'system-config' + AND value->'ffmpeg'->>'targetAudioCodec' = 'opus'; + `.execute(db); +} diff --git a/server/src/services/media.service.spec.ts b/server/src/services/media.service.spec.ts index 12440fb263..cd61d7b45b 100644 --- a/server/src/services/media.service.spec.ts +++ b/server/src/services/media.service.spec.ts @@ -2571,6 +2571,50 @@ describe(MediaService.name, () => { expect(mocks.media.transcode).not.toHaveBeenCalled(); }); + describe('should skip transcoding for accepted audio codecs with optimal policy if video is fine', () => { + const acceptedCodecs = [ + { codec: 'aac', probeStub: probeStub.audioStreamAac }, + { codec: 'mp3', probeStub: probeStub.audioStreamMp3 }, + { codec: 'opus', probeStub: probeStub.audioStreamOpus }, + ]; + + beforeEach(() => { + mocks.systemMetadata.get.mockResolvedValue({ + ffmpeg: { + targetVideoCodec: VideoCodec.Hevc, + transcode: TranscodePolicy.Optimal, + targetResolution: '1080p', + }, + }); + }); + + it.each(acceptedCodecs)('should skip $codec', async ({ probeStub }) => { + mocks.media.probe.mockResolvedValue(probeStub); + await sut.handleVideoConversion({ id: 'video-id' }); + expect(mocks.media.transcode).not.toHaveBeenCalled(); + }); + }); + + it('should use libopus audio encoder when target audio is opus', async () => { + mocks.media.probe.mockResolvedValue(probeStub.audioStreamAac); + mocks.systemMetadata.get.mockResolvedValue({ + ffmpeg: { + targetAudioCodec: AudioCodec.Opus, + transcode: TranscodePolicy.All, + }, + }); + await sut.handleVideoConversion({ id: 'video-id' }); + expect(mocks.media.transcode).toHaveBeenCalledWith( + '/original/path.ext', + expect.any(String), + expect.objectContaining({ + inputOptions: expect.any(Array), + outputOptions: expect.arrayContaining(['-c:a libopus']), + twoPass: false, + }), + ); + }); + it('should fail if 
hwaccel is enabled for an unsupported codec', async () => { mocks.media.probe.mockResolvedValue(probeStub.matroskaContainer); mocks.systemMetadata.get.mockResolvedValue({ diff --git a/server/src/services/system-config.service.spec.ts b/server/src/services/system-config.service.spec.ts index 1c93c9d7d3..b346906fc8 100644 --- a/server/src/services/system-config.service.spec.ts +++ b/server/src/services/system-config.service.spec.ts @@ -55,7 +55,7 @@ const updatedConfig = Object.freeze({ threads: 0, preset: 'ultrafast', targetAudioCodec: AudioCodec.Aac, - acceptedAudioCodecs: [AudioCodec.Aac, AudioCodec.Mp3, AudioCodec.LibOpus], + acceptedAudioCodecs: [AudioCodec.Aac, AudioCodec.Mp3, AudioCodec.Opus], targetResolution: '720', targetVideoCodec: VideoCodec.H264, acceptedVideoCodecs: [VideoCodec.H264], diff --git a/server/src/utils/media.ts b/server/src/utils/media.ts index b2ffb9ac8b..ce185305bd 100644 --- a/server/src/utils/media.ts +++ b/server/src/utils/media.ts @@ -1,3 +1,4 @@ +import { AUDIO_ENCODER } from 'src/constants'; import { SystemConfigFFmpegDto } from 'src/dtos/system-config.dto'; import { CQMode, ToneMapping, TranscodeHardwareAcceleration, TranscodeTarget, VideoCodec } from 'src/enum'; import { @@ -117,7 +118,7 @@ export class BaseConfig implements VideoCodecSWConfig { getBaseOutputOptions(target: TranscodeTarget, videoStream: VideoStreamInfo, audioStream?: AudioStreamInfo) { const videoCodec = [TranscodeTarget.All, TranscodeTarget.Video].includes(target) ? this.getVideoCodec() : 'copy'; - const audioCodec = [TranscodeTarget.All, TranscodeTarget.Audio].includes(target) ? this.getAudioCodec() : 'copy'; + const audioCodec = [TranscodeTarget.All, TranscodeTarget.Audio].includes(target) ? 
this.getAudioEncoder() : 'copy'; const options = [ `-c:v ${videoCodec}`, @@ -305,8 +306,8 @@ export class BaseConfig implements VideoCodecSWConfig { return [options]; } - getAudioCodec(): string { - return this.config.targetAudioCodec; + getAudioEncoder(): string { + return AUDIO_ENCODER[this.config.targetAudioCodec]; } getVideoCodec(): string { diff --git a/server/test/fixtures/media.stub.ts b/server/test/fixtures/media.stub.ts index f80ad70c8f..23617fcaf0 100644 --- a/server/test/fixtures/media.stub.ts +++ b/server/test/fixtures/media.stub.ts @@ -221,6 +221,14 @@ export const probeStub = { ...probeStubDefault, audioStreams: [{ index: 1, codecName: 'aac', bitrate: 100 }], }), + audioStreamMp3: Object.freeze({ + ...probeStubDefault, + audioStreams: [{ index: 1, codecName: 'mp3', bitrate: 100 }], + }), + audioStreamOpus: Object.freeze({ + ...probeStubDefault, + audioStreams: [{ index: 1, codecName: 'opus', bitrate: 100 }], + }), audioStreamUnknown: Object.freeze({ ...probeStubDefault, audioStreams: [ diff --git a/web/src/lib/components/admin-settings/FFmpegSettings.svelte b/web/src/lib/components/admin-settings/FFmpegSettings.svelte index 83596069f9..e062b616b3 100644 --- a/web/src/lib/components/admin-settings/FFmpegSettings.svelte +++ b/web/src/lib/components/admin-settings/FFmpegSettings.svelte @@ -115,7 +115,7 @@ options={[ { value: AudioCodec.Aac, text: 'AAC' }, { value: AudioCodec.Mp3, text: 'MP3' }, - { value: AudioCodec.Libopus, text: 'Opus' }, + { value: AudioCodec.Opus, text: 'Opus' }, { value: AudioCodec.PcmS16Le, text: 'PCM (16 bit)' }, ]} isEdited={!isEqual( @@ -174,7 +174,7 @@ options={[ { value: AudioCodec.Aac, text: 'aac' }, { value: AudioCodec.Mp3, text: 'mp3' }, - { value: AudioCodec.Libopus, text: 'opus' }, + { value: AudioCodec.Opus, text: 'opus' }, ]} name="acodec" isEdited={configToEdit.ffmpeg.targetAudioCodec !== config.ffmpeg.targetAudioCodec} From aaf34fa7d4fb2b47a9d29f53c2d41f40b2c13637 Mon Sep 17 00:00:00 2001 From: Aleksander Pejcic Date: 
Sat, 7 Mar 2026 19:40:43 +0100 Subject: [PATCH 04/26] feat(ml): enable openvino for cpu (#22948) * Enable OpenVINO CPU acceleration in Immich * Remove unnecessary debug log * Remove the device_ids check for openvino since cpu will always be available * Find OpenVINOExecutionProvider index instead of assuming index 0 * Fix openvino tests * Fix failing test mock. OpenVINO expects provider options, but the CUDA provider doesn't, so use that for mocked tests. * Support empty provider options in OrtSessions in which case ONNXRuntime will use its own defaults * Use OpenVINOExecutionProvider for test_sets_provider_options_kwarg * fix mock * simplify * unused variable --------- Co-authored-by: Aleksander Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com> --- machine-learning/immich_ml/sessions/ort.py | 25 +++++++------ machine-learning/test_main.py | 43 +++++++++++++++++----- 2 files changed, 47 insertions(+), 21 deletions(-) diff --git a/machine-learning/immich_ml/sessions/ort.py b/machine-learning/immich_ml/sessions/ort.py index 5b728fce6f..bebd235970 100644 --- a/machine-learning/immich_ml/sessions/ort.py +++ b/machine-learning/immich_ml/sessions/ort.py @@ -64,14 +64,6 @@ class OrtSession: def _providers_default(self) -> list[str]: available_providers = set(ort.get_available_providers()) log.debug(f"Available ORT providers: {available_providers}") - if (openvino := "OpenVINOExecutionProvider") in available_providers: - device_ids: list[str] = ort.capi._pybind_state.get_available_openvino_device_ids() - log.debug(f"Available OpenVINO devices: {device_ids}") - - gpu_devices = [device_id for device_id in device_ids if device_id.startswith("GPU")] - if not gpu_devices: - log.warning("No GPU device found in OpenVINO. 
Falling back to CPU.") - available_providers.remove(openvino) return [provider for provider in SUPPORTED_PROVIDERS if provider in available_providers] @property @@ -102,12 +94,19 @@ class OrtSession: "migraphx_fp16_enable": "1" if settings.rocm_precision == ModelPrecision.FP16 else "0", } case "OpenVINOExecutionProvider": - openvino_dir = self.model_path.parent / "openvino" - device = f"GPU.{settings.device_id}" + device_ids: list[str] = ort.capi._pybind_state.get_available_openvino_device_ids() + # Check for available devices, preferring GPU over CPU + gpu_devices = [d for d in device_ids if d.startswith("GPU")] + if gpu_devices: + device_type = f"GPU.{settings.device_id}" + log.debug(f"OpenVINO: Using GPU device {device_type}") + else: + device_type = "CPU" + log.debug("OpenVINO: No GPU found, using CPU") options = { - "device_type": device, + "device_type": device_type, "precision": settings.openvino_precision.value, - "cache_dir": openvino_dir.as_posix(), + "cache_dir": (self.model_path.parent / "openvino").as_posix(), } case "CoreMLExecutionProvider": options = { @@ -139,12 +138,14 @@ class OrtSession: sess_options.enable_cpu_mem_arena = settings.model_arena # avoid thread contention between models + # Set inter_op threads if settings.model_inter_op_threads > 0: sess_options.inter_op_num_threads = settings.model_inter_op_threads # these defaults work well for CPU, but bottleneck GPU elif settings.model_inter_op_threads == 0 and self.providers == ["CPUExecutionProvider"]: sess_options.inter_op_num_threads = 1 + # Set intra_op threads if settings.model_intra_op_threads > 0: sess_options.intra_op_num_threads = settings.model_intra_op_threads elif settings.model_intra_op_threads == 0 and self.providers == ["CPUExecutionProvider"]: diff --git a/machine-learning/test_main.py b/machine-learning/test_main.py index a5cf1acc2e..0182c57c67 100644 --- a/machine-learning/test_main.py +++ b/machine-learning/test_main.py @@ -204,13 +204,6 @@ class TestOrtSession: assert 
session.providers == self.OV_EP - @pytest.mark.ov_device_ids(["CPU"]) - @pytest.mark.providers(OV_EP) - def test_avoids_openvino_if_gpu_not_available(self, providers: list[str], ov_device_ids: list[str]) -> None: - session = OrtSession("ViT-B-32__openai") - - assert session.providers == self.CPU_EP - @pytest.mark.providers(CUDA_EP_OUT_OF_ORDER) def test_sets_providers_in_correct_order(self, providers: list[str]) -> None: session = OrtSession("ViT-B-32__openai") @@ -256,7 +249,8 @@ class TestOrtSession: {"arena_extend_strategy": "kSameAsRequested"}, ] - def test_sets_provider_options_for_openvino(self) -> None: + @pytest.mark.ov_device_ids(["GPU.0", "GPU.1", "CPU"]) + def test_sets_provider_options_for_openvino(self, ov_device_ids: list[str]) -> None: model_path = "/cache/ViT-B-32__openai/textual/model.onnx" os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1" @@ -270,7 +264,8 @@ class TestOrtSession: } ] - def test_sets_openvino_to_fp16_if_enabled(self, mocker: MockerFixture) -> None: + @pytest.mark.ov_device_ids(["GPU.0", "GPU.1", "CPU"]) + def test_sets_openvino_to_fp16_if_enabled(self, ov_device_ids: list[str], mocker: MockerFixture) -> None: model_path = "/cache/ViT-B-32__openai/textual/model.onnx" os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1" mocker.patch.object(settings, "openvino_precision", ModelPrecision.FP16) @@ -285,6 +280,19 @@ class TestOrtSession: } ] + @pytest.mark.ov_device_ids(["CPU"]) + def test_sets_provider_options_for_openvino_cpu(self, ov_device_ids: list[str]) -> None: + model_path = "/cache/ViT-B-32__openai/model.onnx" + session = OrtSession(model_path, providers=["OpenVINOExecutionProvider"]) + + assert session.provider_options == [ + { + "device_type": "CPU", + "precision": "FP32", + "cache_dir": "/cache/ViT-B-32__openai/openvino", + } + ] + def test_sets_provider_options_for_cuda(self) -> None: os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1" @@ -341,6 +349,23 @@ class TestOrtSession: assert session.sess_options.inter_op_num_threads == 1 assert 
session.sess_options.intra_op_num_threads == 2 + @pytest.mark.ov_device_ids(["CPU"]) + def test_sets_default_sess_options_if_openvino_cpu(self, ov_device_ids: list[str]) -> None: + model_path = "/cache/ViT-B-32__openai/model.onnx" + session = OrtSession(model_path, providers=["OpenVINOExecutionProvider"]) + + assert session.sess_options.execution_mode == ort.ExecutionMode.ORT_SEQUENTIAL + assert session.sess_options.inter_op_num_threads == 0 + assert session.sess_options.intra_op_num_threads == 0 + + @pytest.mark.ov_device_ids(["GPU.0", "CPU"]) + def test_sets_default_sess_options_if_openvino_gpu(self, ov_device_ids: list[str]) -> None: + model_path = "/cache/ViT-B-32__openai/model.onnx" + session = OrtSession(model_path, providers=["OpenVINOExecutionProvider"]) + + assert session.sess_options.inter_op_num_threads == 0 + assert session.sess_options.intra_op_num_threads == 0 + def test_sets_default_sess_options_does_not_set_threads_if_non_cpu_and_default_threads(self) -> None: session = OrtSession("ViT-B-32__openai", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]) From 7a83baaf272752c54f5efa08b31a68f0b31bccff Mon Sep 17 00:00:00 2001 From: Min Idzelis Date: Sun, 8 Mar 2026 03:37:41 -0400 Subject: [PATCH 05/26] feat: responsive video duration in thumbnail (#26770) --- .../components/assets/thumbnail/thumbnail.svelte | 14 +++++++------- .../assets/thumbnail/video-thumbnail.svelte | 10 +++++++--- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/web/src/lib/components/assets/thumbnail/thumbnail.svelte b/web/src/lib/components/assets/thumbnail/thumbnail.svelte index 6a786c6417..64b5a835ed 100644 --- a/web/src/lib/components/assets/thumbnail/thumbnail.svelte +++ b/web/src/lib/components/assets/thumbnail/thumbnail.svelte @@ -334,27 +334,27 @@ {#if !authManager.isSharedLink && asset.isFavorite} -
+
{/if} {#if !!assetOwner} -
-

+

+

{assetOwner.name}

{/if} {#if !authManager.isSharedLink && showArchiveIcon && asset.visibility === AssetVisibility.Archive} -
+
{/if} {#if asset.isImage && asset.projectionType === ProjectionType.EQUIRECTANGULAR} -
+
@@ -362,7 +362,7 @@ {/if} {#if asset.isImage && asset.duration && !asset.duration.includes('0:00:00.000')} -
+
@@ -374,7 +374,7 @@
diff --git a/web/src/lib/components/assets/thumbnail/video-thumbnail.svelte b/web/src/lib/components/assets/thumbnail/video-thumbnail.svelte index 9d3a6bfcb6..b4772cc1c4 100644 --- a/web/src/lib/components/assets/thumbnail/video-thumbnail.svelte +++ b/web/src/lib/components/assets/thumbnail/video-thumbnail.svelte @@ -89,10 +89,10 @@ {/if}
{#if showTime} - +