diff --git a/i18n/en.json b/i18n/en.json
index 4e3f274340..276ca92891 100644
--- a/i18n/en.json
+++ b/i18n/en.json
@@ -1158,6 +1158,7 @@
"hide_named_person": "Hide person {name}",
"hide_password": "Hide password",
"hide_person": "Hide person",
+ "hide_text_recognition": "Hide text recognition",
"hide_unnamed_people": "Hide unnamed people",
"home_page_add_to_album_conflicts": "Added {added} assets to album {album}. {failed} assets are already in the album.",
"home_page_add_to_album_err_local": "Can not add local assets to albums yet, skipping",
@@ -1967,6 +1968,7 @@
"show_slideshow_transition": "Show slideshow transition",
"show_supporter_badge": "Supporter badge",
"show_supporter_badge_description": "Show a supporter badge",
+ "show_text_recognition": "Show text recognition",
"show_text_search_menu": "Show text search menu",
"shuffle": "Shuffle",
"sidebar": "Sidebar",
@@ -2037,6 +2039,7 @@
"tags": "Tags",
"tap_to_run_job": "Tap to run job",
"template": "Template",
+ "text_recognition": "Text recognition",
"theme": "Theme",
"theme_selection": "Theme selection",
"theme_selection_description": "Automatically set the theme to light or dark based on your browser's system preference",
diff --git a/web/src/lib/actions/zoom-image.ts b/web/src/lib/actions/zoom-image.ts
index 29074fc7b0..e67d3e1928 100644
--- a/web/src/lib/actions/zoom-image.ts
+++ b/web/src/lib/actions/zoom-image.ts
@@ -2,7 +2,7 @@ import { photoZoomState } from '$lib/stores/zoom-image.store';
import { useZoomImageWheel } from '@zoom-image/svelte';
import { get } from 'svelte/store';
-export const zoomImageAction = (node: HTMLElement) => {
+export const zoomImageAction = (node: HTMLElement, options?: { disabled?: boolean }) => {
const { createZoomImage, zoomImageState, setZoomImageState } = useZoomImageWheel();
createZoomImage(node, {
@@ -14,9 +14,32 @@ export const zoomImageAction = (node: HTMLElement) => {
setZoomImageState(state);
}
+ // Guard handlers: while disabled, stop wheel/pointerdown events from reaching any further listeners
+ const wheelHandler = (event: WheelEvent) => {
+ if (options?.disabled) {
+ event.stopImmediatePropagation();
+ }
+ };
+
+ const pointerDownHandler = (event: PointerEvent) => {
+ if (options?.disabled) {
+ event.stopImmediatePropagation();
+ }
+ };
+
+ // Add handlers at capture phase with higher priority
+ node.addEventListener('wheel', wheelHandler, { capture: true });
+ node.addEventListener('pointerdown', pointerDownHandler, { capture: true });
+
const unsubscribes = [photoZoomState.subscribe(setZoomImageState), zoomImageState.subscribe(photoZoomState.set)];
+
return {
+ update(newOptions?: { disabled?: boolean }) {
+ options = newOptions;
+ },
destroy() {
+ node.removeEventListener('wheel', wheelHandler, { capture: true });
+ node.removeEventListener('pointerdown', pointerDownHandler, { capture: true });
for (const unsubscribe of unsubscribes) {
unsubscribe();
}
diff --git a/web/src/lib/components/asset-viewer/asset-viewer.svelte b/web/src/lib/components/asset-viewer/asset-viewer.svelte
index e26c85ad07..0af27e8373 100644
--- a/web/src/lib/components/asset-viewer/asset-viewer.svelte
+++ b/web/src/lib/components/asset-viewer/asset-viewer.svelte
@@ -13,6 +13,7 @@
import type { TimelineAsset } from '$lib/managers/timeline-manager/types';
import { closeEditorCofirm } from '$lib/stores/asset-editor.store';
import { assetViewingStore } from '$lib/stores/asset-viewing.store';
+ import { ocrManager } from '$lib/stores/ocr.svelte';
import { alwaysLoadOriginalVideo, isShowDetail } from '$lib/stores/preferences.store';
import { SlideshowNavigation, SlideshowState, slideshowStore } from '$lib/stores/slideshow.store';
import { user } from '$lib/stores/user.store';
@@ -44,6 +45,7 @@
import CropArea from './editor/crop-tool/crop-area.svelte';
import EditorPanel from './editor/editor-panel.svelte';
import ImagePanoramaViewer from './image-panorama-viewer.svelte';
+ import OcrButton from './ocr-button.svelte';
import PhotoViewer from './photo-viewer.svelte';
import SlideshowBar from './slideshow-bar.svelte';
import VideoViewer from './video-wrapper-viewer.svelte';
@@ -392,9 +394,13 @@
handlePromiseError(activityManager.init(album.id, asset.id));
}
});
+
+ let currentAssetId = $derived(asset.id);
$effect(() => {
- if (asset.id) {
- handlePromiseError(handleGetAllAlbums());
+ if (currentAssetId) {
+ untrack(() => handlePromiseError(handleGetAllAlbums()));
+ ocrManager.clear();
+ handlePromiseError(ocrManager.getAssetOcr(currentAssetId));
}
});
@@ -535,6 +541,7 @@
{playOriginalVideo}
/>
{/if}
+
{#if $slideshowState === SlideshowState.None && isShared && ((album && album.isActivityEnabled) || activityManager.commentCount > 0) && !activityManager.isLoading}
{/if}
+
+ {#if $slideshowState === SlideshowState.None && asset.type === AssetTypeEnum.Image && !isShowEditor && ocrManager.hasOcrData}
+
+
+
+ {/if}
{/key}
{/if}
diff --git a/web/src/lib/components/asset-viewer/detail-panel.svelte b/web/src/lib/components/asset-viewer/detail-panel.svelte
index a9c447e498..2ee4496830 100644
--- a/web/src/lib/components/asset-viewer/detail-panel.svelte
+++ b/web/src/lib/components/asset-viewer/detail-panel.svelte
@@ -503,7 +503,7 @@
{/if}
{#if albums.length > 0}
-
+
{$t('appears_in')}
{#each albums as album (album.id)}
diff --git a/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte b/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte
new file mode 100644
index 0000000000..e64b674ac1
--- /dev/null
+++ b/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte
@@ -0,0 +1,36 @@
+
+
+
+
+
+
+
+
+ {ocrBox.text}
+
+
diff --git a/web/src/lib/components/asset-viewer/ocr-button.svelte b/web/src/lib/components/asset-viewer/ocr-button.svelte
new file mode 100644
index 0000000000..9f8966e64a
--- /dev/null
+++ b/web/src/lib/components/asset-viewer/ocr-button.svelte
@@ -0,0 +1,17 @@
+
+
+ ocrManager.toggleOcrBoundingBox()}
+/>
diff --git a/web/src/lib/components/asset-viewer/photo-viewer.svelte b/web/src/lib/components/asset-viewer/photo-viewer.svelte
index e37773fca5..261f194d34 100644
--- a/web/src/lib/components/asset-viewer/photo-viewer.svelte
+++ b/web/src/lib/components/asset-viewer/photo-viewer.svelte
@@ -2,12 +2,14 @@
import { shortcuts } from '$lib/actions/shortcut';
import { zoomImageAction } from '$lib/actions/zoom-image';
import FaceEditor from '$lib/components/asset-viewer/face-editor/face-editor.svelte';
+ import OcrBoundingBox from '$lib/components/asset-viewer/ocr-bounding-box.svelte';
import BrokenAsset from '$lib/components/assets/broken-asset.svelte';
import { assetViewerFadeDuration } from '$lib/constants';
import { castManager } from '$lib/managers/cast-manager.svelte';
import type { TimelineAsset } from '$lib/managers/timeline-manager/types';
import { photoViewerImgElement } from '$lib/stores/assets-store.svelte';
import { isFaceEditMode } from '$lib/stores/face-edit.svelte';
+ import { ocrManager } from '$lib/stores/ocr.svelte';
import { boundingBoxesArray } from '$lib/stores/people.store';
import { alwaysLoadOriginalFile } from '$lib/stores/preferences.store';
import { SlideshowLook, SlideshowState, slideshowLookCssMapping, slideshowStore } from '$lib/stores/slideshow.store';
@@ -15,6 +17,7 @@
import { getAssetOriginalUrl, getAssetThumbnailUrl, handlePromiseError } from '$lib/utils';
import { canCopyImageToClipboard, copyImageToClipboard, isWebCompatibleImage } from '$lib/utils/asset-utils';
import { handleError } from '$lib/utils/handle-error';
+ import { getOcrBoundingBoxes } from '$lib/utils/ocr-utils';
import { getBoundingBox } from '$lib/utils/people-utils';
import { cancelImageUrl } from '$lib/utils/sw-messaging';
import { getAltText } from '$lib/utils/thumbnail-util';
@@ -71,6 +74,14 @@
$boundingBoxesArray = [];
});
+ let ocrBoxes = $derived(
+ ocrManager.showOverlay && $photoViewerImgElement
+ ? getOcrBoundingBoxes(ocrManager.data, $photoZoomState, $photoViewerImgElement)
+ : [],
+ );
+
+ let isOcrActive = $derived(ocrManager.showOverlay);
+
const preload = (targetSize: AssetMediaSize | 'original', preloadAssets?: TimelineAsset[]) => {
for (const preloadAsset of preloadAssets || []) {
if (preloadAsset.isImage) {
@@ -130,9 +141,15 @@
if ($photoZoomState.currentZoom > 1) {
return;
}
+
+ if (ocrManager.showOverlay) {
+ return;
+ }
+
if (onNextAsset && event.detail.direction === 'left') {
onNextAsset();
}
+
if (onPreviousAsset && event.detail.direction === 'right') {
onPreviousAsset();
}
@@ -235,7 +252,7 @@
{:else if !imageError}
{/each}
+
+ {#each ocrBoxes as ocrBox (ocrBox.id)}
+
+ {/each}
{#if isFaceEditMode.value}
diff --git a/web/src/lib/stores/ocr.svelte.ts b/web/src/lib/stores/ocr.svelte.ts
new file mode 100644
index 0000000000..4922f630ec
--- /dev/null
+++ b/web/src/lib/stores/ocr.svelte.ts
@@ -0,0 +1,66 @@
+ import { getAssetOcr } from '@immich/sdk';
+
+ /**
+  * One OCR text region as held by the OCR manager.
+  * (x1, y1) to (x4, y4) are its four corners; getOcrBoundingBoxes in ocr-utils scales them as 0-1
+  * fractions of the image and exposes textScore as the box confidence.
+  */
+ export type OcrBoundingBox = {
+   id: string;
+   assetId: string;
+   x1: number;
+   y1: number;
+   x2: number;
+   y2: number;
+   x3: number;
+   y3: number;
+   x4: number;
+   y4: number;
+   boxScore: number;
+   textScore: number;
+   text: string;
+ };
+
+ /** Keeps the OCR boxes of one asset at a time, together with the overlay visibility flag. */
+ class OcrManager {
+   #data = $state<OcrBoundingBox[]>([]);
+   // Incremented by every load and by clear(); a response whose number is no longer current is stale.
+   #generation = 0;
+   showOverlay = $state(false);
+   hasOcrData = $state(false);
+
+   get data() {
+     return this.#data;
+   }
+
+   /**
+    * Load the OCR boxes for the asset `id`.
+    * The response is discarded when clear() or a newer load ran while the request was in flight, so
+    * boxes belonging to a previously requested asset are never stored over the current state.
+    */
+   async getAssetOcr(id: string): Promise<void> {
+     const generation = ++this.#generation;
+     const result = await getAssetOcr({ id });
+     if (generation !== this.#generation) {
+       return;
+     }
+
+     this.#data = result;
+     this.hasOcrData = result.length > 0;
+   }
+
+   /** Drop the stored boxes, hide the overlay and invalidate any request still in flight. */
+   clear() {
+     this.#generation++;
+     this.#data = [];
+     this.showOverlay = false;
+     this.hasOcrData = false;
+   }
+
+   /** Flip the overlay between shown and hidden. */
+   toggleOcrBoundingBox() {
+     this.showOverlay = !this.showOverlay;
+   }
+ }
+
+ export const ocrManager = new OcrManager();
diff --git a/web/src/lib/utils/ocr-utils.ts b/web/src/lib/utils/ocr-utils.ts
new file mode 100644
index 0000000000..97364d06f5
--- /dev/null
+++ b/web/src/lib/utils/ocr-utils.ts
@@ -0,0 +1,131 @@
+import type { OcrBoundingBox } from '$lib/stores/ocr.svelte';
+import type { ZoomImageWheelState } from '@zoom-image/core';
+
+ /** Largest size with the image's natural aspect ratio that fits inside the element's width x height. */
+ const getContainedSize = (img: HTMLImageElement): { width: number; height: number } => {
+   const aspect = img.naturalWidth / img.naturalHeight;
+   const widthAtFullHeight = img.height * aspect;
+   // Too wide at full height: pin to the element width and derive the height from it instead
+   if (widthAtFullHeight > img.width) {
+     return { width: img.width, height: img.width / aspect };
+   }
+   return { width: widthAtFullHeight, height: img.height };
+ };
+
+ export interface OcrBox { // one OCR record as produced by getOcrBoundingBoxes
+ id: string; // id of the OCR record the box was built from
+ points: { x: number; y: number }[]; // the record's four corners after conversion by getOcrBoundingBoxes
+ text: string; // text of the OCR record
+ confidence: number; // the record's textScore
+ }
+
+ export interface BoundingBoxDimensions { // result shape of calculateBoundingBoxDimensions
+ minX: number; // smallest x among the points
+ maxX: number; // largest x among the points
+ minY: number; // smallest y among the points
+ maxY: number; // largest y among the points
+ width: number; // maxX - minX
+ height: number; // maxY - minY
+ centerX: number; // (minX + maxX) / 2
+ centerY: number; // (minY + maxY) / 2
+ rotation: number; // degrees; direction of the bottom edge (bottomLeft to bottomRight)
+ skewX: number; // degrees; right edge direction minus left edge direction
+ skewY: number; // degrees; bottom edge direction minus top edge direction
+ }
+
+/**
+ * Calculate bounding box dimensions and properties from OCR points
+ * @param points - Array of 4 corner points of the bounding box
+ * @returns Dimensions, rotation, and skew values for the bounding box
+ */
+export const calculateBoundingBoxDimensions = (points: { x: number; y: number }[]): BoundingBoxDimensions => {
+ const [topLeft, topRight, bottomRight, bottomLeft] = points;
+ const minX = Math.min(...points.map(({ x }) => x));
+ const maxX = Math.max(...points.map(({ x }) => x));
+ const minY = Math.min(...points.map(({ y }) => y));
+ const maxY = Math.max(...points.map(({ y }) => y));
+ const width = maxX - minX;
+ const height = maxY - minY;
+ const centerX = (minX + maxX) / 2;
+ const centerY = (minY + maxY) / 2;
+
+ // Calculate rotation angle from the bottom edge (bottomLeft to bottomRight)
+ const rotation = Math.atan2(bottomRight.y - bottomLeft.y, bottomRight.x - bottomLeft.x) * (180 / Math.PI);
+
+ // Calculate skew angles to handle perspective distortion
+ // SkewX: compare left and right edges
+ const leftEdgeAngle = Math.atan2(bottomLeft.y - topLeft.y, bottomLeft.x - topLeft.x);
+ const rightEdgeAngle = Math.atan2(bottomRight.y - topRight.y, bottomRight.x - topRight.x);
+ const skewX = (rightEdgeAngle - leftEdgeAngle) * (180 / Math.PI);
+
+ // SkewY: compare top and bottom edges
+ const topEdgeAngle = Math.atan2(topRight.y - topLeft.y, topRight.x - topLeft.x);
+ const bottomEdgeAngle = Math.atan2(bottomRight.y - bottomLeft.y, bottomRight.x - bottomLeft.x);
+ const skewY = (bottomEdgeAngle - topEdgeAngle) * (180 / Math.PI);
+
+ return {
+ minX,
+ maxX,
+ minY,
+ maxY,
+ width,
+ height,
+ centerX,
+ centerY,
+ rotation,
+ skewX,
+ skewY,
+ };
+};
+
+/**
+ * Convert normalized OCR coordinates to screen coordinates
+ * OCR coordinates are normalized (0-1) and represent the 4 corners of a rotated rectangle
+ */
+export const getOcrBoundingBoxes = (
+ ocrData: OcrBoundingBox[],
+ zoom: ZoomImageWheelState,
+ photoViewer: HTMLImageElement | null,
+): OcrBox[] => {
+ const boxes: OcrBox[] = [];
+
+ if (photoViewer === null || !photoViewer.naturalWidth || !photoViewer.naturalHeight) {
+ return boxes;
+ }
+
+ const clientHeight = photoViewer.clientHeight;
+ const clientWidth = photoViewer.clientWidth;
+ const { width, height } = getContainedSize(photoViewer);
+
+ const imageWidth = photoViewer.naturalWidth;
+ const imageHeight = photoViewer.naturalHeight;
+
+ for (const ocr of ocrData) {
+ // Convert normalized coordinates (0-1) to actual pixel positions
+ // OCR provides 4 corners of a potentially rotated rectangle
+ const points = [
+ { x: ocr.x1, y: ocr.y1 },
+ { x: ocr.x2, y: ocr.y2 },
+ { x: ocr.x3, y: ocr.y3 },
+ { x: ocr.x4, y: ocr.y4 },
+ ].map((point) => ({
+ x:
+ (width / imageWidth) * zoom.currentZoom * point.x * imageWidth +
+ ((clientWidth - width) / 2) * zoom.currentZoom +
+ zoom.currentPositionX,
+ y:
+ (height / imageHeight) * zoom.currentZoom * point.y * imageHeight +
+ ((clientHeight - height) / 2) * zoom.currentZoom +
+ zoom.currentPositionY,
+ }));
+
+ boxes.push({
+ id: ocr.id,
+ points,
+ text: ocr.text,
+ confidence: ocr.textScore,
+ });
+ }
+
+ return boxes;
+};