diff --git a/i18n/en.json b/i18n/en.json index 4e3f274340..276ca92891 100644 --- a/i18n/en.json +++ b/i18n/en.json @@ -1158,6 +1158,7 @@ "hide_named_person": "Hide person {name}", "hide_password": "Hide password", "hide_person": "Hide person", + "hide_text_recognition": "Hide text recognition", "hide_unnamed_people": "Hide unnamed people", "home_page_add_to_album_conflicts": "Added {added} assets to album {album}. {failed} assets are already in the album.", "home_page_add_to_album_err_local": "Can not add local assets to albums yet, skipping", @@ -1967,6 +1968,7 @@ "show_slideshow_transition": "Show slideshow transition", "show_supporter_badge": "Supporter badge", "show_supporter_badge_description": "Show a supporter badge", + "show_text_recognition": "Show text recognition", "show_text_search_menu": "Show text search menu", "shuffle": "Shuffle", "sidebar": "Sidebar", @@ -2037,6 +2039,7 @@ "tags": "Tags", "tap_to_run_job": "Tap to run job", "template": "Template", + "text_recognition": "Text recognition", "theme": "Theme", "theme_selection": "Theme selection", "theme_selection_description": "Automatically set the theme to light or dark based on your browser's system preference", diff --git a/web/src/lib/actions/zoom-image.ts b/web/src/lib/actions/zoom-image.ts index 29074fc7b0..e67d3e1928 100644 --- a/web/src/lib/actions/zoom-image.ts +++ b/web/src/lib/actions/zoom-image.ts @@ -2,7 +2,7 @@ import { photoZoomState } from '$lib/stores/zoom-image.store'; import { useZoomImageWheel } from '@zoom-image/svelte'; import { get } from 'svelte/store'; -export const zoomImageAction = (node: HTMLElement) => { +export const zoomImageAction = (node: HTMLElement, options?: { disabled?: boolean }) => { const { createZoomImage, zoomImageState, setZoomImageState } = useZoomImageWheel(); createZoomImage(node, { @@ -14,9 +14,32 @@ export const zoomImageAction = (node: HTMLElement) => { setZoomImageState(state); } + // Store original event handlers so we can prevent them when disabled 
+ const wheelHandler = (event: WheelEvent) => { + if (options?.disabled) { + event.stopImmediatePropagation(); + } + }; + + const pointerDownHandler = (event: PointerEvent) => { + if (options?.disabled) { + event.stopImmediatePropagation(); + } + }; + + // Add handlers at capture phase with higher priority + node.addEventListener('wheel', wheelHandler, { capture: true }); + node.addEventListener('pointerdown', pointerDownHandler, { capture: true }); + const unsubscribes = [photoZoomState.subscribe(setZoomImageState), zoomImageState.subscribe(photoZoomState.set)]; + return { + update(newOptions?: { disabled?: boolean }) { + options = newOptions; + }, destroy() { + node.removeEventListener('wheel', wheelHandler, { capture: true }); + node.removeEventListener('pointerdown', pointerDownHandler, { capture: true }); for (const unsubscribe of unsubscribes) { unsubscribe(); } diff --git a/web/src/lib/components/asset-viewer/asset-viewer.svelte b/web/src/lib/components/asset-viewer/asset-viewer.svelte index e26c85ad07..0af27e8373 100644 --- a/web/src/lib/components/asset-viewer/asset-viewer.svelte +++ b/web/src/lib/components/asset-viewer/asset-viewer.svelte @@ -13,6 +13,7 @@ import type { TimelineAsset } from '$lib/managers/timeline-manager/types'; import { closeEditorCofirm } from '$lib/stores/asset-editor.store'; import { assetViewingStore } from '$lib/stores/asset-viewing.store'; + import { ocrManager } from '$lib/stores/ocr.svelte'; import { alwaysLoadOriginalVideo, isShowDetail } from '$lib/stores/preferences.store'; import { SlideshowNavigation, SlideshowState, slideshowStore } from '$lib/stores/slideshow.store'; import { user } from '$lib/stores/user.store'; @@ -44,6 +45,7 @@ import CropArea from './editor/crop-tool/crop-area.svelte'; import EditorPanel from './editor/editor-panel.svelte'; import ImagePanoramaViewer from './image-panorama-viewer.svelte'; + import OcrButton from './ocr-button.svelte'; import PhotoViewer from './photo-viewer.svelte'; import 
SlideshowBar from './slideshow-bar.svelte'; import VideoViewer from './video-wrapper-viewer.svelte'; @@ -392,9 +394,13 @@ handlePromiseError(activityManager.init(album.id, asset.id)); } }); + + let currentAssetId = $derived(asset.id); $effect(() => { - if (asset.id) { - handlePromiseError(handleGetAllAlbums()); + if (currentAssetId) { + untrack(() => handlePromiseError(handleGetAllAlbums())); + ocrManager.clear(); + handlePromiseError(ocrManager.getAssetOcr(currentAssetId)); } }); @@ -535,6 +541,7 @@ {playOriginalVideo} /> {/if} + {#if $slideshowState === SlideshowState.None && isShared && ((album && album.isActivityEnabled) || activityManager.commentCount > 0) && !activityManager.isLoading}
{/if} + + {#if $slideshowState === SlideshowState.None && asset.type === AssetTypeEnum.Image && !isShowEditor && ocrManager.hasOcrData} +
+ +
+ {/if} {/key} {/if} diff --git a/web/src/lib/components/asset-viewer/detail-panel.svelte b/web/src/lib/components/asset-viewer/detail-panel.svelte index a9c447e498..2ee4496830 100644 --- a/web/src/lib/components/asset-viewer/detail-panel.svelte +++ b/web/src/lib/components/asset-viewer/detail-panel.svelte @@ -503,7 +503,7 @@ {/if} {#if albums.length > 0} -
+

{$t('appears_in')}

{#each albums as album (album.id)} diff --git a/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte b/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte new file mode 100644 index 0000000000..e64b674ac1 --- /dev/null +++ b/web/src/lib/components/asset-viewer/ocr-bounding-box.svelte @@ -0,0 +1,36 @@ + + +
+ +
+ + +
+ {ocrBox.text} +
+
diff --git a/web/src/lib/components/asset-viewer/ocr-button.svelte b/web/src/lib/components/asset-viewer/ocr-button.svelte new file mode 100644 index 0000000000..9f8966e64a --- /dev/null +++ b/web/src/lib/components/asset-viewer/ocr-button.svelte @@ -0,0 +1,17 @@ + + + ocrManager.toggleOcrBoundingBox()} +/> diff --git a/web/src/lib/components/asset-viewer/photo-viewer.svelte b/web/src/lib/components/asset-viewer/photo-viewer.svelte index e37773fca5..261f194d34 100644 --- a/web/src/lib/components/asset-viewer/photo-viewer.svelte +++ b/web/src/lib/components/asset-viewer/photo-viewer.svelte @@ -2,12 +2,14 @@ import { shortcuts } from '$lib/actions/shortcut'; import { zoomImageAction } from '$lib/actions/zoom-image'; import FaceEditor from '$lib/components/asset-viewer/face-editor/face-editor.svelte'; + import OcrBoundingBox from '$lib/components/asset-viewer/ocr-bounding-box.svelte'; import BrokenAsset from '$lib/components/assets/broken-asset.svelte'; import { assetViewerFadeDuration } from '$lib/constants'; import { castManager } from '$lib/managers/cast-manager.svelte'; import type { TimelineAsset } from '$lib/managers/timeline-manager/types'; import { photoViewerImgElement } from '$lib/stores/assets-store.svelte'; import { isFaceEditMode } from '$lib/stores/face-edit.svelte'; + import { ocrManager } from '$lib/stores/ocr.svelte'; import { boundingBoxesArray } from '$lib/stores/people.store'; import { alwaysLoadOriginalFile } from '$lib/stores/preferences.store'; import { SlideshowLook, SlideshowState, slideshowLookCssMapping, slideshowStore } from '$lib/stores/slideshow.store'; @@ -15,6 +17,7 @@ import { getAssetOriginalUrl, getAssetThumbnailUrl, handlePromiseError } from '$lib/utils'; import { canCopyImageToClipboard, copyImageToClipboard, isWebCompatibleImage } from '$lib/utils/asset-utils'; import { handleError } from '$lib/utils/handle-error'; + import { getOcrBoundingBoxes } from '$lib/utils/ocr-utils'; import { getBoundingBox } from 
'$lib/utils/people-utils'; import { cancelImageUrl } from '$lib/utils/sw-messaging'; import { getAltText } from '$lib/utils/thumbnail-util'; @@ -71,6 +74,14 @@ $boundingBoxesArray = []; }); + let ocrBoxes = $derived( + ocrManager.showOverlay && $photoViewerImgElement + ? getOcrBoundingBoxes(ocrManager.data, $photoZoomState, $photoViewerImgElement) + : [], + ); + + let isOcrActive = $derived(ocrManager.showOverlay); + const preload = (targetSize: AssetMediaSize | 'original', preloadAssets?: TimelineAsset[]) => { for (const preloadAsset of preloadAssets || []) { if (preloadAsset.isImage) { @@ -130,9 +141,15 @@ if ($photoZoomState.currentZoom > 1) { return; } + + if (ocrManager.showOverlay) { + return; + } + if (onNextAsset && event.detail.direction === 'left') { onNextAsset(); } + if (onPreviousAsset && event.detail.direction === 'right') { onPreviousAsset(); } @@ -235,7 +252,7 @@ {:else if !imageError}
// Tail of the preceding photo-viewer.svelte hunk (template markup, kept verbatim):
//   {/each} + + {#each ocrBoxes as ocrBox (ocrBox.id)} + + {/each} {#if isFaceEditMode.value}

// ---------------------------------------------------------------------------
// web/src/lib/stores/ocr.svelte.ts (new file)
// ---------------------------------------------------------------------------
import { getAssetOcr } from '@immich/sdk';

/**
 * A single OCR detection of an asset: the four corner points of the detected
 * region (x1,y1 … x4,y4), the detection/recognition scores and the text.
 */
export type OcrBoundingBox = {
  id: string;
  assetId: string;
  x1: number;
  y1: number;
  x2: number;
  y2: number;
  x3: number;
  y3: number;
  x4: number;
  y4: number;
  boxScore: number;
  textScore: number;
  text: string;
};

/**
 * Holds the OCR results that were loaded last and whether the OCR overlay is
 * currently shown.
 */
class OcrManager {
  // NOTE(review): the explicit element type is an assumption; the generic
  // argument looks like it was lost from the visible text. Confirm against the real file.
  #data = $state<OcrBoundingBox[]>([]);
  // Plain (non-reactive) counter identifying the most recent load/clear, used
  // to discard responses that were superseded while they were in flight.
  #requestToken = 0;
  showOverlay = $state(false);
  hasOcrData = $state(false);

  /** The OCR boxes loaded by the most recent successful `getAssetOcr` call. */
  get data() {
    return this.#data;
  }

  /**
   * Loads the OCR boxes for the asset `id` and updates `data` / `hasOcrData`.
   *
   * If `clear()` or another `getAssetOcr()` call happens while the request is
   * pending, the late response is ignored so it cannot overwrite newer state.
   * A rejected request propagates to the caller and leaves the state untouched.
   */
  async getAssetOcr(id: string): Promise<void> {
    const token = ++this.#requestToken;
    const data = await getAssetOcr({ id });
    if (token !== this.#requestToken) {
      // Superseded while waiting: the result belongs to a stale request.
      return;
    }
    this.#data = data;
    this.hasOcrData = data.length > 0;
  }

  /** Drops the loaded boxes, hides the overlay and invalidates pending loads. */
  clear() {
    this.#requestToken++;
    this.#data = [];
    this.showOverlay = false;
    this.hasOcrData = false;
  }

  /** Flips the visibility of the OCR overlay. */
  toggleOcrBoundingBox() {
    this.showOverlay = !this.showOverlay;
  }
}

export const ocrManager = new OcrManager();

// ---------------------------------------------------------------------------
// web/src/lib/utils/ocr-utils.ts (new file)
// ---------------------------------------------------------------------------
import type { OcrBoundingBox } from '$lib/stores/ocr.svelte';
import type { ZoomImageWheelState } from '@zoom-image/core';

/**
 * Size of the picture as drawn inside the `<img>` box when its aspect ratio is
 * preserved: full height first, falling back to full width when that would
 * overflow horizontally.
 */
const getContainedSize = (img: HTMLImageElement): { width: number; height: number } => {
  const ratio = img.naturalWidth / img.naturalHeight;
  const widthAtFullHeight = img.height * ratio;
  if (widthAtFullHeight > img.width) {
    return { width: img.width, height: img.width / ratio };
  }
  return { width: widthAtFullHeight, height: img.height };
};

export interface OcrBox {
  id: string;
  points: { x: number; y: number }[];
  text: string;
  confidence: number;
}

export interface BoundingBoxDimensions {
  minX: number;
  maxX: number;
  minY: number;
  maxY: number;
  width: number;
  height: number;
  centerX: number;
  centerY: number;
  rotation: number;
  skewX: number;
  skewY: number;
}

const RADIANS_TO_DEGREES = 180 / Math.PI;

/**
 * Wraps the difference of two `atan2` angles (open range -360..360 degrees)
 * into (-180, 180]. Values already in range are returned unchanged.
 */
const wrapAngleDifference = (degrees: number): number => {
  if (degrees > 180) {
    return degrees - 360;
  }
  if (degrees <= -180) {
    return degrees + 360;
  }
  return degrees;
};

/**
 * Calculate bounding box dimensions and properties from OCR points
 * @param points - Corner points in the order top-left, top-right, bottom-right, bottom-left
 * @returns Axis-aligned extent, center, rotation (degrees) and skew (degrees) of the box
 * @throws TypeError when fewer than 4 points are supplied
 */
export const calculateBoundingBoxDimensions = (points: { x: number; y: number }[]): BoundingBoxDimensions => {
  if (points.length < 4) {
    throw new TypeError(`calculateBoundingBoxDimensions expects 4 corner points, received ${points.length}`);
  }

  const [topLeft, topRight, bottomRight, bottomLeft] = points;

  const xs = points.map(({ x }) => x);
  const ys = points.map(({ y }) => y);
  const minX = Math.min(...xs);
  const maxX = Math.max(...xs);
  const minY = Math.min(...ys);
  const maxY = Math.max(...ys);

  // Direction of each edge, in radians.
  const leftEdgeAngle = Math.atan2(bottomLeft.y - topLeft.y, bottomLeft.x - topLeft.x);
  const rightEdgeAngle = Math.atan2(bottomRight.y - topRight.y, bottomRight.x - topRight.x);
  const topEdgeAngle = Math.atan2(topRight.y - topLeft.y, topRight.x - topLeft.x);
  const bottomEdgeAngle = Math.atan2(bottomRight.y - bottomLeft.y, bottomRight.x - bottomLeft.x);

  return {
    minX,
    maxX,
    minY,
    maxY,
    width: maxX - minX,
    height: maxY - minY,
    centerX: (minX + maxX) / 2,
    centerY: (minY + maxY) / 2,
    // Rotation follows the bottom edge (bottomLeft -> bottomRight).
    rotation: bottomEdgeAngle * RADIANS_TO_DEGREES,
    // Skew compares opposite edges. The raw difference is wrapped because two
    // nearly parallel edges on either side of the +/-180 degree seam (e.g.
    // upside-down text) would otherwise report roughly +/-360 degrees.
    skewX: wrapAngleDifference((rightEdgeAngle - leftEdgeAngle) * RADIANS_TO_DEGREES),
    skewY: wrapAngleDifference((bottomEdgeAngle - topEdgeAngle) * RADIANS_TO_DEGREES),
  };
};

/**
 * Convert normalized OCR coordinates to screen coordinates
 * OCR coordinates are normalized (0-1) and represent the 4 corners of a rotated rectangle
 *
 * @param ocrData - OCR detections of the displayed asset
 * @param zoom - Current zoom factor and pan position of the viewer
 * @param photoViewer - The displayed image element; `null` or a not-yet-loaded image yields no boxes
 * @returns One box per detection, with corner points scaled to the drawn image,
 *          shifted by the letterbox margin, and adjusted for zoom and pan
 */
export const getOcrBoundingBoxes = (
  ocrData: OcrBoundingBox[],
  zoom: ZoomImageWheelState,
  photoViewer: HTMLImageElement | null,
): OcrBox[] => {
  if (photoViewer === null || !photoViewer.naturalWidth || !photoViewer.naturalHeight) {
    return [];
  }

  const { width, height } = getContainedSize(photoViewer);
  const { currentZoom, currentPositionX, currentPositionY } = zoom;

  // Margin between the element box and the drawn picture, scaled by the zoom.
  const marginX = ((photoViewer.clientWidth - width) / 2) * currentZoom;
  const marginY = ((photoViewer.clientHeight - height) / 2) * currentZoom;

  return ocrData.map((ocr) => ({
    id: ocr.id,
    points: [
      { x: ocr.x1, y: ocr.y1 },
      { x: ocr.x2, y: ocr.y2 },
      { x: ocr.x3, y: ocr.y3 },
      { x: ocr.x4, y: ocr.y4 },
    ].map(({ x, y }) => ({
      x: width * currentZoom * x + marginX + currentPositionX,
      y: height * currentZoom * y + marginY + currentPositionY,
    })),
    text: ocr.text,
    confidence: ocr.textScore,
  }));
};