mirror of
https://github.com/immich-app/immich.git
synced 2025-11-26 00:05:18 -05:00
feat: show OCR bounding box (#23717)
* feat: ocr bounding box * bounding boxes * pr feedback * pr feedback * allow copy across text boxes * pr feedback
This commit is contained in:
parent
f59417cc77
commit
56e431226f
@ -1158,6 +1158,7 @@
|
|||||||
"hide_named_person": "Hide person {name}",
|
"hide_named_person": "Hide person {name}",
|
||||||
"hide_password": "Hide password",
|
"hide_password": "Hide password",
|
||||||
"hide_person": "Hide person",
|
"hide_person": "Hide person",
|
||||||
|
"hide_text_recognition": "Hide text recognition",
|
||||||
"hide_unnamed_people": "Hide unnamed people",
|
"hide_unnamed_people": "Hide unnamed people",
|
||||||
"home_page_add_to_album_conflicts": "Added {added} assets to album {album}. {failed} assets are already in the album.",
|
"home_page_add_to_album_conflicts": "Added {added} assets to album {album}. {failed} assets are already in the album.",
|
||||||
"home_page_add_to_album_err_local": "Can not add local assets to albums yet, skipping",
|
"home_page_add_to_album_err_local": "Can not add local assets to albums yet, skipping",
|
||||||
@ -1967,6 +1968,7 @@
|
|||||||
"show_slideshow_transition": "Show slideshow transition",
|
"show_slideshow_transition": "Show slideshow transition",
|
||||||
"show_supporter_badge": "Supporter badge",
|
"show_supporter_badge": "Supporter badge",
|
||||||
"show_supporter_badge_description": "Show a supporter badge",
|
"show_supporter_badge_description": "Show a supporter badge",
|
||||||
|
"show_text_recognition": "Show text recognition",
|
||||||
"show_text_search_menu": "Show text search menu",
|
"show_text_search_menu": "Show text search menu",
|
||||||
"shuffle": "Shuffle",
|
"shuffle": "Shuffle",
|
||||||
"sidebar": "Sidebar",
|
"sidebar": "Sidebar",
|
||||||
@ -2037,6 +2039,7 @@
|
|||||||
"tags": "Tags",
|
"tags": "Tags",
|
||||||
"tap_to_run_job": "Tap to run job",
|
"tap_to_run_job": "Tap to run job",
|
||||||
"template": "Template",
|
"template": "Template",
|
||||||
|
"text_recognition": "Text recognition",
|
||||||
"theme": "Theme",
|
"theme": "Theme",
|
||||||
"theme_selection": "Theme selection",
|
"theme_selection": "Theme selection",
|
||||||
"theme_selection_description": "Automatically set the theme to light or dark based on your browser's system preference",
|
"theme_selection_description": "Automatically set the theme to light or dark based on your browser's system preference",
|
||||||
|
|||||||
@ -2,7 +2,7 @@ import { photoZoomState } from '$lib/stores/zoom-image.store';
|
|||||||
import { useZoomImageWheel } from '@zoom-image/svelte';
|
import { useZoomImageWheel } from '@zoom-image/svelte';
|
||||||
import { get } from 'svelte/store';
|
import { get } from 'svelte/store';
|
||||||
|
|
||||||
export const zoomImageAction = (node: HTMLElement) => {
|
export const zoomImageAction = (node: HTMLElement, options?: { disabled?: boolean }) => {
|
||||||
const { createZoomImage, zoomImageState, setZoomImageState } = useZoomImageWheel();
|
const { createZoomImage, zoomImageState, setZoomImageState } = useZoomImageWheel();
|
||||||
|
|
||||||
createZoomImage(node, {
|
createZoomImage(node, {
|
||||||
@ -14,9 +14,32 @@ export const zoomImageAction = (node: HTMLElement) => {
|
|||||||
setZoomImageState(state);
|
setZoomImageState(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Store original event handlers so we can prevent them when disabled
|
||||||
|
const wheelHandler = (event: WheelEvent) => {
|
||||||
|
if (options?.disabled) {
|
||||||
|
event.stopImmediatePropagation();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const pointerDownHandler = (event: PointerEvent) => {
|
||||||
|
if (options?.disabled) {
|
||||||
|
event.stopImmediatePropagation();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Add handlers at capture phase with higher priority
|
||||||
|
node.addEventListener('wheel', wheelHandler, { capture: true });
|
||||||
|
node.addEventListener('pointerdown', pointerDownHandler, { capture: true });
|
||||||
|
|
||||||
const unsubscribes = [photoZoomState.subscribe(setZoomImageState), zoomImageState.subscribe(photoZoomState.set)];
|
const unsubscribes = [photoZoomState.subscribe(setZoomImageState), zoomImageState.subscribe(photoZoomState.set)];
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
update(newOptions?: { disabled?: boolean }) {
|
||||||
|
options = newOptions;
|
||||||
|
},
|
||||||
destroy() {
|
destroy() {
|
||||||
|
node.removeEventListener('wheel', wheelHandler, { capture: true });
|
||||||
|
node.removeEventListener('pointerdown', pointerDownHandler, { capture: true });
|
||||||
for (const unsubscribe of unsubscribes) {
|
for (const unsubscribe of unsubscribes) {
|
||||||
unsubscribe();
|
unsubscribe();
|
||||||
}
|
}
|
||||||
|
|||||||
@ -13,6 +13,7 @@
|
|||||||
import type { TimelineAsset } from '$lib/managers/timeline-manager/types';
|
import type { TimelineAsset } from '$lib/managers/timeline-manager/types';
|
||||||
import { closeEditorCofirm } from '$lib/stores/asset-editor.store';
|
import { closeEditorCofirm } from '$lib/stores/asset-editor.store';
|
||||||
import { assetViewingStore } from '$lib/stores/asset-viewing.store';
|
import { assetViewingStore } from '$lib/stores/asset-viewing.store';
|
||||||
|
import { ocrManager } from '$lib/stores/ocr.svelte';
|
||||||
import { alwaysLoadOriginalVideo, isShowDetail } from '$lib/stores/preferences.store';
|
import { alwaysLoadOriginalVideo, isShowDetail } from '$lib/stores/preferences.store';
|
||||||
import { SlideshowNavigation, SlideshowState, slideshowStore } from '$lib/stores/slideshow.store';
|
import { SlideshowNavigation, SlideshowState, slideshowStore } from '$lib/stores/slideshow.store';
|
||||||
import { user } from '$lib/stores/user.store';
|
import { user } from '$lib/stores/user.store';
|
||||||
@ -44,6 +45,7 @@
|
|||||||
import CropArea from './editor/crop-tool/crop-area.svelte';
|
import CropArea from './editor/crop-tool/crop-area.svelte';
|
||||||
import EditorPanel from './editor/editor-panel.svelte';
|
import EditorPanel from './editor/editor-panel.svelte';
|
||||||
import ImagePanoramaViewer from './image-panorama-viewer.svelte';
|
import ImagePanoramaViewer from './image-panorama-viewer.svelte';
|
||||||
|
import OcrButton from './ocr-button.svelte';
|
||||||
import PhotoViewer from './photo-viewer.svelte';
|
import PhotoViewer from './photo-viewer.svelte';
|
||||||
import SlideshowBar from './slideshow-bar.svelte';
|
import SlideshowBar from './slideshow-bar.svelte';
|
||||||
import VideoViewer from './video-wrapper-viewer.svelte';
|
import VideoViewer from './video-wrapper-viewer.svelte';
|
||||||
@ -392,9 +394,13 @@
|
|||||||
handlePromiseError(activityManager.init(album.id, asset.id));
|
handlePromiseError(activityManager.init(album.id, asset.id));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
let currentAssetId = $derived(asset.id);
|
||||||
$effect(() => {
|
$effect(() => {
|
||||||
if (asset.id) {
|
if (currentAssetId) {
|
||||||
handlePromiseError(handleGetAllAlbums());
|
untrack(() => handlePromiseError(handleGetAllAlbums()));
|
||||||
|
ocrManager.clear();
|
||||||
|
handlePromiseError(ocrManager.getAssetOcr(currentAssetId));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
</script>
|
</script>
|
||||||
@ -535,6 +541,7 @@
|
|||||||
{playOriginalVideo}
|
{playOriginalVideo}
|
||||||
/>
|
/>
|
||||||
{/if}
|
{/if}
|
||||||
|
|
||||||
{#if $slideshowState === SlideshowState.None && isShared && ((album && album.isActivityEnabled) || activityManager.commentCount > 0) && !activityManager.isLoading}
|
{#if $slideshowState === SlideshowState.None && isShared && ((album && album.isActivityEnabled) || activityManager.commentCount > 0) && !activityManager.isLoading}
|
||||||
<div class="absolute bottom-0 end-0 mb-20 me-8">
|
<div class="absolute bottom-0 end-0 mb-20 me-8">
|
||||||
<ActivityStatus
|
<ActivityStatus
|
||||||
@ -547,6 +554,12 @@
|
|||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
|
|
||||||
|
{#if $slideshowState === SlideshowState.None && asset.type === AssetTypeEnum.Image && !isShowEditor && ocrManager.hasOcrData}
|
||||||
|
<div class="absolute bottom-0 end-0 mb-6 me-6">
|
||||||
|
<OcrButton />
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
{/key}
|
{/key}
|
||||||
{/if}
|
{/if}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@ -503,7 +503,7 @@
|
|||||||
{/if}
|
{/if}
|
||||||
|
|
||||||
{#if albums.length > 0}
|
{#if albums.length > 0}
|
||||||
<section class="px-6 pt-6 dark:text-immich-dark-fg">
|
<section class="px-6 py-6 dark:text-immich-dark-fg">
|
||||||
<p class="uppercase pb-4 text-sm">{$t('appears_in')}</p>
|
<p class="uppercase pb-4 text-sm">{$t('appears_in')}</p>
|
||||||
{#each albums as album (album.id)}
|
{#each albums as album (album.id)}
|
||||||
<a href={resolve(`${AppRoute.ALBUMS}/${album.id}`)}>
|
<a href={resolve(`${AppRoute.ALBUMS}/${album.id}`)}>
|
||||||
|
|||||||
36
web/src/lib/components/asset-viewer/ocr-bounding-box.svelte
Normal file
36
web/src/lib/components/asset-viewer/ocr-bounding-box.svelte
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
<script lang="ts">
  import type { OcrBox } from '$lib/utils/ocr-utils';
  import { calculateBoundingBoxDimensions } from '$lib/utils/ocr-utils';

  type Props = {
    ocrBox: OcrBox;
  };

  let { ocrBox }: Props = $props();

  // Extents, centre, rotation and skew derived from the box's corner points.
  const geometry = $derived(calculateBoundingBoxDimensions(ocrBox.points));

  // The outline and the text layer are positioned identically, so they share one inline style:
  // sized to the axis-aligned extents, moved to the top-left extent, then rotated/skewed around
  // the centre of the extents (expressed relative to the element's own top-left corner).
  const layerStyle = $derived(
    `width: ${geometry.width}px; height: ${geometry.height}px; ` +
      `transform: translate(${geometry.minX}px, ${geometry.minY}px) rotate(${geometry.rotation}deg) skew(${geometry.skewX}deg, ${geometry.skewY}deg); ` +
      `transform-origin: ${geometry.centerX - geometry.minX}px ${geometry.centerY - geometry.minY}px;`,
  );
</script>

<div class="absolute group left-0 top-0 pointer-events-none">
  <!-- Visible outline of the recognised text region -->
  <div
    class="absolute border-2 border-blue-500 bg-blue-500/10 cursor-pointer pointer-events-auto transition-all group-hover:bg-blue-500/30 group-hover:border-blue-600 group-hover:border-[3px]"
    style={layerStyle}
  ></div>

  <!-- Recognised text: transparent until the group is hovered, but always present so it can be selected and copied -->
  <div
    class="absolute flex items-center justify-center text-transparent text-sm px-2 py-1 pointer-events-auto cursor-text whitespace-pre-wrap wrap-break-word select-text group-hover:text-white group-hover:bg-black/75 group-hover:z-10"
    style={layerStyle}
  >
    {ocrBox.text}
  </div>
</div>
|
||||||
17
web/src/lib/components/asset-viewer/ocr-button.svelte
Normal file
17
web/src/lib/components/asset-viewer/ocr-button.svelte
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
<script lang="ts">
  import { ocrManager } from '$lib/stores/ocr.svelte';
  import { IconButton } from '@immich/ui';
  import { mdiTextRecognition } from '@mdi/js';
  import { t } from 'svelte-i18n';
</script>

<!--
  Toggles the OCR bounding-box overlay.
  The class must be an expression, not a quoted string: a string literal is passed through verbatim,
  so the highlight classes would be applied regardless of the overlay state.
-->
<IconButton
  title={ocrManager.showOverlay ? $t('hide_text_recognition') : $t('show_text_recognition')}
  icon={mdiTextRecognition}
  class={ocrManager.showOverlay ? 'dark bg-immich-primary text-white' : 'dark'}
  color="secondary"
  variant="ghost"
  shape="round"
  aria-label={$t('text_recognition')}
  onclick={() => ocrManager.toggleOcrBoundingBox()}
/>
|
||||||
@ -2,12 +2,14 @@
|
|||||||
import { shortcuts } from '$lib/actions/shortcut';
|
import { shortcuts } from '$lib/actions/shortcut';
|
||||||
import { zoomImageAction } from '$lib/actions/zoom-image';
|
import { zoomImageAction } from '$lib/actions/zoom-image';
|
||||||
import FaceEditor from '$lib/components/asset-viewer/face-editor/face-editor.svelte';
|
import FaceEditor from '$lib/components/asset-viewer/face-editor/face-editor.svelte';
|
||||||
|
import OcrBoundingBox from '$lib/components/asset-viewer/ocr-bounding-box.svelte';
|
||||||
import BrokenAsset from '$lib/components/assets/broken-asset.svelte';
|
import BrokenAsset from '$lib/components/assets/broken-asset.svelte';
|
||||||
import { assetViewerFadeDuration } from '$lib/constants';
|
import { assetViewerFadeDuration } from '$lib/constants';
|
||||||
import { castManager } from '$lib/managers/cast-manager.svelte';
|
import { castManager } from '$lib/managers/cast-manager.svelte';
|
||||||
import type { TimelineAsset } from '$lib/managers/timeline-manager/types';
|
import type { TimelineAsset } from '$lib/managers/timeline-manager/types';
|
||||||
import { photoViewerImgElement } from '$lib/stores/assets-store.svelte';
|
import { photoViewerImgElement } from '$lib/stores/assets-store.svelte';
|
||||||
import { isFaceEditMode } from '$lib/stores/face-edit.svelte';
|
import { isFaceEditMode } from '$lib/stores/face-edit.svelte';
|
||||||
|
import { ocrManager } from '$lib/stores/ocr.svelte';
|
||||||
import { boundingBoxesArray } from '$lib/stores/people.store';
|
import { boundingBoxesArray } from '$lib/stores/people.store';
|
||||||
import { alwaysLoadOriginalFile } from '$lib/stores/preferences.store';
|
import { alwaysLoadOriginalFile } from '$lib/stores/preferences.store';
|
||||||
import { SlideshowLook, SlideshowState, slideshowLookCssMapping, slideshowStore } from '$lib/stores/slideshow.store';
|
import { SlideshowLook, SlideshowState, slideshowLookCssMapping, slideshowStore } from '$lib/stores/slideshow.store';
|
||||||
@ -15,6 +17,7 @@
|
|||||||
import { getAssetOriginalUrl, getAssetThumbnailUrl, handlePromiseError } from '$lib/utils';
|
import { getAssetOriginalUrl, getAssetThumbnailUrl, handlePromiseError } from '$lib/utils';
|
||||||
import { canCopyImageToClipboard, copyImageToClipboard, isWebCompatibleImage } from '$lib/utils/asset-utils';
|
import { canCopyImageToClipboard, copyImageToClipboard, isWebCompatibleImage } from '$lib/utils/asset-utils';
|
||||||
import { handleError } from '$lib/utils/handle-error';
|
import { handleError } from '$lib/utils/handle-error';
|
||||||
|
import { getOcrBoundingBoxes } from '$lib/utils/ocr-utils';
|
||||||
import { getBoundingBox } from '$lib/utils/people-utils';
|
import { getBoundingBox } from '$lib/utils/people-utils';
|
||||||
import { cancelImageUrl } from '$lib/utils/sw-messaging';
|
import { cancelImageUrl } from '$lib/utils/sw-messaging';
|
||||||
import { getAltText } from '$lib/utils/thumbnail-util';
|
import { getAltText } from '$lib/utils/thumbnail-util';
|
||||||
@ -71,6 +74,14 @@
|
|||||||
$boundingBoxesArray = [];
|
$boundingBoxesArray = [];
|
||||||
});
|
});
|
||||||
|
|
||||||
|
let ocrBoxes = $derived(
|
||||||
|
ocrManager.showOverlay && $photoViewerImgElement
|
||||||
|
? getOcrBoundingBoxes(ocrManager.data, $photoZoomState, $photoViewerImgElement)
|
||||||
|
: [],
|
||||||
|
);
|
||||||
|
|
||||||
|
let isOcrActive = $derived(ocrManager.showOverlay);
|
||||||
|
|
||||||
const preload = (targetSize: AssetMediaSize | 'original', preloadAssets?: TimelineAsset[]) => {
|
const preload = (targetSize: AssetMediaSize | 'original', preloadAssets?: TimelineAsset[]) => {
|
||||||
for (const preloadAsset of preloadAssets || []) {
|
for (const preloadAsset of preloadAssets || []) {
|
||||||
if (preloadAsset.isImage) {
|
if (preloadAsset.isImage) {
|
||||||
@ -130,9 +141,15 @@
|
|||||||
if ($photoZoomState.currentZoom > 1) {
|
if ($photoZoomState.currentZoom > 1) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ocrManager.showOverlay) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (onNextAsset && event.detail.direction === 'left') {
|
if (onNextAsset && event.detail.direction === 'left') {
|
||||||
onNextAsset();
|
onNextAsset();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (onPreviousAsset && event.detail.direction === 'right') {
|
if (onPreviousAsset && event.detail.direction === 'right') {
|
||||||
onPreviousAsset();
|
onPreviousAsset();
|
||||||
}
|
}
|
||||||
@ -235,7 +252,7 @@
|
|||||||
</div>
|
</div>
|
||||||
{:else if !imageError}
|
{:else if !imageError}
|
||||||
<div
|
<div
|
||||||
use:zoomImageAction
|
use:zoomImageAction={{ disabled: isOcrActive }}
|
||||||
{...useSwipe(onSwipe)}
|
{...useSwipe(onSwipe)}
|
||||||
class="h-full w-full"
|
class="h-full w-full"
|
||||||
transition:fade={{ duration: haveFadeTransition ? assetViewerFadeDuration : 0 }}
|
transition:fade={{ duration: haveFadeTransition ? assetViewerFadeDuration : 0 }}
|
||||||
@ -264,6 +281,10 @@
|
|||||||
style="top: {boundingbox.top}px; left: {boundingbox.left}px; height: {boundingbox.height}px; width: {boundingbox.width}px;"
|
style="top: {boundingbox.top}px; left: {boundingbox.left}px; height: {boundingbox.height}px; width: {boundingbox.width}px;"
|
||||||
></div>
|
></div>
|
||||||
{/each}
|
{/each}
|
||||||
|
|
||||||
|
{#each ocrBoxes as ocrBox (ocrBox.id)}
|
||||||
|
<OcrBoundingBox {ocrBox} />
|
||||||
|
{/each}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{#if isFaceEditMode.value}
|
{#if isFaceEditMode.value}
|
||||||
|
|||||||
44
web/src/lib/stores/ocr.svelte.ts
Normal file
44
web/src/lib/stores/ocr.svelte.ts
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
import { getAssetOcr } from '@immich/sdk';
|
||||||
|
|
||||||
|
/**
 * One recognised text region as stored in the OCR manager.
 *
 * The four (x, y) pairs are the corners of the region. They are consumed in the
 * order 1 → 4 by `getOcrBoundingBoxes`, which treats them as normalized (0-1)
 * image coordinates.
 */
export type OcrBoundingBox = {
  id: string;
  assetId: string;

  // Corner 1
  x1: number;
  y1: number;
  // Corner 2
  x2: number;
  y2: number;
  // Corner 3
  x3: number;
  y3: number;
  // Corner 4
  x4: number;
  y4: number;

  // NOTE(review): presumably the detector's confidence for the region itself — confirm against the API.
  boxScore: number;
  // Surfaced as `confidence` on the screen-space box.
  textScore: number;
  // The recognised text.
  text: string;
};
|
||||||
|
|
||||||
|
/**
 * Reactive holder for the OCR results of a single asset plus the overlay toggle.
 *
 * Loads are asynchronous, and callers may clear or reload before an earlier
 * request has finished. Every load and every clear therefore bumps a token,
 * and a response is applied only if its token is still the latest one.
 */
class OcrManager {
  #data = $state<OcrBoundingBox[]>([]);
  /** Whether the bounding-box overlay is currently shown. */
  showOverlay = $state(false);
  /** True once a load has returned at least one text region. */
  hasOcrData = $state(false);

  // Identifies the most recent load/clear; not reactive on purpose.
  #requestToken = 0;

  /** OCR regions from the most recent load that was applied (empty after `clear`). */
  get data() {
    return this.#data;
  }

  /**
   * Fetch the OCR regions for the asset `id` and publish them.
   * The result is discarded if another `getAssetOcr` or `clear` call happened
   * while the request was in flight, so a slow response cannot overwrite newer state.
   */
  async getAssetOcr(id: string) {
    const token = ++this.#requestToken;
    const data = await getAssetOcr({ id });
    if (token !== this.#requestToken) {
      return;
    }

    this.#data = data;
    this.hasOcrData = data.length > 0;
  }

  /** Reset to the empty state, hide the overlay and invalidate any in-flight load. */
  clear() {
    this.#requestToken++;
    this.#data = [];
    this.showOverlay = false;
    this.hasOcrData = false;
  }

  /** Flip the overlay visibility. */
  toggleOcrBoundingBox() {
    this.showOverlay = !this.showOverlay;
  }
}
|
||||||
|
|
||||||
|
export const ocrManager = new OcrManager();
|
||||||
131
web/src/lib/utils/ocr-utils.ts
Normal file
131
web/src/lib/utils/ocr-utils.ts
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
import type { OcrBoundingBox } from '$lib/stores/ocr.svelte';
|
||||||
|
import type { ZoomImageWheelState } from '@zoom-image/core';
|
||||||
|
|
||||||
|
/**
 * Size of the picture as actually drawn inside `img` when its aspect ratio is
 * preserved and it is fitted within the element's `width` x `height`.
 */
const getContainedSize = (img: HTMLImageElement): { width: number; height: number } => {
  const aspect = img.naturalWidth / img.naturalHeight;
  const widthAtFullHeight = img.height * aspect;

  // Too wide when filling the height: the width is the limiting side instead.
  if (widthAtFullHeight > img.width) {
    return { width: img.width, height: img.width / aspect };
  }

  return { width: widthAtFullHeight, height: img.height };
};
|
||||||
|
|
||||||
|
/** A recognised text region expressed in screen coordinates, ready for rendering. */
export interface OcrBox {
  /** Identifier copied from the source OCR record. */
  id: string;
  /** Corner points in pixels, in the same order as the source record's corners 1-4. */
  points: { x: number; y: number }[];
  /** The recognised text. */
  text: string;
  /** The source record's `textScore`. */
  confidence: number;
}
|
||||||
|
|
||||||
|
/** Axis-aligned extents of a quadrilateral plus the angles (in degrees) describing its tilt. */
export interface BoundingBoxDimensions {
  minX: number;
  maxX: number;
  minY: number;
  maxY: number;
  width: number;
  height: number;
  centerX: number;
  centerY: number;
  rotation: number;
  skewX: number;
  skewY: number;
}

/**
 * Derive layout values for a text box from its corner points.
 *
 * @param points - Corners in the order top-left, top-right, bottom-right, bottom-left
 * @returns Axis-aligned extents and centre, the bottom edge's angle as `rotation`,
 *          and the angular difference between opposite edges as `skewX` / `skewY` (all in degrees)
 */
export const calculateBoundingBoxDimensions = (points: { x: number; y: number }[]): BoundingBoxDimensions => {
  const RAD_TO_DEG = 180 / Math.PI;
  type Point = { x: number; y: number };
  // Direction of the segment from -> to, in radians.
  const edgeAngle = (from: Point, to: Point) => Math.atan2(to.y - from.y, to.x - from.x);

  const xs = points.map((point) => point.x);
  const ys = points.map((point) => point.y);
  const minX = Math.min(...xs);
  const maxX = Math.max(...xs);
  const minY = Math.min(...ys);
  const maxY = Math.max(...ys);

  const [topLeft, topRight, bottomRight, bottomLeft] = points;

  // The bottom edge gives both the overall rotation and one side of the skewY comparison.
  const bottomEdgeAngle = edgeAngle(bottomLeft, bottomRight);
  const topEdgeAngle = edgeAngle(topLeft, topRight);
  const leftEdgeAngle = edgeAngle(topLeft, bottomLeft);
  const rightEdgeAngle = edgeAngle(topRight, bottomRight);

  return {
    minX,
    maxX,
    minY,
    maxY,
    width: maxX - minX,
    height: maxY - minY,
    centerX: (minX + maxX) / 2,
    centerY: (minY + maxY) / 2,
    rotation: bottomEdgeAngle * RAD_TO_DEG,
    // Non-parallel left/right edges show up as horizontal skew ...
    skewX: (rightEdgeAngle - leftEdgeAngle) * RAD_TO_DEG,
    // ... and non-parallel top/bottom edges as vertical skew.
    skewY: (bottomEdgeAngle - topEdgeAngle) * RAD_TO_DEG,
  };
};
|
||||||
|
|
||||||
|
/**
 * Convert normalized OCR coordinates to screen coordinates.
 *
 * Each OCR record carries four corners with coordinates in the 0-1 range. A corner is
 * scaled to the size at which the picture is drawn inside the element, shifted by the
 * letterbox margin, and then the current zoom factor and pan offset are applied.
 *
 * @param ocrData - OCR records for the displayed asset
 * @param zoom - Current zoom factor and pan position of the viewer
 * @param photoViewer - The image element showing the asset
 * @returns One screen-space box per record; empty when the element is missing or has no
 *          natural size yet
 */
export const getOcrBoundingBoxes = (
  ocrData: OcrBoundingBox[],
  zoom: ZoomImageWheelState,
  photoViewer: HTMLImageElement | null,
): OcrBox[] => {
  // Without natural dimensions the drawn size cannot be computed.
  if (!photoViewer || !photoViewer.naturalWidth || !photoViewer.naturalHeight) {
    return [];
  }

  const { width, height } = getContainedSize(photoViewer);
  const { currentZoom, currentPositionX, currentPositionY } = zoom;

  // Loop-invariant factors: normalized coordinate -> zoomed pixels, and the zoomed
  // letterbox margin plus pan offset.
  const scaleX = width * currentZoom;
  const scaleY = height * currentZoom;
  const offsetX = ((photoViewer.clientWidth - width) / 2) * currentZoom + currentPositionX;
  const offsetY = ((photoViewer.clientHeight - height) / 2) * currentZoom + currentPositionY;

  const toScreen = (x: number, y: number) => ({ x: x * scaleX + offsetX, y: y * scaleY + offsetY });

  return ocrData.map((ocr) => ({
    id: ocr.id,
    points: [
      toScreen(ocr.x1, ocr.y1),
      toScreen(ocr.x2, ocr.y2),
      toScreen(ocr.x3, ocr.y3),
      toScreen(ocr.x4, ocr.y4),
    ],
    text: ocr.text,
    confidence: ocr.textScore,
  }));
};
|
||||||
Loading…
x
Reference in New Issue
Block a user