* feat: add OCR functionality and related configurations

* chore: update labeler configuration for machine learning files

* feat(i18n): enhance OCR model descriptions and add orientation classification and unwarping features

* chore: update Dockerfile to include ccache for improved build performance

* feat(ocr): enhance OCR model configuration with orientation classification and unwarping options, update PaddleOCR integration, and improve response structure

* refactor(ocr): remove OCR_CLEANUP job from enum and type definitions

* refactor(ocr): remove obsolete OCR entity and migration files, and update asset job status and schema to accommodate new OCR table structure

* refactor(ocr): update OCR schema and response structure to use individual coordinates instead of bounding box, and adjust related service and repository files

* feat: enhance OCR configuration and functionality

- Updated OCR settings to include minimum detection box score, minimum detection score, and minimum recognition score.
- Refactored PaddleOCRecognizer to utilize new scoring parameters.
- Introduced new database tables for asset OCR data and search functionality.
- Modified related services and repositories to support the new OCR features.
- Updated translations for improved clarity in settings UI.
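
For reference, the OCR settings involved have this shape (a hedged sketch assembled from the generated `OcrConfig` model further down; the values shown are the in-code defaults visible in this diff, not authoritative):

```python
# Sketch of the OCR config section introduced here. Field names mirror
# the generated OcrConfig model below; values are the code defaults.
ocr_config = {
    "enabled": True,
    "modelName": "PP-OCRv5_mobile",  # or "PP-OCRv5_server"
    "maxResolution": 736,            # TextDetector default (minimum 1)
    "minDetectionScore": 0.5,        # detector box_thresh (range 0.1-1)
    "minRecognitionScore": 0.9,      # recognizer threshold (range 0.1-1)
}
```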

* sql changes

* use rapidocr

* change dto

* update web

* update lock

* update api

* store positions as normalized floats

* match column order in db

* update admin ui settings descriptions

* fix max resolution key

* set min threshold to 0.1

* fix bind

* apply config correctly, adjust defaults

* unnecessary model type

* unnecessary sources

* fix(ocr): switch RapidOCR lang type from LangDet to LangRec

* fix(ocr): expose lang_type (LangRec.CH) and font_path on OcrOptions for RapidOCR

* fix(ocr): make OCR text search case- and accent-insensitive using ILIKE + unaccent
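
  A minimal sketch of that pattern, assuming Postgres with the `unaccent` extension enabled; the table and column names are illustrative placeholders, not the actual schema:

```python
# Hedged sketch: case- and accent-insensitive matching via ILIKE +
# unaccent (requires `CREATE EXTENSION unaccent` in Postgres).
# `asset_ocr.text` is a placeholder name, not Immich's real schema.
import psycopg

def search_ocr_text(conn: psycopg.Connection, needle: str) -> list[str]:
    pattern = f"%{needle}%"  # substring match; ILIKE ignores case
    rows = conn.execute(
        "SELECT text FROM asset_ocr WHERE unaccent(text) ILIKE unaccent(%s)",
        (pattern,),
    ).fetchall()
    return [row[0] for row in rows]
```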

* fix(ocr): add OCR search fields

* fix: Add OCR database migration and update ML prediction logic.

* trigrams are already case insensitive

* add tests

* format

* update migrations

* wrong uuid function

* linting

* maybe fix medium tests

* formatting

* fix weblate check

* openapi

* sql

* minor fixes

* maybe fix medium tests part 2

* passing medium tests

* format web

* readd sql

* format dart

* disabled in e2e

* chore: translation ordering

---------

Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com>
Co-authored-by: Alex Tran <alex.tran1502@gmail.com>
Kang committed 2025-10-27 22:09:55 +08:00 (committed by GitHub)
commit 02b29046b3, parent c666dc6c67
90 changed files with 3610 additions and 1722 deletions

.github/labeler.yml

@ -31,7 +31,7 @@ documentation:
🧠machine-learning:
- changed-files:
- any-glob-to-any-file:
- machine-learning/app/**
- machine-learning/**
changelog:translation:
- head-branch: ['^chore/translations$']


@ -122,7 +122,7 @@ services:
ports:
- 3003:3003
volumes:
- ../machine-learning:/usr/src/app
- ../machine-learning/immich_ml:/usr/src/immich_ml
- model-cache:/cache
env_file:
- .env


@ -113,6 +113,7 @@ describe('/server', () => {
importFaces: false,
oauth: false,
oauthAutoLaunch: false,
ocr: false,
passwordLogin: true,
search: true,
sidecar: true,


@ -154,6 +154,18 @@
"machine_learning_min_detection_score_description": "Minimum confidence score for a face to be detected from 0-1. Lower values will detect more faces but may result in false positives.",
"machine_learning_min_recognized_faces": "Minimum recognized faces",
"machine_learning_min_recognized_faces_description": "The minimum number of recognized faces for a person to be created. Increasing this makes Facial Recognition more precise at the cost of increasing the chance that a face is not assigned to a person.",
"machine_learning_ocr": "OCR",
"machine_learning_ocr_description": "Use machine learning to recognize text in images",
"machine_learning_ocr_enabled": "Enable OCR",
"machine_learning_ocr_enabled_description": "If disabled, images will not undergo text recognition.",
"machine_learning_ocr_max_resolution": "Maximum resolution",
"machine_learning_ocr_max_resolution_description": "Previews above this resolution will be resized while preserving aspect ratio. Higher values are more accurate, but take longer to process and use more memory.",
"machine_learning_ocr_min_detection_score": "Minimum detection score",
"machine_learning_ocr_min_detection_score_description": "Minimum confidence score for text to be detected from 0-1. Lower values will detect more text but may result in false positives.",
"machine_learning_ocr_min_recognition_score": "Minimum recognition score",
"machine_learning_ocr_min_score_recognition_description": "Minimum confidence score for detected text to be recognized from 0-1. Lower values will recognize more text but may result in false positives.",
"machine_learning_ocr_model": "OCR model",
"machine_learning_ocr_model_description": "Server models are more accurate than mobile models, but take longer to process and use more memory.",
"machine_learning_settings": "Machine Learning Settings",
"machine_learning_settings_description": "Manage machine learning features and settings",
"machine_learning_smart_search": "Smart Search",
@ -245,6 +257,7 @@
"oauth_storage_quota_default_description": "Quota in GiB to be used when no claim is provided.",
"oauth_timeout": "Request Timeout",
"oauth_timeout_description": "Timeout for requests in milliseconds",
"ocr_job_description": "Use machine learning to recognize text in images",
"password_enable_description": "Login with email and password",
"password_settings": "Password Login",
"password_settings_description": "Manage password login settings",
@ -1438,6 +1451,7 @@
"oauth": "OAuth",
"obtainium_configurator": "Obtainium Configurator",
"obtainium_configurator_instructions": "Use Obtainium to install and update the Android app directly from Immich GitHub's release. Create an API key and select a variant to create your Obtainium configuration link",
"ocr": "OCR",
"official_immich_resources": "Official Immich Resources",
"offline": "Offline",
"offset": "Offset",
@ -1715,6 +1729,8 @@
"search_by_description_example": "Hiking day in Sapa",
"search_by_filename": "Search by file name or extension",
"search_by_filename_example": "i.e. IMG_1234.JPG or PNG",
"search_by_ocr": "Search by OCR",
"search_by_ocr_example": "Latte",
"search_camera_lens_model": "Search lens model...",
"search_camera_make": "Search camera make...",
"search_camera_model": "Search camera model...",
@ -1732,6 +1748,7 @@
"search_filter_location_title": "Select location",
"search_filter_media_type": "Media Type",
"search_filter_media_type_title": "Select media type",
"search_filter_ocr": "Search by OCR",
"search_filter_people_title": "Select people",
"search_for": "Search for",
"search_for_existing_person": "Search for existing person",


@ -141,7 +141,7 @@ FROM prod-${DEVICE} AS prod
ARG DEVICE
RUN apt-get update && \
apt-get install -y --no-install-recommends tini $(if ! [ "$DEVICE" = "openvino" ] && ! [ "$DEVICE" = "rocm" ]; then echo "libmimalloc2.0"; fi) && \
apt-get install -y --no-install-recommends tini ccache libgl1 libglib2.0-0 libgomp1 $(if ! [ "$DEVICE" = "openvino" ] && ! [ "$DEVICE" = "rocm" ]; then echo "libmimalloc2.0"; fi) && \
apt-get autoremove -yqq && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*


@ -41,6 +41,7 @@ class PreloadModelData(BaseModel):
class MaxBatchSize(BaseModel):
facial_recognition: int | None = None
text_recognition: int | None = None
class Settings(BaseSettings):


@ -183,7 +183,9 @@ async def run_inference(payload: Image | str, entries: InferenceEntries) -> Infe
response: InferenceResponse = {}
async def _run_inference(entry: InferenceEntry) -> None:
model = await model_cache.get(entry["name"], entry["type"], entry["task"], ttl=settings.model_ttl)
model = await model_cache.get(
entry["name"], entry["type"], entry["task"], ttl=settings.model_ttl, **entry["options"]
)
inputs = [payload]
for dep in model.depends:
try:
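
With this change, per-model options travel with the inference entry into the cache lookup (and from there to each model's `configure()`). A hedged sketch of what such an entry could look like; the exact `InferenceEntry` definition is not shown in this diff, so the keys are inferred from the call site above:

```python
# Illustrative only: keys inferred from the model_cache.get() call.
from immich_ml.schemas import ModelTask, ModelType

entry = {
    "name": "PP-OCRv5_mobile",
    "type": ModelType.DETECTION,
    "task": ModelTask.OCR,
    "options": {"maxResolution": 736, "minScore": 0.5},
}
```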


@ -3,6 +3,8 @@ from typing import Any
from immich_ml.models.base import InferenceModel
from immich_ml.models.clip.textual import MClipTextualEncoder, OpenClipTextualEncoder
from immich_ml.models.clip.visual import OpenClipVisualEncoder
from immich_ml.models.ocr.detection import TextDetector
from immich_ml.models.ocr.recognition import TextRecognizer
from immich_ml.schemas import ModelSource, ModelTask, ModelType
from .constants import get_model_source
@ -28,6 +30,12 @@ def get_model_class(model_name: str, model_type: ModelType, model_task: ModelTas
case ModelSource.INSIGHTFACE, ModelType.RECOGNITION, ModelTask.FACIAL_RECOGNITION:
return FaceRecognizer
case ModelSource.PADDLE, ModelType.DETECTION, ModelTask.OCR:
return TextDetector
case ModelSource.PADDLE, ModelType.RECOGNITION, ModelTask.OCR:
return TextRecognizer
case _:
raise ValueError(f"Unknown model combination: {source}, {model_type}, {model_task}")
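
As a usage sketch of the new dispatch (hedged; `get_model_class` lives in the module shown above, so no import is needed there):

```python
# Hypothetical dispatch example for the new Paddle match arms above.
from immich_ml.schemas import ModelTask, ModelType

det_cls = get_model_class("PP-OCRv5_mobile", ModelType.DETECTION, ModelTask.OCR)
rec_cls = get_model_class("PP-OCRv5_mobile", ModelType.RECOGNITION, ModelTask.OCR)
# det_cls is TextDetector and rec_cls is TextRecognizer, because
# get_model_source() maps the "PP-OCRv5_*" names to ModelSource.PADDLE.
```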


@ -38,9 +38,8 @@ class InferenceModel(ABC):
def download(self) -> None:
if not self.cached:
log.info(
f"Downloading {self.model_type.replace('-', ' ')} model '{self.model_name}'. This may take a while."
)
model_type = self.model_type.replace("-", " ")
log.info(f"Downloading {model_type} model '{self.model_name}' to {self.model_path}. This may take a while.")
self._download()
def load(self) -> None:
@ -58,7 +57,7 @@ class InferenceModel(ABC):
self.load()
if model_kwargs:
self.configure(**model_kwargs)
return self._predict(*inputs, **model_kwargs)
return self._predict(*inputs)
@abstractmethod
def _predict(self, *inputs: Any, **model_kwargs: Any) -> Any: ...


@ -19,7 +19,7 @@ class BaseCLIPTextualEncoder(InferenceModel):
depends = []
identity = (ModelType.TEXTUAL, ModelTask.SEARCH)
def _predict(self, inputs: str, language: str | None = None, **kwargs: Any) -> str:
def _predict(self, inputs: str, language: str | None = None) -> str:
tokens = self.tokenize(inputs, language=language)
res: NDArray[np.float32] = self.session.run(None, tokens)[0][0]
return serialize_np_array(res)


@ -26,7 +26,7 @@ class BaseCLIPVisualEncoder(InferenceModel):
depends = []
identity = (ModelType.VISUAL, ModelTask.SEARCH)
def _predict(self, inputs: Image.Image | bytes, **kwargs: Any) -> str:
def _predict(self, inputs: Image.Image | bytes) -> str:
image = decode_pil(inputs)
res: NDArray[np.float32] = self.session.run(None, self.transform(image))[0][0]
return serialize_np_array(res)


@ -75,6 +75,11 @@ _INSIGHTFACE_MODELS = {
}
_PADDLE_MODELS = {
"PP-OCRv5_server",
"PP-OCRv5_mobile",
}
SUPPORTED_PROVIDERS = [
"CUDAExecutionProvider",
"ROCMExecutionProvider",
@ -159,4 +164,7 @@ def get_model_source(model_name: str) -> ModelSource | None:
if cleaned_name in _OPENCLIP_MODELS:
return ModelSource.OPENCLIP
if cleaned_name in _PADDLE_MODELS:
return ModelSource.PADDLE
return None


@ -24,7 +24,7 @@ class FaceDetector(InferenceModel):
return session
def _predict(self, inputs: NDArray[np.uint8] | bytes, **kwargs: Any) -> FaceDetectionOutput:
def _predict(self, inputs: NDArray[np.uint8] | bytes) -> FaceDetectionOutput:
inputs = decode_cv2(inputs)
bboxes, landmarks = self._detect(inputs)


@ -44,7 +44,7 @@ class FaceRecognizer(InferenceModel):
return session
def _predict(
self, inputs: NDArray[np.uint8] | bytes | Image.Image, faces: FaceDetectionOutput, **kwargs: Any
self, inputs: NDArray[np.uint8] | bytes | Image.Image, faces: FaceDetectionOutput
) -> FacialRecognitionOutput:
if faces["boxes"].shape[0] == 0:
return []


@ -0,0 +1,86 @@
from typing import Any
import numpy as np
from PIL import Image
from rapidocr.ch_ppocr_det import TextDetector as RapidTextDetector
from rapidocr.inference_engine.base import FileInfo, InferSession
from rapidocr.utils import DownloadFile, DownloadFileInput
from rapidocr.utils.typings import EngineType, LangDet, OCRVersion, TaskType
from rapidocr.utils.typings import ModelType as RapidModelType
from immich_ml.config import log
from immich_ml.models.base import InferenceModel
from immich_ml.models.transforms import decode_cv2
from immich_ml.schemas import ModelFormat, ModelSession, ModelTask, ModelType
from immich_ml.sessions.ort import OrtSession
from .schemas import OcrOptions, TextDetectionOutput
class TextDetector(InferenceModel):
depends = []
identity = (ModelType.DETECTION, ModelTask.OCR)
def __init__(self, model_name: str, **model_kwargs: Any) -> None:
super().__init__(model_name, **model_kwargs, model_format=ModelFormat.ONNX)
self.max_resolution = 736
self.min_score = 0.5
self.score_mode = "fast"
self._empty: TextDetectionOutput = {
"image": np.empty(0, dtype=np.float32),
"boxes": np.empty(0, dtype=np.float32),
"scores": np.empty(0, dtype=np.float32),
}
def _download(self) -> None:
model_info = InferSession.get_model_url(
FileInfo(
engine_type=EngineType.ONNXRUNTIME,
ocr_version=OCRVersion.PPOCRV5,
task_type=TaskType.DET,
lang_type=LangDet.CH,
model_type=RapidModelType.MOBILE if "mobile" in self.model_name else RapidModelType.SERVER,
)
)
download_params = DownloadFileInput(
file_url=model_info["model_dir"],
sha256=model_info["SHA256"],
save_path=self.model_path,
logger=log,
)
DownloadFile.run(download_params)
def _load(self) -> ModelSession:
# TODO: support other runtime sessions
session = OrtSession(self.model_path)
self.model = RapidTextDetector(
OcrOptions(
session=session.session,
limit_side_len=self.max_resolution,
limit_type="min",
box_thresh=self.min_score,
score_mode=self.score_mode,
)
)
return session
def _predict(self, inputs: bytes | Image.Image) -> TextDetectionOutput:
results = self.model(decode_cv2(inputs))
if results.boxes is None or results.scores is None or results.img is None:
return self._empty
return {
"image": results.img,
"boxes": np.array(results.boxes, dtype=np.float32),
"scores": np.array(results.scores, dtype=np.float32),
}
def configure(self, **kwargs: Any) -> None:
if (max_resolution := kwargs.get("maxResolution")) is not None:
self.max_resolution = max_resolution
self.model.limit_side_len = max_resolution
if (min_score := kwargs.get("minScore")) is not None:
self.min_score = min_score
self.model.postprocess_op.box_thresh = min_score
if (score_mode := kwargs.get("scoreMode")) is not None:
self.score_mode = score_mode
self.model.postprocess_op.score_mode = score_mode
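
A hedged usage sketch of the detector above, assuming the base class exposes `load()`/`predict()` as the earlier `base.py` hunk suggests; note that `configure()` reads the camelCase option names sent by the server:

```python
# Sketch only: model download and caching behavior depend on the
# RapidOCR release files and the local model cache directory.
detector = TextDetector("PP-OCRv5_mobile")
detector.load()  # downloads the ONNX model on first use

# configure() consumes the camelCase keys from the server config
detector.configure(maxResolution=1024, minScore=0.4, scoreMode="slow")

with open("receipt.jpg", "rb") as f:
    output = detector.predict(f.read())
print(output["boxes"].shape, output["scores"])
```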


@ -0,0 +1,117 @@
from typing import Any
import cv2
import numpy as np
from numpy.typing import NDArray
from PIL.Image import Image
from rapidocr.ch_ppocr_rec import TextRecInput
from rapidocr.ch_ppocr_rec import TextRecognizer as RapidTextRecognizer
from rapidocr.inference_engine.base import FileInfo, InferSession
from rapidocr.utils import DownloadFile, DownloadFileInput
from rapidocr.utils.typings import EngineType, LangRec, OCRVersion, TaskType
from rapidocr.utils.typings import ModelType as RapidModelType
from immich_ml.config import log, settings
from immich_ml.models.base import InferenceModel
from immich_ml.schemas import ModelFormat, ModelSession, ModelTask, ModelType
from immich_ml.sessions.ort import OrtSession
from .schemas import OcrOptions, TextDetectionOutput, TextRecognitionOutput
class TextRecognizer(InferenceModel):
depends = [(ModelType.DETECTION, ModelTask.OCR)]
identity = (ModelType.RECOGNITION, ModelTask.OCR)
def __init__(self, model_name: str, **model_kwargs: Any) -> None:
self.min_score = model_kwargs.get("minScore", 0.9)
self._empty: TextRecognitionOutput = {
"box": np.empty(0, dtype=np.float32),
"boxScore": np.empty(0, dtype=np.float32),
"text": [],
"textScore": np.empty(0, dtype=np.float32),
}
super().__init__(model_name, **model_kwargs, model_format=ModelFormat.ONNX)
def _download(self) -> None:
model_info = InferSession.get_model_url(
FileInfo(
engine_type=EngineType.ONNXRUNTIME,
ocr_version=OCRVersion.PPOCRV5,
task_type=TaskType.REC,
lang_type=LangRec.CH,
model_type=RapidModelType.MOBILE if "mobile" in self.model_name else RapidModelType.SERVER,
)
)
download_params = DownloadFileInput(
file_url=model_info["model_dir"],
sha256=model_info["SHA256"],
save_path=self.model_path,
logger=log,
)
DownloadFile.run(download_params)
def _load(self) -> ModelSession:
# TODO: support other runtimes
session = OrtSession(self.model_path)
self.model = RapidTextRecognizer(
OcrOptions(
session=session.session,
rec_batch_num=settings.max_batch_size.text_recognition if settings.max_batch_size is not None else 6,
rec_img_shape=(3, 48, 320),
)
)
return session
def _predict(self, _: Image, texts: TextDetectionOutput) -> TextRecognitionOutput:
boxes, img, box_scores = texts["boxes"], texts["image"], texts["scores"]
if boxes.shape[0] == 0:
return self._empty
rec = self.model(TextRecInput(img=self.get_crop_img_list(img, boxes)))
if rec.txts is None:
return self._empty
height, width = img.shape[0:2]
boxes[:, :, 0] /= width
boxes[:, :, 1] /= height
text_scores = np.array(rec.scores)
valid_text_score_idx = text_scores > self.min_score
valid_score_idx_list = valid_text_score_idx.tolist()
return {
"box": boxes.reshape(-1, 8)[valid_text_score_idx].reshape(-1),
"text": [rec.txts[i] for i in range(len(rec.txts)) if valid_score_idx_list[i]],
"boxScore": box_scores[valid_text_score_idx],
"textScore": text_scores[valid_text_score_idx],
}
def get_crop_img_list(self, img: NDArray[np.float32], boxes: NDArray[np.float32]) -> list[NDArray[np.float32]]:
img_crop_width = np.maximum(
np.linalg.norm(boxes[:, 1] - boxes[:, 0], axis=1), np.linalg.norm(boxes[:, 2] - boxes[:, 3], axis=1)
).astype(np.int32)
img_crop_height = np.maximum(
np.linalg.norm(boxes[:, 0] - boxes[:, 3], axis=1), np.linalg.norm(boxes[:, 1] - boxes[:, 2], axis=1)
).astype(np.int32)
pts_std = np.zeros((img_crop_width.shape[0], 4, 2), dtype=np.float32)
pts_std[:, 1:3, 0] = img_crop_width[:, None]
pts_std[:, 2:4, 1] = img_crop_height[:, None]
img_crop_sizes = np.stack([img_crop_width, img_crop_height], axis=1).tolist()
imgs: list[NDArray[np.float32]] = []
for box, pts_std, dst_size in zip(list(boxes), list(pts_std), img_crop_sizes):
M = cv2.getPerspectiveTransform(box, pts_std)
dst_img: NDArray[np.float32] = cv2.warpPerspective(
img,
M,
dst_size,
borderMode=cv2.BORDER_REPLICATE,
flags=cv2.INTER_CUBIC,
) # type: ignore
dst_height, dst_width = dst_img.shape[0:2]
if dst_height * 1.0 / dst_width >= 1.5:
dst_img = np.rot90(dst_img)
imgs.append(dst_img)
return imgs
def configure(self, **kwargs: Any) -> None:
self.min_score = kwargs.get("minScore", self.min_score)
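
Since `_predict` above divides box coordinates by the image width and height before returning them (the "store positions as normalized floats" commit), each detection comes back as 8 normalized floats. A hedged helper showing how a consumer might map them back to pixel space:

```python
import numpy as np

# Illustrative helper, not part of the PR: undo the normalization
# applied in TextRecognizer._predict for a given image size.
def denormalize_boxes(flat: np.ndarray, width: int, height: int) -> np.ndarray:
    boxes = flat.reshape(-1, 4, 2).copy()  # (n_boxes, 4 corners, xy)
    boxes[:, :, 0] *= width   # x coordinates back to pixels
    boxes[:, :, 1] *= height  # y coordinates back to pixels
    return boxes
```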


@ -0,0 +1,28 @@
from typing import Any, Iterable
import numpy as np
import numpy.typing as npt
from rapidocr.utils.typings import EngineType, LangRec
from typing_extensions import TypedDict
class TextDetectionOutput(TypedDict):
image: npt.NDArray[np.float32]
boxes: npt.NDArray[np.float32]
scores: npt.NDArray[np.float32]
class TextRecognitionOutput(TypedDict):
box: npt.NDArray[np.float32]
boxScore: npt.NDArray[np.float32]
text: Iterable[str]
textScore: npt.NDArray[np.float32]
# RapidOCR expects `engine_type`, `lang_type`, and `font_path` to be attributes
class OcrOptions(dict[str, Any]):
def __init__(self, **options: Any) -> None:
super().__init__(**options)
self.engine_type = EngineType.ONNXRUNTIME
self.lang_type = LangRec.CH
self.font_path = None
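
The comment above is the key detail: RapidOCR's components read `engine_type`, `lang_type`, and `font_path` via attribute access, while the remaining options are consumed as mapping entries. A small hedged illustration of the dict-plus-attributes duck typing:

```python
# Illustration only; option keys are examples from the detector hunk.
opts = OcrOptions(box_thresh=0.5, limit_side_len=736)
assert opts["box_thresh"] == 0.5      # mapping access for options
assert opts.engine_type is not None   # attribute access for RapidOCR
assert opts.lang_type is not None
```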


@ -23,6 +23,7 @@ class BoundingBox(TypedDict):
class ModelTask(StrEnum):
FACIAL_RECOGNITION = "facial-recognition"
SEARCH = "clip"
OCR = "ocr"
class ModelType(StrEnum):
@ -42,6 +43,7 @@ class ModelSource(StrEnum):
INSIGHTFACE = "insightface"
MCLIP = "mclip"
OPENCLIP = "openclip"
PADDLE = "paddle"
ModelIdentity = tuple[ModelType, ModelTask]


@ -14,6 +14,8 @@ from ..config import log, settings
class OrtSession:
session: ort.InferenceSession
def __init__(
self,
model_path: Path | str,


@ -22,6 +22,8 @@ dependencies = [
"rich>=13.4.2",
"tokenizers>=0.15.0,<1.0",
"uvicorn[standard]>=0.22.0,<1.0",
"setuptools>=78.1.0",
"rapidocr>=3.1.0",
]
[dependency-groups]

machine-learning/uv.lock (generated)

File diff suppressed because it is too large


@ -1,6 +1,6 @@
enum SortOrder { asc, desc }
enum TextSearchType { context, filename, description }
enum TextSearchType { context, filename, description, ocr }
enum AssetVisibilityEnum { timeline, hidden, archive, locked }


@ -43,6 +43,7 @@ class SearchApiRepository extends ApiRepository {
originalFileName: filter.filename != null && filter.filename!.isNotEmpty ? filter.filename : null,
country: filter.location.country,
description: filter.description != null && filter.description!.isNotEmpty ? filter.description : null,
ocr: filter.ocr != null && filter.ocr!.isNotEmpty ? filter.ocr : null,
state: filter.location.state,
city: filter.location.city,
make: filter.camera.make,


@ -176,6 +176,7 @@ class SearchFilter {
String? context;
String? filename;
String? description;
String? ocr;
String? language;
Set<PersonDto> people;
SearchLocationFilter location;
@ -190,6 +191,7 @@ class SearchFilter {
this.context,
this.filename,
this.description,
this.ocr,
this.language,
required this.people,
required this.location,
@ -203,6 +205,7 @@ class SearchFilter {
return (context == null || (context != null && context!.isEmpty)) &&
(filename == null || (filename!.isEmpty)) &&
(description == null || (description!.isEmpty)) &&
(ocr == null || (ocr!.isEmpty)) &&
people.isEmpty &&
location.country == null &&
location.state == null &&
@ -222,6 +225,7 @@ class SearchFilter {
String? filename,
String? description,
String? language,
String? ocr,
Set<PersonDto>? people,
SearchLocationFilter? location,
SearchCameraFilter? camera,
@ -234,6 +238,7 @@ class SearchFilter {
filename: filename ?? this.filename,
description: description ?? this.description,
language: language ?? this.language,
ocr: ocr ?? this.ocr,
people: people ?? this.people,
location: location ?? this.location,
camera: camera ?? this.camera,
@ -245,7 +250,7 @@ class SearchFilter {
@override
String toString() {
return 'SearchFilter(context: $context, filename: $filename, description: $description, language: $language, people: $people, location: $location, camera: $camera, date: $date, display: $display, mediaType: $mediaType)';
return 'SearchFilter(context: $context, filename: $filename, description: $description, language: $language, ocr: $ocr, people: $people, location: $location, camera: $camera, date: $date, display: $display, mediaType: $mediaType)';
}
@override
@ -256,6 +261,7 @@ class SearchFilter {
other.filename == filename &&
other.description == description &&
other.language == language &&
other.ocr == ocr &&
other.people == people &&
other.location == location &&
other.camera == camera &&
@ -270,6 +276,7 @@ class SearchFilter {
filename.hashCode ^
description.hashCode ^
language.hashCode ^
ocr.hashCode ^
people.hashCode ^
location.hashCode ^
camera.hashCode ^


@ -389,15 +389,18 @@ class SearchPage extends HookConsumerWidget {
handleTextSubmitted(String value) {
switch (textSearchType.value) {
case TextSearchType.context:
filter.value = filter.value.copyWith(filename: '', context: value, description: '');
filter.value = filter.value.copyWith(filename: '', context: value, description: '', ocr: '');
break;
case TextSearchType.filename:
filter.value = filter.value.copyWith(filename: value, context: '', description: '');
filter.value = filter.value.copyWith(filename: value, context: '', description: '', ocr: '');
break;
case TextSearchType.description:
filter.value = filter.value.copyWith(filename: '', context: '', description: value);
filter.value = filter.value.copyWith(filename: '', context: '', description: value, ocr: '');
break;
case TextSearchType.ocr:
filter.value = filter.value.copyWith(filename: '', context: '', description: '', ocr: value);
break;
}
@ -408,6 +411,7 @@ class SearchPage extends HookConsumerWidget {
TextSearchType.context => Icons.image_search_rounded,
TextSearchType.filename => Icons.abc_rounded,
TextSearchType.description => Icons.text_snippet_outlined,
TextSearchType.ocr => Icons.document_scanner_outlined,
};
return Scaffold(
@ -493,6 +497,24 @@ class SearchPage extends HookConsumerWidget {
searchHintText.value = 'search_by_description_example'.tr();
},
),
MenuItemButton(
child: ListTile(
leading: const Icon(Icons.document_scanner_outlined),
title: Text(
'search_filter_ocr'.tr(),
style: context.textTheme.bodyLarge?.copyWith(
fontWeight: FontWeight.w500,
color: textSearchType.value == TextSearchType.ocr ? context.colorScheme.primary : null,
),
),
selectedColor: context.colorScheme.primary,
selected: textSearchType.value == TextSearchType.ocr,
),
onPressed: () {
textSearchType.value = TextSearchType.ocr;
searchHintText.value = 'search_by_ocr_example'.tr();
},
),
],
),
),


@ -395,15 +395,18 @@ class DriftSearchPage extends HookConsumerWidget {
handleTextSubmitted(String value) {
switch (textSearchType.value) {
case TextSearchType.context:
filter.value = filter.value.copyWith(filename: '', context: value, description: '');
filter.value = filter.value.copyWith(filename: '', context: value, description: '', ocr: '');
break;
case TextSearchType.filename:
filter.value = filter.value.copyWith(filename: value, context: '', description: '');
filter.value = filter.value.copyWith(filename: value, context: '', description: '', ocr: '');
break;
case TextSearchType.description:
filter.value = filter.value.copyWith(filename: '', context: '', description: value);
filter.value = filter.value.copyWith(filename: '', context: '', description: value, ocr: '');
break;
case TextSearchType.ocr:
filter.value = filter.value.copyWith(filename: '', context: '', description: '', ocr: value);
break;
}
@ -414,6 +417,7 @@ class DriftSearchPage extends HookConsumerWidget {
TextSearchType.context => Icons.image_search_rounded,
TextSearchType.filename => Icons.abc_rounded,
TextSearchType.description => Icons.text_snippet_outlined,
TextSearchType.ocr => Icons.document_scanner_outlined,
};
return Scaffold(
@ -499,6 +503,24 @@ class DriftSearchPage extends HookConsumerWidget {
searchHintText.value = 'search_by_description_example'.t(context: context);
},
),
MenuItemButton(
child: ListTile(
leading: const Icon(Icons.document_scanner_outlined),
title: Text(
'search_by_ocr'.t(context: context),
style: context.textTheme.bodyLarge?.copyWith(
fontWeight: FontWeight.w500,
color: textSearchType.value == TextSearchType.ocr ? context.colorScheme.primary : null,
),
),
selectedColor: context.colorScheme.primary,
selected: textSearchType.value == TextSearchType.ocr,
),
onPressed: () {
textSearchType.value = TextSearchType.ocr;
searchHintText.value = 'search_by_ocr_example'.t(context: context);
},
),
],
),
),


@ -419,6 +419,7 @@ Class | Method | HTTP request | Description
- [OAuthCallbackDto](doc//OAuthCallbackDto.md)
- [OAuthConfigDto](doc//OAuthConfigDto.md)
- [OAuthTokenEndpointAuthMethod](doc//OAuthTokenEndpointAuthMethod.md)
- [OcrConfig](doc//OcrConfig.md)
- [OnThisDayDto](doc//OnThisDayDto.md)
- [OnboardingDto](doc//OnboardingDto.md)
- [OnboardingResponseDto](doc//OnboardingResponseDto.md)


@ -189,6 +189,7 @@ part 'model/o_auth_authorize_response_dto.dart';
part 'model/o_auth_callback_dto.dart';
part 'model/o_auth_config_dto.dart';
part 'model/o_auth_token_endpoint_auth_method.dart';
part 'model/ocr_config.dart';
part 'model/on_this_day_dto.dart';
part 'model/onboarding_dto.dart';
part 'model/onboarding_response_dto.dart';


@ -353,6 +353,8 @@ class SearchApi {
///
/// * [String] model:
///
/// * [String] ocr:
///
/// * [List<String>] personIds:
///
/// * [num] rating:
@ -382,7 +384,7 @@ class SearchApi {
/// * [bool] withDeleted:
///
/// * [bool] withExif:
Future<Response> searchLargeAssetsWithHttpInfo({ List<String>? albumIds, String? city, String? country, DateTime? createdAfter, DateTime? createdBefore, String? deviceId, bool? isEncoded, bool? isFavorite, bool? isMotion, bool? isNotInAlbum, bool? isOffline, String? lensModel, String? libraryId, String? make, int? minFileSize, String? model, List<String>? personIds, num? rating, num? size, String? state, List<String>? tagIds, DateTime? takenAfter, DateTime? takenBefore, DateTime? trashedAfter, DateTime? trashedBefore, AssetTypeEnum? type, DateTime? updatedAfter, DateTime? updatedBefore, AssetVisibility? visibility, bool? withDeleted, bool? withExif, }) async {
Future<Response> searchLargeAssetsWithHttpInfo({ List<String>? albumIds, String? city, String? country, DateTime? createdAfter, DateTime? createdBefore, String? deviceId, bool? isEncoded, bool? isFavorite, bool? isMotion, bool? isNotInAlbum, bool? isOffline, String? lensModel, String? libraryId, String? make, int? minFileSize, String? model, String? ocr, List<String>? personIds, num? rating, num? size, String? state, List<String>? tagIds, DateTime? takenAfter, DateTime? takenBefore, DateTime? trashedAfter, DateTime? trashedBefore, AssetTypeEnum? type, DateTime? updatedAfter, DateTime? updatedBefore, AssetVisibility? visibility, bool? withDeleted, bool? withExif, }) async {
// ignore: prefer_const_declarations
final apiPath = r'/search/large-assets';
@ -441,6 +443,9 @@ class SearchApi {
if (model != null) {
queryParams.addAll(_queryParams('', 'model', model));
}
if (ocr != null) {
queryParams.addAll(_queryParams('', 'ocr', ocr));
}
if (personIds != null) {
queryParams.addAll(_queryParams('multi', 'personIds', personIds));
}
@ -537,6 +542,8 @@ class SearchApi {
///
/// * [String] model:
///
/// * [String] ocr:
///
/// * [List<String>] personIds:
///
/// * [num] rating:
@ -566,8 +573,8 @@ class SearchApi {
/// * [bool] withDeleted:
///
/// * [bool] withExif:
Future<List<AssetResponseDto>?> searchLargeAssets({ List<String>? albumIds, String? city, String? country, DateTime? createdAfter, DateTime? createdBefore, String? deviceId, bool? isEncoded, bool? isFavorite, bool? isMotion, bool? isNotInAlbum, bool? isOffline, String? lensModel, String? libraryId, String? make, int? minFileSize, String? model, List<String>? personIds, num? rating, num? size, String? state, List<String>? tagIds, DateTime? takenAfter, DateTime? takenBefore, DateTime? trashedAfter, DateTime? trashedBefore, AssetTypeEnum? type, DateTime? updatedAfter, DateTime? updatedBefore, AssetVisibility? visibility, bool? withDeleted, bool? withExif, }) async {
final response = await searchLargeAssetsWithHttpInfo( albumIds: albumIds, city: city, country: country, createdAfter: createdAfter, createdBefore: createdBefore, deviceId: deviceId, isEncoded: isEncoded, isFavorite: isFavorite, isMotion: isMotion, isNotInAlbum: isNotInAlbum, isOffline: isOffline, lensModel: lensModel, libraryId: libraryId, make: make, minFileSize: minFileSize, model: model, personIds: personIds, rating: rating, size: size, state: state, tagIds: tagIds, takenAfter: takenAfter, takenBefore: takenBefore, trashedAfter: trashedAfter, trashedBefore: trashedBefore, type: type, updatedAfter: updatedAfter, updatedBefore: updatedBefore, visibility: visibility, withDeleted: withDeleted, withExif: withExif, );
Future<List<AssetResponseDto>?> searchLargeAssets({ List<String>? albumIds, String? city, String? country, DateTime? createdAfter, DateTime? createdBefore, String? deviceId, bool? isEncoded, bool? isFavorite, bool? isMotion, bool? isNotInAlbum, bool? isOffline, String? lensModel, String? libraryId, String? make, int? minFileSize, String? model, String? ocr, List<String>? personIds, num? rating, num? size, String? state, List<String>? tagIds, DateTime? takenAfter, DateTime? takenBefore, DateTime? trashedAfter, DateTime? trashedBefore, AssetTypeEnum? type, DateTime? updatedAfter, DateTime? updatedBefore, AssetVisibility? visibility, bool? withDeleted, bool? withExif, }) async {
final response = await searchLargeAssetsWithHttpInfo( albumIds: albumIds, city: city, country: country, createdAfter: createdAfter, createdBefore: createdBefore, deviceId: deviceId, isEncoded: isEncoded, isFavorite: isFavorite, isMotion: isMotion, isNotInAlbum: isNotInAlbum, isOffline: isOffline, lensModel: lensModel, libraryId: libraryId, make: make, minFileSize: minFileSize, model: model, ocr: ocr, personIds: personIds, rating: rating, size: size, state: state, tagIds: tagIds, takenAfter: takenAfter, takenBefore: takenBefore, trashedAfter: trashedAfter, trashedBefore: trashedBefore, type: type, updatedAfter: updatedAfter, updatedBefore: updatedBefore, visibility: visibility, withDeleted: withDeleted, withExif: withExif, );
if (response.statusCode >= HttpStatus.badRequest) {
throw ApiException(response.statusCode, await _decodeBodyBytes(response));
}


@ -432,6 +432,8 @@ class ApiClient {
return OAuthConfigDto.fromJson(value);
case 'OAuthTokenEndpointAuthMethod':
return OAuthTokenEndpointAuthMethodTypeTransformer().decode(value);
case 'OcrConfig':
return OcrConfig.fromJson(value);
case 'OnThisDayDto':
return OnThisDayDto.fromJson(value);
case 'OnboardingDto':


@ -22,6 +22,7 @@ class AllJobStatusResponseDto {
required this.metadataExtraction,
required this.migration,
required this.notifications,
required this.ocr,
required this.search,
required this.sidecar,
required this.smartSearch,
@ -48,6 +49,8 @@ class AllJobStatusResponseDto {
JobStatusDto notifications;
JobStatusDto ocr;
JobStatusDto search;
JobStatusDto sidecar;
@ -71,6 +74,7 @@ class AllJobStatusResponseDto {
other.metadataExtraction == metadataExtraction &&
other.migration == migration &&
other.notifications == notifications &&
other.ocr == ocr &&
other.search == search &&
other.sidecar == sidecar &&
other.smartSearch == smartSearch &&
@ -90,6 +94,7 @@ class AllJobStatusResponseDto {
(metadataExtraction.hashCode) +
(migration.hashCode) +
(notifications.hashCode) +
(ocr.hashCode) +
(search.hashCode) +
(sidecar.hashCode) +
(smartSearch.hashCode) +
@ -98,7 +103,7 @@ class AllJobStatusResponseDto {
(videoConversion.hashCode);
@override
String toString() => 'AllJobStatusResponseDto[backgroundTask=$backgroundTask, backupDatabase=$backupDatabase, duplicateDetection=$duplicateDetection, faceDetection=$faceDetection, facialRecognition=$facialRecognition, library_=$library_, metadataExtraction=$metadataExtraction, migration=$migration, notifications=$notifications, search=$search, sidecar=$sidecar, smartSearch=$smartSearch, storageTemplateMigration=$storageTemplateMigration, thumbnailGeneration=$thumbnailGeneration, videoConversion=$videoConversion]';
String toString() => 'AllJobStatusResponseDto[backgroundTask=$backgroundTask, backupDatabase=$backupDatabase, duplicateDetection=$duplicateDetection, faceDetection=$faceDetection, facialRecognition=$facialRecognition, library_=$library_, metadataExtraction=$metadataExtraction, migration=$migration, notifications=$notifications, ocr=$ocr, search=$search, sidecar=$sidecar, smartSearch=$smartSearch, storageTemplateMigration=$storageTemplateMigration, thumbnailGeneration=$thumbnailGeneration, videoConversion=$videoConversion]';
Map<String, dynamic> toJson() {
final json = <String, dynamic>{};
@ -111,6 +116,7 @@ class AllJobStatusResponseDto {
json[r'metadataExtraction'] = this.metadataExtraction;
json[r'migration'] = this.migration;
json[r'notifications'] = this.notifications;
json[r'ocr'] = this.ocr;
json[r'search'] = this.search;
json[r'sidecar'] = this.sidecar;
json[r'smartSearch'] = this.smartSearch;
@ -138,6 +144,7 @@ class AllJobStatusResponseDto {
metadataExtraction: JobStatusDto.fromJson(json[r'metadataExtraction'])!,
migration: JobStatusDto.fromJson(json[r'migration'])!,
notifications: JobStatusDto.fromJson(json[r'notifications'])!,
ocr: JobStatusDto.fromJson(json[r'ocr'])!,
search: JobStatusDto.fromJson(json[r'search'])!,
sidecar: JobStatusDto.fromJson(json[r'sidecar'])!,
smartSearch: JobStatusDto.fromJson(json[r'smartSearch'])!,
@ -200,6 +207,7 @@ class AllJobStatusResponseDto {
'metadataExtraction',
'migration',
'notifications',
'ocr',
'search',
'sidecar',
'smartSearch',


@ -38,6 +38,7 @@ class JobName {
static const library_ = JobName._(r'library');
static const notifications = JobName._(r'notifications');
static const backupDatabase = JobName._(r'backupDatabase');
static const ocr = JobName._(r'ocr');
/// List of all possible values in this [enum][JobName].
static const values = <JobName>[
@ -56,6 +57,7 @@ class JobName {
library_,
notifications,
backupDatabase,
ocr,
];
static JobName? fromJson(dynamic value) => JobNameTypeTransformer().decode(value);
@ -109,6 +111,7 @@ class JobNameTypeTransformer {
case r'library': return JobName.library_;
case r'notifications': return JobName.notifications;
case r'backupDatabase': return JobName.backupDatabase;
case r'ocr': return JobName.ocr;
default:
if (!allowNull) {
throw ArgumentError('Unknown enum value to decode: $data');


@ -33,6 +33,7 @@ class MetadataSearchDto {
this.libraryId,
this.make,
this.model,
this.ocr,
this.order = AssetOrder.desc,
this.originalFileName,
this.originalPath,
@ -182,6 +183,14 @@ class MetadataSearchDto {
String? model;
///
/// Please note: This property should have been non-nullable! Since the specification file
/// does not include a default value (using the "default:" property), however, the generated
/// source code must fall back to having a nullable type.
/// Consider adding a "default:" property in the specification file to hide this note.
///
String? ocr;
AssetOrder order;
///
@ -369,6 +378,7 @@ class MetadataSearchDto {
other.libraryId == libraryId &&
other.make == make &&
other.model == model &&
other.ocr == ocr &&
other.order == order &&
other.originalFileName == originalFileName &&
other.originalPath == originalPath &&
@ -416,6 +426,7 @@ class MetadataSearchDto {
(libraryId == null ? 0 : libraryId!.hashCode) +
(make == null ? 0 : make!.hashCode) +
(model == null ? 0 : model!.hashCode) +
(ocr == null ? 0 : ocr!.hashCode) +
(order.hashCode) +
(originalFileName == null ? 0 : originalFileName!.hashCode) +
(originalPath == null ? 0 : originalPath!.hashCode) +
@ -441,7 +452,7 @@ class MetadataSearchDto {
(withStacked == null ? 0 : withStacked!.hashCode);
@override
String toString() => 'MetadataSearchDto[albumIds=$albumIds, checksum=$checksum, city=$city, country=$country, createdAfter=$createdAfter, createdBefore=$createdBefore, description=$description, deviceAssetId=$deviceAssetId, deviceId=$deviceId, encodedVideoPath=$encodedVideoPath, id=$id, isEncoded=$isEncoded, isFavorite=$isFavorite, isMotion=$isMotion, isNotInAlbum=$isNotInAlbum, isOffline=$isOffline, lensModel=$lensModel, libraryId=$libraryId, make=$make, model=$model, order=$order, originalFileName=$originalFileName, originalPath=$originalPath, page=$page, personIds=$personIds, previewPath=$previewPath, rating=$rating, size=$size, state=$state, tagIds=$tagIds, takenAfter=$takenAfter, takenBefore=$takenBefore, thumbnailPath=$thumbnailPath, trashedAfter=$trashedAfter, trashedBefore=$trashedBefore, type=$type, updatedAfter=$updatedAfter, updatedBefore=$updatedBefore, visibility=$visibility, withDeleted=$withDeleted, withExif=$withExif, withPeople=$withPeople, withStacked=$withStacked]';
String toString() => 'MetadataSearchDto[albumIds=$albumIds, checksum=$checksum, city=$city, country=$country, createdAfter=$createdAfter, createdBefore=$createdBefore, description=$description, deviceAssetId=$deviceAssetId, deviceId=$deviceId, encodedVideoPath=$encodedVideoPath, id=$id, isEncoded=$isEncoded, isFavorite=$isFavorite, isMotion=$isMotion, isNotInAlbum=$isNotInAlbum, isOffline=$isOffline, lensModel=$lensModel, libraryId=$libraryId, make=$make, model=$model, ocr=$ocr, order=$order, originalFileName=$originalFileName, originalPath=$originalPath, page=$page, personIds=$personIds, previewPath=$previewPath, rating=$rating, size=$size, state=$state, tagIds=$tagIds, takenAfter=$takenAfter, takenBefore=$takenBefore, thumbnailPath=$thumbnailPath, trashedAfter=$trashedAfter, trashedBefore=$trashedBefore, type=$type, updatedAfter=$updatedAfter, updatedBefore=$updatedBefore, visibility=$visibility, withDeleted=$withDeleted, withExif=$withExif, withPeople=$withPeople, withStacked=$withStacked]';
Map<String, dynamic> toJson() {
final json = <String, dynamic>{};
@ -540,6 +551,11 @@ class MetadataSearchDto {
json[r'model'] = this.model;
} else {
// json[r'model'] = null;
}
if (this.ocr != null) {
json[r'ocr'] = this.ocr;
} else {
// json[r'ocr'] = null;
}
json[r'order'] = this.order;
if (this.originalFileName != null) {
@ -682,6 +698,7 @@ class MetadataSearchDto {
libraryId: mapValueOfType<String>(json, r'libraryId'),
make: mapValueOfType<String>(json, r'make'),
model: mapValueOfType<String>(json, r'model'),
ocr: mapValueOfType<String>(json, r'ocr'),
order: AssetOrder.fromJson(json[r'order']) ?? AssetOrder.desc,
originalFileName: mapValueOfType<String>(json, r'originalFileName'),
originalPath: mapValueOfType<String>(json, r'originalPath'),

mobile/openapi/lib/model/ocr_config.dart (generated, new file)

@ -0,0 +1,136 @@
//
// AUTO-GENERATED FILE, DO NOT MODIFY!
//
// @dart=2.18
// ignore_for_file: unused_element, unused_import
// ignore_for_file: always_put_required_named_parameters_first
// ignore_for_file: constant_identifier_names
// ignore_for_file: lines_longer_than_80_chars
part of openapi.api;
class OcrConfig {
/// Returns a new [OcrConfig] instance.
OcrConfig({
required this.enabled,
required this.maxResolution,
required this.minDetectionScore,
required this.minRecognitionScore,
required this.modelName,
});
bool enabled;
/// Minimum value: 1
int maxResolution;
/// Minimum value: 0.1
/// Maximum value: 1
double minDetectionScore;
/// Minimum value: 0.1
/// Maximum value: 1
double minRecognitionScore;
String modelName;
@override
bool operator ==(Object other) => identical(this, other) || other is OcrConfig &&
other.enabled == enabled &&
other.maxResolution == maxResolution &&
other.minDetectionScore == minDetectionScore &&
other.minRecognitionScore == minRecognitionScore &&
other.modelName == modelName;
@override
int get hashCode =>
// ignore: unnecessary_parenthesis
(enabled.hashCode) +
(maxResolution.hashCode) +
(minDetectionScore.hashCode) +
(minRecognitionScore.hashCode) +
(modelName.hashCode);
@override
String toString() => 'OcrConfig[enabled=$enabled, maxResolution=$maxResolution, minDetectionScore=$minDetectionScore, minRecognitionScore=$minRecognitionScore, modelName=$modelName]';
Map<String, dynamic> toJson() {
final json = <String, dynamic>{};
json[r'enabled'] = this.enabled;
json[r'maxResolution'] = this.maxResolution;
json[r'minDetectionScore'] = this.minDetectionScore;
json[r'minRecognitionScore'] = this.minRecognitionScore;
json[r'modelName'] = this.modelName;
return json;
}
/// Returns a new [OcrConfig] instance and imports its values from
/// [value] if it's a [Map], null otherwise.
// ignore: prefer_constructors_over_static_methods
static OcrConfig? fromJson(dynamic value) {
upgradeDto(value, "OcrConfig");
if (value is Map) {
final json = value.cast<String, dynamic>();
return OcrConfig(
enabled: mapValueOfType<bool>(json, r'enabled')!,
maxResolution: mapValueOfType<int>(json, r'maxResolution')!,
minDetectionScore: (mapValueOfType<num>(json, r'minDetectionScore')!).toDouble(),
minRecognitionScore: (mapValueOfType<num>(json, r'minRecognitionScore')!).toDouble(),
modelName: mapValueOfType<String>(json, r'modelName')!,
);
}
return null;
}
static List<OcrConfig> listFromJson(dynamic json, {bool growable = false,}) {
final result = <OcrConfig>[];
if (json is List && json.isNotEmpty) {
for (final row in json) {
final value = OcrConfig.fromJson(row);
if (value != null) {
result.add(value);
}
}
}
return result.toList(growable: growable);
}
static Map<String, OcrConfig> mapFromJson(dynamic json) {
final map = <String, OcrConfig>{};
if (json is Map && json.isNotEmpty) {
json = json.cast<String, dynamic>(); // ignore: parameter_assignments
for (final entry in json.entries) {
final value = OcrConfig.fromJson(entry.value);
if (value != null) {
map[entry.key] = value;
}
}
}
return map;
}
// maps a json object with a list of OcrConfig-objects as value to a dart map
static Map<String, List<OcrConfig>> mapListFromJson(dynamic json, {bool growable = false,}) {
final map = <String, List<OcrConfig>>{};
if (json is Map && json.isNotEmpty) {
// ignore: parameter_assignments
json = json.cast<String, dynamic>();
for (final entry in json.entries) {
map[entry.key] = OcrConfig.listFromJson(entry.value, growable: growable,);
}
}
return map;
}
/// The list of required keys that must be present in a JSON.
static const requiredKeys = <String>{
'enabled',
'maxResolution',
'minDetectionScore',
'minRecognitionScore',
'modelName',
};
}


@ -28,6 +28,7 @@ class RandomSearchDto {
this.libraryId,
this.make,
this.model,
this.ocr,
this.personIds = const [],
this.rating,
this.size,
@ -131,6 +132,14 @@ class RandomSearchDto {
String? model;
///
/// Please note: This property should have been non-nullable! Since the specification file
/// does not include a default value (using the "default:" property), however, the generated
/// source code must fall back to having a nullable type.
/// Consider adding a "default:" property in the specification file to hide this note.
///
String? ocr;
List<String> personIds;
/// Minimum value: -1
@ -270,6 +279,7 @@ class RandomSearchDto {
other.libraryId == libraryId &&
other.make == make &&
other.model == model &&
other.ocr == ocr &&
_deepEquality.equals(other.personIds, personIds) &&
other.rating == rating &&
other.size == size &&
@ -306,6 +316,7 @@ class RandomSearchDto {
(libraryId == null ? 0 : libraryId!.hashCode) +
(make == null ? 0 : make!.hashCode) +
(model == null ? 0 : model!.hashCode) +
(ocr == null ? 0 : ocr!.hashCode) +
(personIds.hashCode) +
(rating == null ? 0 : rating!.hashCode) +
(size == null ? 0 : size!.hashCode) +
@ -325,7 +336,7 @@ class RandomSearchDto {
(withStacked == null ? 0 : withStacked!.hashCode);
@override
String toString() => 'RandomSearchDto[albumIds=$albumIds, city=$city, country=$country, createdAfter=$createdAfter, createdBefore=$createdBefore, deviceId=$deviceId, isEncoded=$isEncoded, isFavorite=$isFavorite, isMotion=$isMotion, isNotInAlbum=$isNotInAlbum, isOffline=$isOffline, lensModel=$lensModel, libraryId=$libraryId, make=$make, model=$model, personIds=$personIds, rating=$rating, size=$size, state=$state, tagIds=$tagIds, takenAfter=$takenAfter, takenBefore=$takenBefore, trashedAfter=$trashedAfter, trashedBefore=$trashedBefore, type=$type, updatedAfter=$updatedAfter, updatedBefore=$updatedBefore, visibility=$visibility, withDeleted=$withDeleted, withExif=$withExif, withPeople=$withPeople, withStacked=$withStacked]';
String toString() => 'RandomSearchDto[albumIds=$albumIds, city=$city, country=$country, createdAfter=$createdAfter, createdBefore=$createdBefore, deviceId=$deviceId, isEncoded=$isEncoded, isFavorite=$isFavorite, isMotion=$isMotion, isNotInAlbum=$isNotInAlbum, isOffline=$isOffline, lensModel=$lensModel, libraryId=$libraryId, make=$make, model=$model, ocr=$ocr, personIds=$personIds, rating=$rating, size=$size, state=$state, tagIds=$tagIds, takenAfter=$takenAfter, takenBefore=$takenBefore, trashedAfter=$trashedAfter, trashedBefore=$trashedBefore, type=$type, updatedAfter=$updatedAfter, updatedBefore=$updatedBefore, visibility=$visibility, withDeleted=$withDeleted, withExif=$withExif, withPeople=$withPeople, withStacked=$withStacked]';
Map<String, dynamic> toJson() {
final json = <String, dynamic>{};
@ -399,6 +410,11 @@ class RandomSearchDto {
json[r'model'] = this.model;
} else {
// json[r'model'] = null;
}
if (this.ocr != null) {
json[r'ocr'] = this.ocr;
} else {
// json[r'ocr'] = null;
}
json[r'personIds'] = this.personIds;
if (this.rating != null) {
@ -510,6 +526,7 @@ class RandomSearchDto {
libraryId: mapValueOfType<String>(json, r'libraryId'),
make: mapValueOfType<String>(json, r'make'),
model: mapValueOfType<String>(json, r'model'),
ocr: mapValueOfType<String>(json, r'ocr'),
personIds: json[r'personIds'] is Iterable
? (json[r'personIds'] as Iterable).cast<String>().toList(growable: false)
: const [],


@ -21,6 +21,7 @@ class ServerFeaturesDto {
required this.map,
required this.oauth,
required this.oauthAutoLaunch,
required this.ocr,
required this.passwordLogin,
required this.reverseGeocoding,
required this.search,
@ -45,6 +46,8 @@ class ServerFeaturesDto {
bool oauthAutoLaunch;
bool ocr;
bool passwordLogin;
bool reverseGeocoding;
@ -67,6 +70,7 @@ class ServerFeaturesDto {
other.map == map &&
other.oauth == oauth &&
other.oauthAutoLaunch == oauthAutoLaunch &&
other.ocr == ocr &&
other.passwordLogin == passwordLogin &&
other.reverseGeocoding == reverseGeocoding &&
other.search == search &&
@ -85,6 +89,7 @@ class ServerFeaturesDto {
(map.hashCode) +
(oauth.hashCode) +
(oauthAutoLaunch.hashCode) +
(ocr.hashCode) +
(passwordLogin.hashCode) +
(reverseGeocoding.hashCode) +
(search.hashCode) +
@ -93,7 +98,7 @@ class ServerFeaturesDto {
(trash.hashCode);
@override
String toString() => 'ServerFeaturesDto[configFile=$configFile, duplicateDetection=$duplicateDetection, email=$email, facialRecognition=$facialRecognition, importFaces=$importFaces, map=$map, oauth=$oauth, oauthAutoLaunch=$oauthAutoLaunch, passwordLogin=$passwordLogin, reverseGeocoding=$reverseGeocoding, search=$search, sidecar=$sidecar, smartSearch=$smartSearch, trash=$trash]';
String toString() => 'ServerFeaturesDto[configFile=$configFile, duplicateDetection=$duplicateDetection, email=$email, facialRecognition=$facialRecognition, importFaces=$importFaces, map=$map, oauth=$oauth, oauthAutoLaunch=$oauthAutoLaunch, ocr=$ocr, passwordLogin=$passwordLogin, reverseGeocoding=$reverseGeocoding, search=$search, sidecar=$sidecar, smartSearch=$smartSearch, trash=$trash]';
Map<String, dynamic> toJson() {
final json = <String, dynamic>{};
@ -105,6 +110,7 @@ class ServerFeaturesDto {
json[r'map'] = this.map;
json[r'oauth'] = this.oauth;
json[r'oauthAutoLaunch'] = this.oauthAutoLaunch;
json[r'ocr'] = this.ocr;
json[r'passwordLogin'] = this.passwordLogin;
json[r'reverseGeocoding'] = this.reverseGeocoding;
json[r'search'] = this.search;
@ -131,6 +137,7 @@ class ServerFeaturesDto {
map: mapValueOfType<bool>(json, r'map')!,
oauth: mapValueOfType<bool>(json, r'oauth')!,
oauthAutoLaunch: mapValueOfType<bool>(json, r'oauthAutoLaunch')!,
ocr: mapValueOfType<bool>(json, r'ocr')!,
passwordLogin: mapValueOfType<bool>(json, r'passwordLogin')!,
reverseGeocoding: mapValueOfType<bool>(json, r'reverseGeocoding')!,
search: mapValueOfType<bool>(json, r'search')!,
@ -192,6 +199,7 @@ class ServerFeaturesDto {
'map',
'oauth',
'oauthAutoLaunch',
'ocr',
'passwordLogin',
'reverseGeocoding',
'search',


@ -29,6 +29,7 @@ class SmartSearchDto {
this.libraryId,
this.make,
this.model,
this.ocr,
this.page,
this.personIds = const [],
this.query,
@ -141,6 +142,14 @@ class SmartSearchDto {
String? model;
///
/// Please note: This property should have been non-nullable! Since the specification file
/// does not include a default value (using the "default:" property), however, the generated
/// source code must fall back to having a nullable type.
/// Consider adding a "default:" property in the specification file to hide this note.
///
String? ocr;
/// Minimum value: 1
///
/// Please note: This property should have been non-nullable! Since the specification file
@ -290,6 +299,7 @@ class SmartSearchDto {
other.libraryId == libraryId &&
other.make == make &&
other.model == model &&
other.ocr == ocr &&
other.page == page &&
_deepEquality.equals(other.personIds, personIds) &&
other.query == query &&
@ -328,6 +338,7 @@ class SmartSearchDto {
(libraryId == null ? 0 : libraryId!.hashCode) +
(make == null ? 0 : make!.hashCode) +
(model == null ? 0 : model!.hashCode) +
(ocr == null ? 0 : ocr!.hashCode) +
(page == null ? 0 : page!.hashCode) +
(personIds.hashCode) +
(query == null ? 0 : query!.hashCode) +
@ -348,7 +359,7 @@ class SmartSearchDto {
(withExif == null ? 0 : withExif!.hashCode);
@override
String toString() => 'SmartSearchDto[albumIds=$albumIds, city=$city, country=$country, createdAfter=$createdAfter, createdBefore=$createdBefore, deviceId=$deviceId, isEncoded=$isEncoded, isFavorite=$isFavorite, isMotion=$isMotion, isNotInAlbum=$isNotInAlbum, isOffline=$isOffline, language=$language, lensModel=$lensModel, libraryId=$libraryId, make=$make, model=$model, page=$page, personIds=$personIds, query=$query, queryAssetId=$queryAssetId, rating=$rating, size=$size, state=$state, tagIds=$tagIds, takenAfter=$takenAfter, takenBefore=$takenBefore, trashedAfter=$trashedAfter, trashedBefore=$trashedBefore, type=$type, updatedAfter=$updatedAfter, updatedBefore=$updatedBefore, visibility=$visibility, withDeleted=$withDeleted, withExif=$withExif]';
String toString() => 'SmartSearchDto[albumIds=$albumIds, city=$city, country=$country, createdAfter=$createdAfter, createdBefore=$createdBefore, deviceId=$deviceId, isEncoded=$isEncoded, isFavorite=$isFavorite, isMotion=$isMotion, isNotInAlbum=$isNotInAlbum, isOffline=$isOffline, language=$language, lensModel=$lensModel, libraryId=$libraryId, make=$make, model=$model, ocr=$ocr, page=$page, personIds=$personIds, query=$query, queryAssetId=$queryAssetId, rating=$rating, size=$size, state=$state, tagIds=$tagIds, takenAfter=$takenAfter, takenBefore=$takenBefore, trashedAfter=$trashedAfter, trashedBefore=$trashedBefore, type=$type, updatedAfter=$updatedAfter, updatedBefore=$updatedBefore, visibility=$visibility, withDeleted=$withDeleted, withExif=$withExif]';
Map<String, dynamic> toJson() {
final json = <String, dynamic>{};
@ -428,6 +439,11 @@ class SmartSearchDto {
} else {
// json[r'model'] = null;
}
if (this.ocr != null) {
json[r'ocr'] = this.ocr;
} else {
// json[r'ocr'] = null;
}
if (this.page != null) {
json[r'page'] = this.page;
} else {
@ -544,6 +560,7 @@ class SmartSearchDto {
libraryId: mapValueOfType<String>(json, r'libraryId'),
make: mapValueOfType<String>(json, r'make'),
model: mapValueOfType<String>(json, r'model'),
ocr: mapValueOfType<String>(json, r'ocr'),
page: num.parse('${json[r'page']}'),
personIds: json[r'personIds'] is Iterable
? (json[r'personIds'] as Iterable).cast<String>().toList(growable: false)


@ -29,6 +29,7 @@ class StatisticsSearchDto {
this.libraryId,
this.make,
this.model,
this.ocr,
this.personIds = const [],
this.rating,
this.state,
@ -135,6 +136,14 @@ class StatisticsSearchDto {
String? model;
///
/// Please note: This property should have been non-nullable! Since the specification file
/// does not include a default value (using the "default:" property), however, the generated
/// source code must fall back to having a nullable type.
/// Consider adding a "default:" property in the specification file to hide this note.
///
String? ocr;
List<String> personIds;
/// Minimum value: -1
@ -233,6 +242,7 @@ class StatisticsSearchDto {
other.libraryId == libraryId &&
other.make == make &&
other.model == model &&
other.ocr == ocr &&
_deepEquality.equals(other.personIds, personIds) &&
other.rating == rating &&
other.state == state &&
@ -265,6 +275,7 @@ class StatisticsSearchDto {
(libraryId == null ? 0 : libraryId!.hashCode) +
(make == null ? 0 : make!.hashCode) +
(model == null ? 0 : model!.hashCode) +
(ocr == null ? 0 : ocr!.hashCode) +
(personIds.hashCode) +
(rating == null ? 0 : rating!.hashCode) +
(state == null ? 0 : state!.hashCode) +
@ -279,7 +290,7 @@ class StatisticsSearchDto {
(visibility == null ? 0 : visibility!.hashCode);
@override
String toString() => 'StatisticsSearchDto[albumIds=$albumIds, city=$city, country=$country, createdAfter=$createdAfter, createdBefore=$createdBefore, description=$description, deviceId=$deviceId, isEncoded=$isEncoded, isFavorite=$isFavorite, isMotion=$isMotion, isNotInAlbum=$isNotInAlbum, isOffline=$isOffline, lensModel=$lensModel, libraryId=$libraryId, make=$make, model=$model, personIds=$personIds, rating=$rating, state=$state, tagIds=$tagIds, takenAfter=$takenAfter, takenBefore=$takenBefore, trashedAfter=$trashedAfter, trashedBefore=$trashedBefore, type=$type, updatedAfter=$updatedAfter, updatedBefore=$updatedBefore, visibility=$visibility]';
String toString() => 'StatisticsSearchDto[albumIds=$albumIds, city=$city, country=$country, createdAfter=$createdAfter, createdBefore=$createdBefore, description=$description, deviceId=$deviceId, isEncoded=$isEncoded, isFavorite=$isFavorite, isMotion=$isMotion, isNotInAlbum=$isNotInAlbum, isOffline=$isOffline, lensModel=$lensModel, libraryId=$libraryId, make=$make, model=$model, ocr=$ocr, personIds=$personIds, rating=$rating, state=$state, tagIds=$tagIds, takenAfter=$takenAfter, takenBefore=$takenBefore, trashedAfter=$trashedAfter, trashedBefore=$trashedBefore, type=$type, updatedAfter=$updatedAfter, updatedBefore=$updatedBefore, visibility=$visibility]';
Map<String, dynamic> toJson() {
final json = <String, dynamic>{};
@ -358,6 +369,11 @@ class StatisticsSearchDto {
json[r'model'] = this.model;
} else {
// json[r'model'] = null;
}
if (this.ocr != null) {
json[r'ocr'] = this.ocr;
} else {
// json[r'ocr'] = null;
}
json[r'personIds'] = this.personIds;
if (this.rating != null) {
@ -445,6 +461,7 @@ class StatisticsSearchDto {
libraryId: mapValueOfType<String>(json, r'libraryId'),
make: mapValueOfType<String>(json, r'make'),
model: mapValueOfType<String>(json, r'model'),
ocr: mapValueOfType<String>(json, r'ocr'),
personIds: json[r'personIds'] is Iterable
? (json[r'personIds'] as Iterable).cast<String>().toList(growable: false)
: const [],


@ -19,6 +19,7 @@ class SystemConfigJobDto {
required this.metadataExtraction,
required this.migration,
required this.notifications,
required this.ocr,
required this.search,
required this.sidecar,
required this.smartSearch,
@ -38,6 +39,8 @@ class SystemConfigJobDto {
JobSettingsDto notifications;
JobSettingsDto ocr;
JobSettingsDto search;
JobSettingsDto sidecar;
@ -56,6 +59,7 @@ class SystemConfigJobDto {
other.metadataExtraction == metadataExtraction &&
other.migration == migration &&
other.notifications == notifications &&
other.ocr == ocr &&
other.search == search &&
other.sidecar == sidecar &&
other.smartSearch == smartSearch &&
@ -71,6 +75,7 @@ class SystemConfigJobDto {
(metadataExtraction.hashCode) +
(migration.hashCode) +
(notifications.hashCode) +
(ocr.hashCode) +
(search.hashCode) +
(sidecar.hashCode) +
(smartSearch.hashCode) +
@ -78,7 +83,7 @@ class SystemConfigJobDto {
(videoConversion.hashCode);
@override
String toString() => 'SystemConfigJobDto[backgroundTask=$backgroundTask, faceDetection=$faceDetection, library_=$library_, metadataExtraction=$metadataExtraction, migration=$migration, notifications=$notifications, search=$search, sidecar=$sidecar, smartSearch=$smartSearch, thumbnailGeneration=$thumbnailGeneration, videoConversion=$videoConversion]';
String toString() => 'SystemConfigJobDto[backgroundTask=$backgroundTask, faceDetection=$faceDetection, library_=$library_, metadataExtraction=$metadataExtraction, migration=$migration, notifications=$notifications, ocr=$ocr, search=$search, sidecar=$sidecar, smartSearch=$smartSearch, thumbnailGeneration=$thumbnailGeneration, videoConversion=$videoConversion]';
Map<String, dynamic> toJson() {
final json = <String, dynamic>{};
@ -88,6 +93,7 @@ class SystemConfigJobDto {
json[r'metadataExtraction'] = this.metadataExtraction;
json[r'migration'] = this.migration;
json[r'notifications'] = this.notifications;
json[r'ocr'] = this.ocr;
json[r'search'] = this.search;
json[r'sidecar'] = this.sidecar;
json[r'smartSearch'] = this.smartSearch;
@ -111,6 +117,7 @@ class SystemConfigJobDto {
metadataExtraction: JobSettingsDto.fromJson(json[r'metadataExtraction'])!,
migration: JobSettingsDto.fromJson(json[r'migration'])!,
notifications: JobSettingsDto.fromJson(json[r'notifications'])!,
ocr: JobSettingsDto.fromJson(json[r'ocr'])!,
search: JobSettingsDto.fromJson(json[r'search'])!,
sidecar: JobSettingsDto.fromJson(json[r'sidecar'])!,
smartSearch: JobSettingsDto.fromJson(json[r'smartSearch'])!,
@ -169,6 +176,7 @@ class SystemConfigJobDto {
'metadataExtraction',
'migration',
'notifications',
'ocr',
'search',
'sidecar',
'smartSearch',


@ -18,6 +18,7 @@ class SystemConfigMachineLearningDto {
required this.duplicateDetection,
required this.enabled,
required this.facialRecognition,
required this.ocr,
this.urls = const [],
});
@ -31,6 +32,8 @@ class SystemConfigMachineLearningDto {
FacialRecognitionConfig facialRecognition;
OcrConfig ocr;
List<String> urls;
@override
@ -40,6 +43,7 @@ class SystemConfigMachineLearningDto {
other.duplicateDetection == duplicateDetection &&
other.enabled == enabled &&
other.facialRecognition == facialRecognition &&
other.ocr == ocr &&
_deepEquality.equals(other.urls, urls);
@override
@ -50,10 +54,11 @@ class SystemConfigMachineLearningDto {
(duplicateDetection.hashCode) +
(enabled.hashCode) +
(facialRecognition.hashCode) +
(ocr.hashCode) +
(urls.hashCode);
@override
String toString() => 'SystemConfigMachineLearningDto[availabilityChecks=$availabilityChecks, clip=$clip, duplicateDetection=$duplicateDetection, enabled=$enabled, facialRecognition=$facialRecognition, urls=$urls]';
String toString() => 'SystemConfigMachineLearningDto[availabilityChecks=$availabilityChecks, clip=$clip, duplicateDetection=$duplicateDetection, enabled=$enabled, facialRecognition=$facialRecognition, ocr=$ocr, urls=$urls]';
Map<String, dynamic> toJson() {
final json = <String, dynamic>{};
@ -62,6 +67,7 @@ class SystemConfigMachineLearningDto {
json[r'duplicateDetection'] = this.duplicateDetection;
json[r'enabled'] = this.enabled;
json[r'facialRecognition'] = this.facialRecognition;
json[r'ocr'] = this.ocr;
json[r'urls'] = this.urls;
return json;
}
@ -80,6 +86,7 @@ class SystemConfigMachineLearningDto {
duplicateDetection: DuplicateDetectionConfig.fromJson(json[r'duplicateDetection'])!,
enabled: mapValueOfType<bool>(json, r'enabled')!,
facialRecognition: FacialRecognitionConfig.fromJson(json[r'facialRecognition'])!,
ocr: OcrConfig.fromJson(json[r'ocr'])!,
urls: json[r'urls'] is Iterable
? (json[r'urls'] as Iterable).cast<String>().toList(growable: false)
: const [],
@ -135,6 +142,7 @@ class SystemConfigMachineLearningDto {
'duplicateDetection',
'enabled',
'facialRecognition',
'ocr',
'urls',
};
}


@ -5984,6 +5984,14 @@
"type": "string"
}
},
{
"name": "ocr",
"required": false,
"in": "query",
"schema": {
"type": "string"
}
},
{
"name": "personIds",
"required": false,
@ -10408,6 +10416,9 @@
"notifications": {
"$ref": "#/components/schemas/JobStatusDto"
},
"ocr": {
"$ref": "#/components/schemas/JobStatusDto"
},
"search": {
"$ref": "#/components/schemas/JobStatusDto"
},
@ -10437,6 +10448,7 @@
"metadataExtraction",
"migration",
"notifications",
"ocr",
"search",
"sidecar",
"smartSearch",
@ -12108,7 +12120,8 @@
"sidecar",
"library",
"notifications",
"backupDatabase"
"backupDatabase",
"ocr"
],
"type": "string"
},
@ -12678,6 +12691,9 @@
"nullable": true,
"type": "string"
},
"ocr": {
"type": "string"
},
"order": {
"allOf": [
{
@ -12989,6 +13005,40 @@
],
"type": "string"
},
"OcrConfig": {
"properties": {
"enabled": {
"type": "boolean"
},
"maxResolution": {
"minimum": 1,
"type": "integer"
},
"minDetectionScore": {
"format": "double",
"maximum": 1,
"minimum": 0.1,
"type": "number"
},
"minRecognitionScore": {
"format": "double",
"maximum": 1,
"minimum": 0.1,
"type": "number"
},
"modelName": {
"type": "string"
}
},
"required": [
"enabled",
"maxResolution",
"minDetectionScore",
"minRecognitionScore",
"modelName"
],
"type": "object"
},
"OnThisDayDto": {
"properties": {
"year": {
@ -13659,6 +13709,9 @@
"nullable": true,
"type": "string"
},
"ocr": {
"type": "string"
},
"personIds": {
"items": {
"format": "uuid",
@ -14127,6 +14180,9 @@
"oauthAutoLaunch": {
"type": "boolean"
},
"ocr": {
"type": "boolean"
},
"passwordLogin": {
"type": "boolean"
},
@ -14155,6 +14211,7 @@
"map",
"oauth",
"oauthAutoLaunch",
"ocr",
"passwordLogin",
"reverseGeocoding",
"search",
@ -14762,6 +14819,9 @@
"nullable": true,
"type": "string"
},
"ocr": {
"type": "string"
},
"page": {
"minimum": 1,
"type": "number"
@ -14967,6 +15027,9 @@
"nullable": true,
"type": "string"
},
"ocr": {
"type": "string"
},
"personIds": {
"items": {
"format": "uuid",
@ -16416,6 +16479,9 @@
"notifications": {
"$ref": "#/components/schemas/JobSettingsDto"
},
"ocr": {
"$ref": "#/components/schemas/JobSettingsDto"
},
"search": {
"$ref": "#/components/schemas/JobSettingsDto"
},
@ -16439,6 +16505,7 @@
"metadataExtraction",
"migration",
"notifications",
"ocr",
"search",
"sidecar",
"smartSearch",
@ -16524,6 +16591,9 @@
"facialRecognition": {
"$ref": "#/components/schemas/FacialRecognitionConfig"
},
"ocr": {
"$ref": "#/components/schemas/OcrConfig"
},
"urls": {
"format": "uri",
"items": {
@ -16540,6 +16610,7 @@
"duplicateDetection",
"enabled",
"facialRecognition",
"ocr",
"urls"
],
"type": "object"


@ -696,6 +696,7 @@ export type AllJobStatusResponseDto = {
metadataExtraction: JobStatusDto;
migration: JobStatusDto;
notifications: JobStatusDto;
ocr: JobStatusDto;
search: JobStatusDto;
sidecar: JobStatusDto;
smartSearch: JobStatusDto;
@ -926,6 +927,7 @@ export type MetadataSearchDto = {
libraryId?: string | null;
make?: string;
model?: string | null;
ocr?: string;
order?: AssetOrder;
originalFileName?: string;
originalPath?: string;
@ -998,6 +1000,7 @@ export type RandomSearchDto = {
libraryId?: string | null;
make?: string;
model?: string | null;
ocr?: string;
personIds?: string[];
rating?: number;
size?: number;
@ -1033,6 +1036,7 @@ export type SmartSearchDto = {
libraryId?: string | null;
make?: string;
model?: string | null;
ocr?: string;
page?: number;
personIds?: string[];
query?: string;
@ -1069,6 +1073,7 @@ export type StatisticsSearchDto = {
libraryId?: string | null;
make?: string;
model?: string | null;
ocr?: string;
personIds?: string[];
rating?: number;
state?: string | null;
@ -1135,6 +1140,7 @@ export type ServerFeaturesDto = {
map: boolean;
oauth: boolean;
oauthAutoLaunch: boolean;
ocr: boolean;
passwordLogin: boolean;
reverseGeocoding: boolean;
search: boolean;
@ -1371,6 +1377,7 @@ export type SystemConfigJobDto = {
metadataExtraction: JobSettingsDto;
migration: JobSettingsDto;
notifications: JobSettingsDto;
ocr: JobSettingsDto;
search: JobSettingsDto;
sidecar: JobSettingsDto;
smartSearch: JobSettingsDto;
@ -1412,12 +1419,20 @@ export type FacialRecognitionConfig = {
minScore: number;
modelName: string;
};
export type OcrConfig = {
enabled: boolean;
maxResolution: number;
minDetectionScore: number;
minRecognitionScore: number;
modelName: string;
};
export type SystemConfigMachineLearningDto = {
availabilityChecks: MachineLearningAvailabilityChecksDto;
clip: ClipConfig;
duplicateDetection: DuplicateDetectionConfig;
enabled: boolean;
facialRecognition: FacialRecognitionConfig;
ocr: OcrConfig;
urls: string[];
};
export type SystemConfigMapDto = {
@ -3399,7 +3414,7 @@ export function getExploreData(opts?: Oazapfts.RequestOpts) {
/**
* This endpoint requires the `asset.read` permission.
*/
export function searchLargeAssets({ albumIds, city, country, createdAfter, createdBefore, deviceId, isEncoded, isFavorite, isMotion, isNotInAlbum, isOffline, lensModel, libraryId, make, minFileSize, model, personIds, rating, size, state, tagIds, takenAfter, takenBefore, trashedAfter, trashedBefore, $type, updatedAfter, updatedBefore, visibility, withDeleted, withExif }: {
export function searchLargeAssets({ albumIds, city, country, createdAfter, createdBefore, deviceId, isEncoded, isFavorite, isMotion, isNotInAlbum, isOffline, lensModel, libraryId, make, minFileSize, model, ocr, personIds, rating, size, state, tagIds, takenAfter, takenBefore, trashedAfter, trashedBefore, $type, updatedAfter, updatedBefore, visibility, withDeleted, withExif }: {
albumIds?: string[];
city?: string | null;
country?: string | null;
@ -3416,6 +3431,7 @@ export function searchLargeAssets({ albumIds, city, country, createdAfter, creat
make?: string;
minFileSize?: number;
model?: string | null;
ocr?: string;
personIds?: string[];
rating?: number;
size?: number;
@ -3452,6 +3468,7 @@ export function searchLargeAssets({ albumIds, city, country, createdAfter, creat
make,
minFileSize,
model,
ocr,
personIds,
rating,
size,
@ -4901,7 +4918,8 @@ export enum JobName {
Sidecar = "sidecar",
Library = "library",
Notifications = "notifications",
BackupDatabase = "backupDatabase"
BackupDatabase = "backupDatabase",
Ocr = "ocr"
}
export enum JobCommand {
Start = "start",


@ -74,6 +74,13 @@ export interface SystemConfig {
minFaces: number;
maxDistance: number;
};
ocr: {
enabled: boolean;
modelName: string;
minDetectionScore: number;
minRecognitionScore: number;
maxResolution: number;
};
};
map: {
enabled: boolean;
@ -227,6 +234,7 @@ export const defaults = Object.freeze<SystemConfig>({
[QueueName.ThumbnailGeneration]: { concurrency: 3 },
[QueueName.VideoConversion]: { concurrency: 1 },
[QueueName.Notification]: { concurrency: 5 },
[QueueName.Ocr]: { concurrency: 1 },
},
logging: {
enabled: true,
@ -255,6 +263,13 @@ export const defaults = Object.freeze<SystemConfig>({
maxDistance: 0.5,
minFaces: 3,
},
ocr: {
enabled: true,
modelName: 'PP-OCRv5_mobile',
minDetectionScore: 0.5,
minRecognitionScore: 0.8,
maxResolution: 736,
},
},
map: {
enabled: true,
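Because defaults is frozen, deployment-specific settings have to be merged over it rather than mutated in place. A minimal sketch, where `stored` is a hypothetical partial config loaded from the database:

// Sketch only: effective OCR settings = shipped defaults + stored overrides.
declare const stored: { machineLearning?: { ocr?: Partial<SystemConfig['machineLearning']['ocr']> } };

const effectiveOcr = {
  ...defaults.machineLearning.ocr,
  ...stored.machineLearning?.ocr, // spreading undefined is a no-op
};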


@ -93,4 +93,7 @@ export class AllJobStatusResponseDto implements Record<QueueName, JobStatusDto>
@ApiProperty({ type: JobStatusDto })
[QueueName.BackupDatabase]!: JobStatusDto;
@ApiProperty({ type: JobStatusDto })
[QueueName.Ocr]!: JobStatusDto;
}


@ -46,3 +46,25 @@ export class FacialRecognitionConfig extends ModelConfig {
@ApiProperty({ type: 'integer' })
minFaces!: number;
}
export class OcrConfig extends ModelConfig {
@IsNumber()
@Min(1)
@Type(() => Number)
@ApiProperty({ type: 'integer' })
maxResolution!: number;
@IsNumber()
@Min(0.1)
@Max(1)
@Type(() => Number)
@ApiProperty({ type: 'number', format: 'double' })
minDetectionScore!: number;
@IsNumber()
@Min(0.1)
@Max(1)
@Type(() => Number)
@ApiProperty({ type: 'number', format: 'double' })
minRecognitionScore!: number;
}
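A quick validation sketch against the decorators above, using class-transformer and class-validator (the same libraries these DTOs already rely on); the exact field set inherited from ModelConfig is assumed:

import { plainToInstance } from 'class-transformer';
import { validate } from 'class-validator';

// A minDetectionScore below the @Min(0.1) bound should produce a validation error.
async function checkOcrConfig() {
  const dto = plainToInstance(OcrConfig, {
    enabled: true,
    modelName: 'PP-OCRv5_mobile',
    maxResolution: 736,
    minDetectionScore: 0.05, // out of range on purpose
    minRecognitionScore: 0.8,
  });
  const errors = await validate(dto);
  console.log(errors.length > 0); // expected: true
}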


@ -101,6 +101,11 @@ class BaseSearchDto {
@Max(5)
@Min(-1)
rating?: number;
@IsString()
@IsNotEmpty()
@Optional()
ocr?: string;
}
class BaseSearchWithResultsDto extends BaseSearchDto {


@ -171,6 +171,7 @@ export class ServerFeaturesDto {
sidecar!: boolean;
search!: boolean;
email!: boolean;
ocr!: boolean;
}
export interface ReleaseNotification {


@ -15,7 +15,7 @@ import {
ValidateNested,
} from 'class-validator';
import { SystemConfig } from 'src/config';
import { CLIPConfig, DuplicateDetectionConfig, FacialRecognitionConfig } from 'src/dtos/model-config.dto';
import { CLIPConfig, DuplicateDetectionConfig, FacialRecognitionConfig, OcrConfig } from 'src/dtos/model-config.dto';
import {
AudioCodec,
CQMode,
@ -201,6 +201,12 @@ class SystemConfigJobDto implements Record<ConcurrentQueueName, JobSettingsDto>
@Type(() => JobSettingsDto)
[QueueName.FaceDetection]!: JobSettingsDto;
@ApiProperty({ type: JobSettingsDto })
@ValidateNested()
@IsObject()
@Type(() => JobSettingsDto)
[QueueName.Ocr]!: JobSettingsDto;
@ApiProperty({ type: JobSettingsDto })
@ValidateNested()
@IsObject()
@ -296,6 +302,11 @@ class SystemConfigMachineLearningDto {
@ValidateNested()
@IsObject()
facialRecognition!: FacialRecognitionConfig;
@Type(() => OcrConfig)
@ValidateNested()
@IsObject()
ocr!: OcrConfig;
}
enum MapTheme {


@ -513,6 +513,7 @@ export enum QueueName {
Library = 'library',
Notification = 'notifications',
BackupDatabase = 'backupDatabase',
Ocr = 'ocr',
}
export enum JobName {
@ -585,6 +586,10 @@ export enum JobName {
TagCleanup = 'TagCleanup',
VersionCheck = 'VersionCheck',
// OCR
OcrQueueAll = 'OcrQueueAll',
Ocr = 'Ocr',
}
export enum JobCommand {


@ -285,6 +285,23 @@ from
where
"asset"."id" = $2
-- AssetJobRepository.getForOcr
select
"asset"."visibility",
(
select
"asset_file"."path"
from
"asset_file"
where
"asset_file"."assetId" = "asset"."id"
and "asset_file"."type" = $1
) as "previewFile"
from
"asset"
where
"asset"."id" = $2
-- AssetJobRepository.getForSyncAssets
select
"asset"."id",
@ -483,6 +500,17 @@ where
order by
"asset"."fileCreatedAt" desc
-- AssetJobRepository.streamForOcrJob
select
"asset"."id"
from
"asset"
inner join "asset_job_status" on "asset_job_status"."assetId" = "asset"."id"
where
"asset_job_status"."ocrAt" is null
and "asset"."deletedAt" is null
and "asset"."visibility" != $1
-- AssetJobRepository.streamForMigrationJob
select
"id"


@ -0,0 +1,68 @@
-- NOTE: This file is auto generated by ./sql-generator
-- OcrRepository.getById
select
"asset_ocr".*
from
"asset_ocr"
where
"asset_ocr"."id" = $1
-- OcrRepository.getByAssetId
select
"asset_ocr".*
from
"asset_ocr"
where
"asset_ocr"."assetId" = $1
-- OcrRepository.upsert
with
"deleted_ocr" as (
delete from "asset_ocr"
where
"assetId" = $1
),
"inserted_ocr" as (
insert into
"asset_ocr" (
"assetId",
"x1",
"y1",
"x2",
"y2",
"x3",
"y3",
"x4",
"y4",
"text",
"boxScore",
"textScore"
)
values
(
$2,
$3,
$4,
$5,
$6,
$7,
$8,
$9,
$10,
$11,
$12,
$13
)
),
"inserted_search" as (
insert into
"ocr_search" ("assetId", "text")
values
($14, $15)
on conflict ("assetId") do update
set
"text" = "excluded"."text"
)
select
1 as "dummy"


@ -16,6 +16,7 @@ import {
withExifInner,
withFaces,
withFacesAndPeople,
withFilePath,
withFiles,
} from 'src/utils/database';
@ -192,6 +193,15 @@ export class AssetJobRepository {
.executeTakeFirst();
}
@GenerateSql({ params: [DummyValue.UUID] })
getForOcr(id: string) {
return this.db
.selectFrom('asset')
.select((eb) => ['asset.visibility', withFilePath(eb, AssetFileType.Preview).as('previewFile')])
.where('asset.id', '=', id)
.executeTakeFirst();
}
@GenerateSql({ params: [[DummyValue.UUID]] })
getForSyncAssets(ids: string[]) {
return this.db
@ -348,6 +358,21 @@ export class AssetJobRepository {
.stream();
}
@GenerateSql({ params: [], stream: true })
streamForOcrJob(force?: boolean) {
return this.db
.selectFrom('asset')
.select(['asset.id'])
.$if(!force, (qb) =>
qb
.innerJoin('asset_job_status', 'asset_job_status.assetId', 'asset.id')
.where('asset_job_status.ocrAt', 'is', null),
)
.where('asset.deletedAt', 'is', null)
.where('asset.visibility', '!=', AssetVisibility.Hidden)
.stream();
}
@GenerateSql({ params: [DummyValue.DATE], stream: true })
streamForMigrationJob() {
return this.db.selectFrom('asset').select(['id']).where('asset.deletedAt', 'is', null).stream();


@ -205,6 +205,7 @@ export class AssetRepository {
metadataExtractedAt: eb.ref('excluded.metadataExtractedAt'),
previewAt: eb.ref('excluded.previewAt'),
thumbnailAt: eb.ref('excluded.thumbnailAt'),
ocrAt: eb.ref('excluded.ocrAt'),
},
values[0],
),


@ -25,6 +25,7 @@ import { MetadataRepository } from 'src/repositories/metadata.repository';
import { MoveRepository } from 'src/repositories/move.repository';
import { NotificationRepository } from 'src/repositories/notification.repository';
import { OAuthRepository } from 'src/repositories/oauth.repository';
import { OcrRepository } from 'src/repositories/ocr.repository';
import { PartnerRepository } from 'src/repositories/partner.repository';
import { PersonRepository } from 'src/repositories/person.repository';
import { ProcessRepository } from 'src/repositories/process.repository';
@ -74,6 +75,7 @@ export const repositories = [
MoveRepository,
NotificationRepository,
OAuthRepository,
OcrRepository,
PartnerRepository,
PersonRepository,
ProcessRepository,


@ -15,6 +15,7 @@ export interface BoundingBox {
export enum ModelTask {
FACIAL_RECOGNITION = 'facial-recognition',
SEARCH = 'clip',
OCR = 'ocr',
}
export enum ModelType {
@ -23,6 +24,7 @@ export enum ModelType {
RECOGNITION = 'recognition',
TEXTUAL = 'textual',
VISUAL = 'visual',
OCR = 'ocr',
}
export type ModelPayload = { imagePath: string } | { text: string };
@ -30,7 +32,11 @@ export type ModelPayload = { imagePath: string } | { text: string };
type ModelOptions = { modelName: string };
export type FaceDetectionOptions = ModelOptions & { minScore: number };
export type OcrOptions = ModelOptions & {
minDetectionScore: number;
minRecognitionScore: number;
maxResolution: number;
};
type VisualResponse = { imageHeight: number; imageWidth: number };
export type ClipVisualRequest = { [ModelTask.SEARCH]: { [ModelType.VISUAL]: ModelOptions } };
export type ClipVisualResponse = { [ModelTask.SEARCH]: string } & VisualResponse;
@ -38,6 +44,21 @@ export type ClipVisualResponse = { [ModelTask.SEARCH]: string } & VisualResponse
export type ClipTextualRequest = { [ModelTask.SEARCH]: { [ModelType.TEXTUAL]: ModelOptions } };
export type ClipTextualResponse = { [ModelTask.SEARCH]: string };
export type OCR = {
text: string[];
box: number[];
boxScore: number[];
textScore: number[];
};
export type OcrRequest = {
[ModelTask.OCR]: {
[ModelType.DETECTION]: ModelOptions & { options: { minScore: number; maxResolution: number } };
[ModelType.RECOGNITION]: ModelOptions & { options: { minScore: number } };
};
};
export type OcrResponse = { [ModelTask.OCR]: OCR } & VisualResponse;
export type FacialRecognitionRequest = {
[ModelTask.FACIAL_RECOGNITION]: {
[ModelType.DETECTION]: ModelOptions & { options: { minScore: number } };
@ -53,7 +74,7 @@ export interface Face {
export type FacialRecognitionResponse = { [ModelTask.FACIAL_RECOGNITION]: Face[] } & VisualResponse;
export type DetectedFaces = { faces: Face[] } & VisualResponse;
export type MachineLearningRequest = ClipVisualRequest | ClipTextualRequest | FacialRecognitionRequest;
export type MachineLearningRequest = ClipVisualRequest | ClipTextualRequest | FacialRecognitionRequest | OcrRequest;
export type TextEncodingOptions = ModelOptions & { language?: string };
@Injectable()
@ -197,6 +218,17 @@ export class MachineLearningRepository {
return response[ModelTask.SEARCH];
}
async ocr(imagePath: string, { modelName, minDetectionScore, minRecognitionScore, maxResolution }: OcrOptions) {
const request = {
[ModelTask.OCR]: {
[ModelType.DETECTION]: { modelName, options: { minScore: minDetectionScore, maxResolution } },
[ModelType.RECOGNITION]: { modelName, options: { minScore: minRecognitionScore } },
},
};
const response = await this.predict<OcrResponse>({ imagePath }, request);
return response[ModelTask.OCR];
}
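For illustration, a call to the method above with the shipped defaults; the repository instance is assumed to come from DI:

// Sketch: run OCR on a preview image using the default model and thresholds.
const { text, box, boxScore, textScore } = await machineLearningRepository.ocr(
  '/uploads/user-id/thumbs/path.jpg',
  { modelName: 'PP-OCRv5_mobile', minDetectionScore: 0.5, minRecognitionScore: 0.8, maxResolution: 736 },
);
// `box` is flat: eight numbers (four x/y corner pairs) per recognized line.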
private async getFormData(payload: ModelPayload, config: MachineLearningRequest): Promise<FormData> {
const formData = new FormData();
formData.append('entries', JSON.stringify(config));


@ -0,0 +1,68 @@
import { Injectable } from '@nestjs/common';
import { Insertable, Kysely, sql } from 'kysely';
import { InjectKysely } from 'nestjs-kysely';
import { DummyValue, GenerateSql } from 'src/decorators';
import { DB } from 'src/schema';
import { AssetOcrTable } from 'src/schema/tables/asset-ocr.table';
@Injectable()
export class OcrRepository {
constructor(@InjectKysely() private db: Kysely<DB>) {}
@GenerateSql({ params: [DummyValue.UUID] })
getById(id: string) {
return this.db.selectFrom('asset_ocr').selectAll('asset_ocr').where('asset_ocr.id', '=', id).executeTakeFirst();
}
@GenerateSql({ params: [DummyValue.UUID] })
getByAssetId(id: string) {
return this.db.selectFrom('asset_ocr').selectAll('asset_ocr').where('asset_ocr.assetId', '=', id).execute();
}
deleteAll() {
return this.db.transaction().execute(async (trx: Kysely<DB>) => {
await sql`truncate ${sql.table('asset_ocr')}`.execute(trx);
await sql`truncate ${sql.table('ocr_search')}`.execute(trx);
});
}
@GenerateSql({
params: [
DummyValue.UUID,
[
{
assetId: DummyValue.UUID,
x1: DummyValue.NUMBER,
y1: DummyValue.NUMBER,
x2: DummyValue.NUMBER,
y2: DummyValue.NUMBER,
x3: DummyValue.NUMBER,
y3: DummyValue.NUMBER,
x4: DummyValue.NUMBER,
y4: DummyValue.NUMBER,
text: DummyValue.STRING,
boxScore: DummyValue.NUMBER,
textScore: DummyValue.NUMBER,
},
],
],
})
upsert(assetId: string, ocrDataList: Insertable<AssetOcrTable>[]) {
let query = this.db.with('deleted_ocr', (db) => db.deleteFrom('asset_ocr').where('assetId', '=', assetId));
if (ocrDataList.length > 0) {
const searchText = ocrDataList.map((item) => item.text.trim()).join(' ');
(query as any) = query
.with('inserted_ocr', (db) => db.insertInto('asset_ocr').values(ocrDataList))
.with('inserted_search', (db) =>
db
.insertInto('ocr_search')
.values({ assetId, text: searchText })
.onConflict((oc) => oc.column('assetId').doUpdateSet((eb) => ({ text: eb.ref('excluded.text') }))),
);
} else {
(query as any) = query.with('deleted_search', (db) => db.deleteFrom('ocr_search').where('assetId', '=', assetId));
}
return query.selectNoFrom(sql`1`.as('dummy')).execute();
}
}
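Usage sketch for the upsert above; coordinates are normalized to [0, 1] per the table definition, and an empty list clears both the asset_ocr rows and the ocr_search row for the asset:

// Sketch: replace an asset's OCR rows and refresh its ocr_search text.
// `assetId` is assumed to be an existing asset's UUID.
await ocrRepository.upsert(assetId, [
  { assetId, x1: 0.1, y1: 0.1, x2: 0.4, y2: 0.1, x3: 0.4, y3: 0.2, x4: 0.1, y4: 0.2,
    text: 'STOP', boxScore: 0.98, textScore: 0.95 },
]);
await ocrRepository.upsert(assetId, []); // deletes the rows again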


@ -84,6 +84,10 @@ export interface SearchEmbeddingOptions {
userIds: string[];
}
export interface SearchOcrOptions {
ocr?: string;
}
export interface SearchPeopleOptions {
personIds?: string[];
}
@ -114,7 +118,8 @@ type BaseAssetSearchOptions = SearchDateOptions &
SearchUserIdOptions &
SearchPeopleOptions &
SearchTagOptions &
SearchAlbumOptions;
SearchAlbumOptions &
SearchOcrOptions;
export type AssetSearchOptions = BaseAssetSearchOptions & SearchRelationOptions;
@ -127,7 +132,10 @@ export type SmartSearchOptions = SearchDateOptions &
SearchStatusOptions &
SearchUserIdOptions &
SearchPeopleOptions &
SearchTagOptions;
SearchTagOptions &
SearchOcrOptions;
export type OcrSearchOptions = SearchDateOptions & SearchOcrOptions;
export type LargeAssetSearchOptions = AssetSearchOptions & { minFileSize?: number };


@ -35,6 +35,7 @@ import { AssetFileTable } from 'src/schema/tables/asset-file.table';
import { AssetJobStatusTable } from 'src/schema/tables/asset-job-status.table';
import { AssetMetadataAuditTable } from 'src/schema/tables/asset-metadata-audit.table';
import { AssetMetadataTable } from 'src/schema/tables/asset-metadata.table';
import { AssetOcrTable } from 'src/schema/tables/asset-ocr.table';
import { AssetTable } from 'src/schema/tables/asset.table';
import { AuditTable } from 'src/schema/tables/audit.table';
import { FaceSearchTable } from 'src/schema/tables/face-search.table';
@ -47,6 +48,7 @@ import { MemoryTable } from 'src/schema/tables/memory.table';
import { MoveTable } from 'src/schema/tables/move.table';
import { NaturalEarthCountriesTable } from 'src/schema/tables/natural-earth-countries.table';
import { NotificationTable } from 'src/schema/tables/notification.table';
import { OcrSearchTable } from 'src/schema/tables/ocr-search.table';
import { PartnerAuditTable } from 'src/schema/tables/partner-audit.table';
import { PartnerTable } from 'src/schema/tables/partner.table';
import { PersonAuditTable } from 'src/schema/tables/person-audit.table';
@ -87,6 +89,7 @@ export class ImmichDatabase {
AssetMetadataTable,
AssetMetadataAuditTable,
AssetJobStatusTable,
AssetOcrTable,
AssetTable,
AssetFileTable,
AuditTable,
@ -101,6 +104,7 @@ export class ImmichDatabase {
MoveTable,
NaturalEarthCountriesTable,
NotificationTable,
OcrSearchTable,
PartnerAuditTable,
PartnerTable,
PersonTable,
@ -174,6 +178,8 @@ export interface DB {
asset_metadata: AssetMetadataTable;
asset_metadata_audit: AssetMetadataAuditTable;
asset_job_status: AssetJobStatusTable;
asset_ocr: AssetOcrTable;
ocr_search: OcrSearchTable;
audit: AuditTable;


@ -0,0 +1,16 @@
import { Kysely, sql } from 'kysely';
export async function up(db: Kysely<any>): Promise<void> {
await sql`CREATE TABLE "asset_ocr" ("id" uuid NOT NULL DEFAULT uuid_generate_v4(), "assetId" uuid NOT NULL, "x1" real NOT NULL, "y1" real NOT NULL, "x2" real NOT NULL, "y2" real NOT NULL, "x3" real NOT NULL, "y3" real NOT NULL, "x4" real NOT NULL, "y4" real NOT NULL, "boxScore" real NOT NULL, "textScore" real NOT NULL, "text" text NOT NULL);`.execute(
db,
);
await sql`ALTER TABLE "asset_ocr" ADD CONSTRAINT "asset_ocr_pkey" PRIMARY KEY ("id");`.execute(db);
await sql`ALTER TABLE "asset_ocr" ADD CONSTRAINT "asset_ocr_assetId_fkey" FOREIGN KEY ("assetId") REFERENCES "asset" ("id") ON UPDATE CASCADE ON DELETE CASCADE;`.execute(
db,
);
await sql`CREATE INDEX "asset_ocr_assetId_idx" ON "asset_ocr" ("assetId")`.execute(db);
}
export async function down(db: Kysely<any>): Promise<void> {
await sql`DROP TABLE "asset_ocr";`.execute(db);
}


@ -0,0 +1,20 @@
import { Kysely, sql } from 'kysely';
export async function up(db: Kysely<any>): Promise<void> {
await sql`CREATE TABLE "ocr_search" ("assetId" uuid NOT NULL, "text" text NOT NULL);`.execute(db);
await sql`ALTER TABLE "ocr_search" ADD CONSTRAINT "ocr_search_pkey" PRIMARY KEY ("assetId");`.execute(db);
await sql`ALTER TABLE "ocr_search" ADD CONSTRAINT "ocr_search_assetId_fkey" FOREIGN KEY ("assetId") REFERENCES "asset" ("id") ON UPDATE CASCADE ON DELETE CASCADE;`.execute(
db,
);
await sql`CREATE INDEX "idx_ocr_search_text" ON "ocr_search" USING gin (f_unaccent("text") gin_trgm_ops);`.execute(
db,
);
await sql`INSERT INTO "migration_overrides" ("name", "value") VALUES ('index_idx_ocr_search_text', '{"type":"index","name":"idx_ocr_search_text","sql":"CREATE INDEX \\"idx_ocr_search_text\\" ON \\"ocr_search\\" USING gin (f_unaccent(\\"text\\") gin_trgm_ops);"}'::jsonb);`.execute(
db,
);
}
export async function down(db: Kysely<any>): Promise<void> {
await sql`DROP TABLE "ocr_search";`.execute(db);
await sql`DELETE FROM "migration_overrides" WHERE "name" = 'index_idx_ocr_search_text';`.execute(db);
}


@ -0,0 +1,9 @@
import { Kysely, sql } from 'kysely';
export async function up(db: Kysely<any>): Promise<void> {
await sql`ALTER TABLE "asset_job_status" ADD "ocrAt" timestamp with time zone;`.execute(db);
}
export async function down(db: Kysely<any>): Promise<void> {
await sql`ALTER TABLE "asset_job_status" DROP COLUMN "ocrAt";`.execute(db);
}


@ -20,4 +20,7 @@ export class AssetJobStatusTable {
@Column({ type: 'timestamp with time zone', nullable: true })
thumbnailAt!: Timestamp | null;
@Column({ type: 'timestamp with time zone', nullable: true })
ocrAt!: Timestamp | null;
}


@ -0,0 +1,45 @@
import { AssetTable } from 'src/schema/tables/asset.table';
import { Column, ForeignKeyColumn, Generated, PrimaryGeneratedColumn, Table } from 'src/sql-tools';
@Table('asset_ocr')
export class AssetOcrTable {
@PrimaryGeneratedColumn()
id!: Generated<string>;
@ForeignKeyColumn(() => AssetTable, { onDelete: 'CASCADE', onUpdate: 'CASCADE' })
assetId!: string;
// box positions are normalized, with values between 0 and 1
@Column({ type: 'real' })
x1!: number;
@Column({ type: 'real' })
y1!: number;
@Column({ type: 'real' })
x2!: number;
@Column({ type: 'real' })
y2!: number;
@Column({ type: 'real' })
x3!: number;
@Column({ type: 'real' })
y3!: number;
@Column({ type: 'real' })
x4!: number;
@Column({ type: 'real' })
y4!: number;
@Column({ type: 'real' })
boxScore!: number;
@Column({ type: 'real' })
textScore!: number;
@Column({ type: 'text' })
text!: string;
}
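Since the stored quad is normalized (see the comment above), rendering an overlay only needs a multiply by the displayed image size. A minimal sketch:

// Sketch: convert a normalized OCR quad to pixel-space corner points.
type OcrQuad = Pick<AssetOcrTable, 'x1' | 'y1' | 'x2' | 'y2' | 'x3' | 'y3' | 'x4' | 'y4'>;

const toPixelQuad = (q: OcrQuad, width: number, height: number): Array<[number, number]> => [
  [q.x1 * width, q.y1 * height],
  [q.x2 * width, q.y2 * height],
  [q.x3 * width, q.y3 * height],
  [q.x4 * width, q.y4 * height],
];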


@ -0,0 +1,20 @@
import { AssetTable } from 'src/schema/tables/asset.table';
import { Column, ForeignKeyColumn, Index, Table } from 'src/sql-tools';
@Table('ocr_search')
@Index({
name: 'idx_ocr_search_text',
using: 'gin',
expression: 'f_unaccent("text") gin_trgm_ops',
})
export class OcrSearchTable {
@ForeignKeyColumn(() => AssetTable, {
onDelete: 'CASCADE',
onUpdate: 'CASCADE',
primary: true,
})
assetId!: string;
@Column({ type: 'text' })
text!: string;
}


@ -32,6 +32,7 @@ import { MetadataRepository } from 'src/repositories/metadata.repository';
import { MoveRepository } from 'src/repositories/move.repository';
import { NotificationRepository } from 'src/repositories/notification.repository';
import { OAuthRepository } from 'src/repositories/oauth.repository';
import { OcrRepository } from 'src/repositories/ocr.repository';
import { PartnerRepository } from 'src/repositories/partner.repository';
import { PersonRepository } from 'src/repositories/person.repository';
import { ProcessRepository } from 'src/repositories/process.repository';
@ -84,6 +85,7 @@ export const BASE_SERVICE_DEPENDENCIES = [
MoveRepository,
NotificationRepository,
OAuthRepository,
OcrRepository,
PartnerRepository,
PersonRepository,
ProcessRepository,
@ -137,6 +139,7 @@ export class BaseService {
protected moveRepository: MoveRepository,
protected notificationRepository: NotificationRepository,
protected oauthRepository: OAuthRepository,
protected ocrRepository: OcrRepository,
protected partnerRepository: PartnerRepository,
protected personRepository: PersonRepository,
protected processRepository: ProcessRepository,


@ -20,6 +20,7 @@ import { MemoryService } from 'src/services/memory.service';
import { MetadataService } from 'src/services/metadata.service';
import { NotificationAdminService } from 'src/services/notification-admin.service';
import { NotificationService } from 'src/services/notification.service';
import { OcrService } from 'src/services/ocr.service';
import { PartnerService } from 'src/services/partner.service';
import { PersonService } from 'src/services/person.service';
import { SearchService } from 'src/services/search.service';
@ -65,6 +66,7 @@ export const services = [
MetadataService,
NotificationService,
NotificationAdminService,
OcrService,
PartnerService,
PersonService,
SearchService,


@ -24,7 +24,7 @@ describe(JobService.name, () => {
it('should update concurrency', () => {
sut.onConfigUpdate({ newConfig: defaults, oldConfig: {} as SystemConfig });
expect(mocks.job.setConcurrency).toHaveBeenCalledTimes(15);
expect(mocks.job.setConcurrency).toHaveBeenCalledTimes(16);
expect(mocks.job.setConcurrency).toHaveBeenNthCalledWith(5, QueueName.FacialRecognition, 1);
expect(mocks.job.setConcurrency).toHaveBeenNthCalledWith(7, QueueName.DuplicateDetection, 1);
expect(mocks.job.setConcurrency).toHaveBeenNthCalledWith(8, QueueName.BackgroundTask, 5);
@ -98,6 +98,7 @@ describe(JobService.name, () => {
[QueueName.Library]: expectedJobStatus,
[QueueName.Notification]: expectedJobStatus,
[QueueName.BackupDatabase]: expectedJobStatus,
[QueueName.Ocr]: expectedJobStatus,
});
});
});
@ -268,12 +269,12 @@ describe(JobService.name, () => {
},
{
item: { name: JobName.AssetGenerateThumbnails, data: { id: 'asset-1', source: 'upload' } },
jobs: [JobName.SmartSearch, JobName.AssetDetectFaces],
jobs: [JobName.SmartSearch, JobName.AssetDetectFaces, JobName.Ocr],
stub: [assetStub.livePhotoStillAsset],
},
{
item: { name: JobName.AssetGenerateThumbnails, data: { id: 'asset-1', source: 'upload' } },
jobs: [JobName.SmartSearch, JobName.AssetDetectFaces, JobName.AssetEncodeVideo],
jobs: [JobName.SmartSearch, JobName.AssetDetectFaces, JobName.Ocr, JobName.AssetEncodeVideo],
stub: [assetStub.video],
},
{


@ -236,6 +236,10 @@ export class JobService extends BaseService {
return this.jobRepository.queue({ name: JobName.DatabaseBackup, data: { force } });
}
case QueueName.Ocr: {
return this.jobRepository.queue({ name: JobName.OcrQueueAll, data: { force } });
}
default: {
throw new BadRequestException(`Invalid job name: ${name}`);
}
@ -350,6 +354,7 @@ export class JobService extends BaseService {
const jobs: JobItem[] = [
{ name: JobName.SmartSearch, data: item.data },
{ name: JobName.AssetDetectFaces, data: item.data },
{ name: JobName.Ocr, data: item.data },
];
if (asset.type === AssetType.Video) {


@ -0,0 +1,177 @@
import { AssetVisibility, ImmichWorker, JobName, JobStatus } from 'src/enum';
import { OcrService } from 'src/services/ocr.service';
import { assetStub } from 'test/fixtures/asset.stub';
import { systemConfigStub } from 'test/fixtures/system-config.stub';
import { makeStream, newTestService, ServiceMocks } from 'test/utils';
describe(OcrService.name, () => {
let sut: OcrService;
let mocks: ServiceMocks;
beforeEach(() => {
({ sut, mocks } = newTestService(OcrService));
mocks.config.getWorker.mockReturnValue(ImmichWorker.Microservices);
});
it('should work', () => {
expect(sut).toBeDefined();
});
describe('handleQueueOcr', () => {
it('should do nothing if machine learning is disabled', async () => {
mocks.systemMetadata.get.mockResolvedValue(systemConfigStub.machineLearningDisabled);
await sut.handleQueueOcr({ force: false });
expect(mocks.database.setDimensionSize).not.toHaveBeenCalled();
});
it('should queue the assets without ocr', async () => {
mocks.assetJob.streamForOcrJob.mockReturnValue(makeStream([assetStub.image]));
await sut.handleQueueOcr({ force: false });
expect(mocks.job.queueAll).toHaveBeenCalledWith([{ name: JobName.Ocr, data: { id: assetStub.image.id } }]);
expect(mocks.assetJob.streamForOcrJob).toHaveBeenCalledWith(false);
});
it('should queue all the assets', async () => {
mocks.assetJob.streamForOcrJob.mockReturnValue(makeStream([assetStub.image]));
await sut.handleQueueOcr({ force: true });
expect(mocks.job.queueAll).toHaveBeenCalledWith([{ name: JobName.Ocr, data: { id: assetStub.image.id } }]);
expect(mocks.assetJob.streamForOcrJob).toHaveBeenCalledWith(true);
});
});
describe('handleOcr', () => {
it('should do nothing if machine learning is disabled', async () => {
mocks.systemMetadata.get.mockResolvedValue(systemConfigStub.machineLearningDisabled);
expect(await sut.handleOcr({ id: '123' })).toEqual(JobStatus.Skipped);
expect(mocks.asset.getByIds).not.toHaveBeenCalled();
expect(mocks.machineLearning.encodeImage).not.toHaveBeenCalled();
});
it('should skip assets without a resize path', async () => {
mocks.assetJob.getForOcr.mockResolvedValue({ visibility: AssetVisibility.Timeline, previewFile: null });
expect(await sut.handleOcr({ id: assetStub.noResizePath.id })).toEqual(JobStatus.Failed);
expect(mocks.ocr.upsert).not.toHaveBeenCalled();
expect(mocks.machineLearning.ocr).not.toHaveBeenCalled();
});
it('should save the returned objects', async () => {
mocks.machineLearning.ocr.mockResolvedValue({
box: [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160],
boxScore: [0.9, 0.8],
text: ['One Two Three', 'Four Five'],
textScore: [0.95, 0.85],
});
mocks.assetJob.getForOcr.mockResolvedValue({
visibility: AssetVisibility.Timeline,
previewFile: assetStub.image.files[1].path,
});
expect(await sut.handleOcr({ id: assetStub.image.id })).toEqual(JobStatus.Success);
expect(mocks.machineLearning.ocr).toHaveBeenCalledWith(
'/uploads/user-id/thumbs/path.jpg',
expect.objectContaining({
modelName: 'PP-OCRv5_mobile',
minDetectionScore: 0.5,
minRecognitionScore: 0.8,
maxResolution: 736,
}),
);
expect(mocks.ocr.upsert).toHaveBeenCalledWith(assetStub.image.id, [
{
assetId: assetStub.image.id,
boxScore: 0.9,
text: 'One Two Three',
textScore: 0.95,
x1: 10,
y1: 20,
x2: 30,
y2: 40,
x3: 50,
y3: 60,
x4: 70,
y4: 80,
},
{
assetId: assetStub.image.id,
boxScore: 0.8,
text: 'Four Five',
textScore: 0.85,
x1: 90,
y1: 100,
x2: 110,
y2: 120,
x3: 130,
y3: 140,
x4: 150,
y4: 160,
},
]);
});
it('should apply config settings', async () => {
mocks.systemMetadata.get.mockResolvedValue({
machineLearning: {
enabled: true,
ocr: {
modelName: 'PP-OCRv5_server',
enabled: true,
minDetectionScore: 0.8,
minRecognitionScore: 0.9,
maxResolution: 1500,
},
},
});
mocks.machineLearning.ocr.mockResolvedValue({ box: [], boxScore: [], text: [], textScore: [] });
mocks.assetJob.getForOcr.mockResolvedValue({
visibility: AssetVisibility.Timeline,
previewFile: assetStub.image.files[1].path,
});
expect(await sut.handleOcr({ id: assetStub.image.id })).toEqual(JobStatus.Success);
expect(mocks.machineLearning.ocr).toHaveBeenCalledWith(
'/uploads/user-id/thumbs/path.jpg',
expect.objectContaining({
modelName: 'PP-OCRv5_server',
minDetectionScore: 0.8,
minRecognitionScore: 0.9,
maxResolution: 1500,
}),
);
expect(mocks.ocr.upsert).toHaveBeenCalledWith(assetStub.image.id, []);
});
it('should skip invisible assets', async () => {
mocks.assetJob.getForOcr.mockResolvedValue({
visibility: AssetVisibility.Hidden,
previewFile: assetStub.image.files[1].path,
});
expect(await sut.handleOcr({ id: assetStub.livePhotoMotionAsset.id })).toEqual(JobStatus.Skipped);
expect(mocks.machineLearning.ocr).not.toHaveBeenCalled();
expect(mocks.ocr.upsert).not.toHaveBeenCalled();
});
it('should fail if asset could not be found', async () => {
mocks.assetJob.getForOcr.mockResolvedValue(void 0);
expect(await sut.handleOcr({ id: assetStub.image.id })).toEqual(JobStatus.Failed);
expect(mocks.machineLearning.ocr).not.toHaveBeenCalled();
expect(mocks.ocr.upsert).not.toHaveBeenCalled();
});
});
});


@ -0,0 +1,86 @@
import { Injectable } from '@nestjs/common';
import { JOBS_ASSET_PAGINATION_SIZE } from 'src/constants';
import { OnJob } from 'src/decorators';
import { AssetVisibility, JobName, JobStatus, QueueName } from 'src/enum';
import { OCR } from 'src/repositories/machine-learning.repository';
import { BaseService } from 'src/services/base.service';
import { JobItem, JobOf } from 'src/types';
import { isOcrEnabled } from 'src/utils/misc';
@Injectable()
export class OcrService extends BaseService {
@OnJob({ name: JobName.OcrQueueAll, queue: QueueName.Ocr })
async handleQueueOcr({ force }: JobOf<JobName.OcrQueueAll>): Promise<JobStatus> {
const { machineLearning } = await this.getConfig({ withCache: false });
if (!isOcrEnabled(machineLearning)) {
return JobStatus.Skipped;
}
if (force) {
await this.ocrRepository.deleteAll();
}
let jobs: JobItem[] = [];
const assets = this.assetJobRepository.streamForOcrJob(force);
for await (const asset of assets) {
jobs.push({ name: JobName.Ocr, data: { id: asset.id } });
if (jobs.length >= JOBS_ASSET_PAGINATION_SIZE) {
await this.jobRepository.queueAll(jobs);
jobs = [];
}
}
await this.jobRepository.queueAll(jobs);
return JobStatus.Success;
}
@OnJob({ name: JobName.Ocr, queue: QueueName.Ocr })
async handleOcr({ id }: JobOf<JobName.Ocr>): Promise<JobStatus> {
const { machineLearning } = await this.getConfig({ withCache: true });
if (!isOcrEnabled(machineLearning)) {
return JobStatus.Skipped;
}
const asset = await this.assetJobRepository.getForOcr(id);
if (!asset || !asset.previewFile) {
return JobStatus.Failed;
}
if (asset.visibility === AssetVisibility.Hidden) {
return JobStatus.Skipped;
}
const ocrResults = await this.machineLearningRepository.ocr(asset.previewFile, machineLearning.ocr);
await this.ocrRepository.upsert(id, this.parseOcrResults(id, ocrResults));
await this.assetRepository.upsertJobStatus({ assetId: id, ocrAt: new Date() });
this.logger.debug(`Processed ${ocrResults.text.length} OCR result(s) for ${id}`);
return JobStatus.Success;
}
private parseOcrResults(id: string, { box, boxScore, text, textScore }: OCR) {
const ocrDataList = [];
for (let i = 0; i < text.length; i++) {
const boxOffset = i * 8;
ocrDataList.push({
assetId: id,
x1: box[boxOffset],
y1: box[boxOffset + 1],
x2: box[boxOffset + 2],
y2: box[boxOffset + 3],
x3: box[boxOffset + 4],
y3: box[boxOffset + 5],
x4: box[boxOffset + 6],
y4: box[boxOffset + 7],
boxScore: boxScore[i],
textScore: textScore[i],
text: text[i],
});
}
return ocrDataList;
}
}
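The ML response packs every corner coordinate into one flat box array, eight values per result, which is why parseOcrResults steps by i * 8. A worked example matching the unit test above:

// Two results => box has 16 numbers; result i starts at offset i * 8.
const box = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160];
// i = 0 => boxOffset = 0, so (x1, y1) = (10, 20) and (x4, y4) = (70, 80).
// i = 1 => boxOffset = 8, so (x1, y1) = (90, 100) and (x4, y4) = (150, 160).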


@ -141,6 +141,7 @@ describe(ServerService.name, () => {
reverseGeocoding: true,
oauth: false,
oauthAutoLaunch: false,
ocr: true,
passwordLogin: true,
search: true,
sidecar: true,


@ -19,7 +19,12 @@ import { UserStatsQueryResponse } from 'src/repositories/user.repository';
import { BaseService } from 'src/services/base.service';
import { asHumanReadable } from 'src/utils/bytes';
import { mimeTypes } from 'src/utils/mime-types';
import { isDuplicateDetectionEnabled, isFacialRecognitionEnabled, isSmartSearchEnabled } from 'src/utils/misc';
import {
isDuplicateDetectionEnabled,
isFacialRecognitionEnabled,
isOcrEnabled,
isSmartSearchEnabled,
} from 'src/utils/misc';
@Injectable()
export class ServerService extends BaseService {
@ -97,6 +102,7 @@ export class ServerService extends BaseService {
trash: trash.enabled,
oauth: oauth.enabled,
oauthAutoLaunch: oauth.autoLaunch,
ocr: isOcrEnabled(machineLearning),
passwordLogin: passwordLogin.enabled,
configFile: !!configFile,
email: notifications.smtp.enabled,


@ -39,6 +39,7 @@ const updatedConfig = Object.freeze<SystemConfig>({
[QueueName.ThumbnailGeneration]: { concurrency: 3 },
[QueueName.VideoConversion]: { concurrency: 1 },
[QueueName.Notification]: { concurrency: 5 },
[QueueName.Ocr]: { concurrency: 1 },
},
backup: {
database: {
@ -102,6 +103,13 @@ const updatedConfig = Object.freeze<SystemConfig>({
maxDistance: 0.5,
minFaces: 3,
},
ocr: {
enabled: true,
modelName: 'PP-OCRv5_mobile',
minDetectionScore: 0.5,
minRecognitionScore: 0.8,
maxResolution: 736,
},
},
map: {
enabled: true,


@ -322,7 +322,8 @@ export type ColumnType =
| 'uuid'
| 'vector'
| 'enum'
| 'serial';
| 'serial'
| 'real';
export type DatabaseSchema = {
databaseName: string;


@ -370,7 +370,11 @@ export type JobItem =
| { name: JobName.NotifyUserSignup; data: INotifySignupJob }
// Version check
| { name: JobName.VersionCheck; data: IBaseJob };
| { name: JobName.VersionCheck; data: IBaseJob }
// OCR
| { name: JobName.OcrQueueAll; data: IBaseJob }
| { name: JobName.Ocr; data: IEntityJob };
export type VectorExtension = (typeof VECTOR_EXTENSIONS)[number];


@ -200,6 +200,14 @@ export function withFiles(eb: ExpressionBuilder<DB, 'asset'>, type?: AssetFileTy
).as('files');
}
export function withFilePath(eb: ExpressionBuilder<DB, 'asset'>, type: AssetFileType) {
return eb
.selectFrom('asset_file')
.select('asset_file.path')
.whereRef('asset_file.assetId', '=', 'asset.id')
.where('asset_file.type', '=', type);
}
export function withFacesAndPeople(eb: ExpressionBuilder<DB, 'asset'>, withDeletedFace?: boolean) {
return jsonArrayFrom(
eb
@ -380,6 +388,11 @@ export function searchAssetBuilder(kysely: Kysely<DB>, options: AssetSearchBuild
.innerJoin('asset_exif', 'asset.id', 'asset_exif.assetId')
.where(sql`f_unaccent(asset_exif.description)`, 'ilike', sql`'%' || f_unaccent(${options.description}) || '%'`),
)
.$if(!!options.ocr, (qb) =>
qb
.innerJoin('ocr_search', 'asset.id', 'ocr_search.assetId')
.where(() => sql`f_unaccent(ocr_search.text) %>> f_unaccent(${options.ocr!})`),
)
.$if(!!options.type, (qb) => qb.where('asset.type', '=', options.type!))
.$if(options.isFavorite !== undefined, (qb) => qb.where('asset.isFavorite', '=', options.isFavorite!))
.$if(options.isOffline !== undefined, (qb) => qb.where('asset.isOffline', '=', options.isOffline!))
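The ocr branch above leans on the GIN gin_trgm_ops index created in the ocr_search migration; %>> should be pg_trgm's strict word-similarity operator (the commutator of <<%), so the condition holds when the unaccented query is strictly word-similar to a word span of the stored text. A rough raw-SQL sketch, not the generated output:

// Approximate SQL for the ocr filter above ($1 is the unaccented search term).
const ocrFilterSql = `
  SELECT asset.id
  FROM asset
  INNER JOIN ocr_search ON ocr_search."assetId" = asset.id
  WHERE f_unaccent(ocr_search.text) %>> f_unaccent($1)
`;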


@ -95,6 +95,8 @@ export const unsetDeep = (object: unknown, key: string) => {
const isMachineLearningEnabled = (machineLearning: SystemConfig['machineLearning']) => machineLearning.enabled;
export const isSmartSearchEnabled = (machineLearning: SystemConfig['machineLearning']) =>
isMachineLearningEnabled(machineLearning) && machineLearning.clip.enabled;
export const isOcrEnabled = (machineLearning: SystemConfig['machineLearning']) =>
isMachineLearningEnabled(machineLearning) && machineLearning.ocr.enabled;
export const isFacialRecognitionEnabled = (machineLearning: SystemConfig['machineLearning']) =>
isMachineLearningEnabled(machineLearning) && machineLearning.facialRecognition.enabled;
export const isDuplicateDetectionEnabled = (machineLearning: SystemConfig['machineLearning']) =>


@ -27,8 +27,10 @@ import { EmailRepository } from 'src/repositories/email.repository';
import { EventRepository } from 'src/repositories/event.repository';
import { JobRepository } from 'src/repositories/job.repository';
import { LoggingRepository } from 'src/repositories/logging.repository';
import { MachineLearningRepository } from 'src/repositories/machine-learning.repository';
import { MemoryRepository } from 'src/repositories/memory.repository';
import { NotificationRepository } from 'src/repositories/notification.repository';
import { OcrRepository } from 'src/repositories/ocr.repository';
import { PartnerRepository } from 'src/repositories/partner.repository';
import { PersonRepository } from 'src/repositories/person.repository';
import { SearchRepository } from 'src/repositories/search.repository';
@ -47,6 +49,7 @@ import { VersionHistoryRepository } from 'src/repositories/version-history.repos
import { DB } from 'src/schema';
import { AlbumTable } from 'src/schema/tables/album.table';
import { AssetExifTable } from 'src/schema/tables/asset-exif.table';
import { AssetFileTable } from 'src/schema/tables/asset-file.table';
import { AssetJobStatusTable } from 'src/schema/tables/asset-job-status.table';
import { AssetTable } from 'src/schema/tables/asset.table';
import { FaceSearchTable } from 'src/schema/tables/face-search.table';
@ -169,6 +172,11 @@ export class MediumTestContext<S extends BaseService = BaseService> {
return { asset, result };
}
async newAssetFile(dto: Insertable<AssetFileTable>) {
const result = await this.get(AssetRepository).upsertFile(dto);
return { result };
}
async newAssetFace(dto: Partial<Insertable<AssetFace>> & { assetId: string }) {
const assetFace = mediumFactory.assetFaceInsert(dto);
const result = await this.get(PersonRepository).createAssetFace(assetFace);
@ -307,6 +315,7 @@ const newRealRepository = <T>(key: ClassConstructor<T>, db: Kysely<DB>): T => {
case AssetJobRepository:
case MemoryRepository:
case NotificationRepository:
case OcrRepository:
case PartnerRepository:
case PersonRepository:
case SearchRepository:
@ -359,6 +368,7 @@ const newMockRepository = <T>(key: ClassConstructor<T>) => {
case CryptoRepository:
case MemoryRepository:
case NotificationRepository:
case OcrRepository:
case PartnerRepository:
case PersonRepository:
case SessionRepository:
@ -407,6 +417,10 @@ const newMockRepository = <T>(key: ClassConstructor<T>) => {
return automock(LoggingRepository, { args: [undefined, configMock], strict: false });
}
case MachineLearningRepository: {
return automock(MachineLearningRepository, { args: [{ setContext: () => {} }] });
}
case StorageRepository: {
return automock(StorageRepository, { args: [{ setContext: () => {} }] });
}


@ -0,0 +1,243 @@
import { Kysely } from 'kysely';
import { AssetFileType, JobStatus } from 'src/enum';
import { AssetJobRepository } from 'src/repositories/asset-job.repository';
import { AssetRepository } from 'src/repositories/asset.repository';
import { ConfigRepository } from 'src/repositories/config.repository';
import { JobRepository } from 'src/repositories/job.repository';
import { LoggingRepository } from 'src/repositories/logging.repository';
import { MachineLearningRepository } from 'src/repositories/machine-learning.repository';
import { OcrRepository } from 'src/repositories/ocr.repository';
import { SystemMetadataRepository } from 'src/repositories/system-metadata.repository';
import { DB } from 'src/schema';
import { OcrService } from 'src/services/ocr.service';
import { newMediumService } from 'test/medium.factory';
import { getKyselyDB } from 'test/utils';
let defaultDatabase: Kysely<DB>;
const setup = (db?: Kysely<DB>) => {
return newMediumService(OcrService, {
database: db || defaultDatabase,
real: [AssetRepository, AssetJobRepository, ConfigRepository, OcrRepository, SystemMetadataRepository],
mock: [JobRepository, LoggingRepository, MachineLearningRepository],
});
};
beforeAll(async () => {
defaultDatabase = await getKyselyDB();
});
describe(OcrService.name, () => {
it('should work', () => {
const { sut } = setup();
expect(sut).toBeDefined();
});
it('should parse asset', async () => {
const { sut, ctx } = setup();
const { user } = await ctx.newUser();
const { asset } = await ctx.newAsset({ ownerId: user.id });
await ctx.newAssetFile({ assetId: asset.id, type: AssetFileType.Preview, path: 'preview.jpg' });
const machineLearningMock = ctx.getMock(MachineLearningRepository);
machineLearningMock.ocr.mockResolvedValue({
box: [10, 10, 50, 10, 50, 50, 10, 50],
boxScore: [0.99],
text: ['Test OCR'],
textScore: [0.95],
});
await expect(sut.handleOcr({ id: asset.id })).resolves.toBe(JobStatus.Success);
const ocrRepository = ctx.get(OcrRepository);
await expect(ocrRepository.getByAssetId(asset.id)).resolves.toEqual([
{
assetId: asset.id,
boxScore: 0.99,
id: expect.any(String),
text: 'Test OCR',
textScore: 0.95,
x1: 10,
y1: 10,
x2: 50,
y2: 10,
x3: 50,
y3: 50,
x4: 10,
y4: 50,
},
]);
await expect(
ctx.database.selectFrom('ocr_search').selectAll().where('assetId', '=', asset.id).executeTakeFirst(),
).resolves.toEqual({
assetId: asset.id,
text: 'Test OCR',
});
await expect(
ctx.database
.selectFrom('asset_job_status')
.select('asset_job_status.ocrAt')
.where('assetId', '=', asset.id)
.executeTakeFirst(),
).resolves.toEqual({ ocrAt: expect.any(Date) });
});
it('should handle multiple boxes', async () => {
const { sut, ctx } = setup();
const { user } = await ctx.newUser();
const { asset } = await ctx.newAsset({ ownerId: user.id });
await ctx.newAssetFile({ assetId: asset.id, type: AssetFileType.Preview, path: 'preview.jpg' });
const machineLearningMock = ctx.getMock(MachineLearningRepository);
machineLearningMock.ocr.mockResolvedValue({
box: Array.from({ length: 8 * 5 }, (_, i) => i),
boxScore: [0.7, 0.67, 0.65, 0.62, 0.6],
text: ['One', 'Two', 'Three', 'Four', 'Five'],
textScore: [0.9, 0.89, 0.88, 0.87, 0.86],
});
await expect(sut.handleOcr({ id: asset.id })).resolves.toBe(JobStatus.Success);
const ocrRepository = ctx.get(OcrRepository);
await expect(ocrRepository.getByAssetId(asset.id)).resolves.toEqual([
{
assetId: asset.id,
boxScore: 0.7,
id: expect.any(String),
text: 'One',
textScore: 0.9,
x1: 0,
y1: 1,
x2: 2,
y2: 3,
x3: 4,
y3: 5,
x4: 6,
y4: 7,
},
{
assetId: asset.id,
boxScore: 0.67,
id: expect.any(String),
text: 'Two',
textScore: 0.89,
x1: 8,
y1: 9,
x2: 10,
y2: 11,
x3: 12,
y3: 13,
x4: 14,
y4: 15,
},
{
assetId: asset.id,
boxScore: 0.65,
id: expect.any(String),
text: 'Three',
textScore: 0.88,
x1: 16,
y1: 17,
x2: 18,
y2: 19,
x3: 20,
y3: 21,
x4: 22,
y4: 23,
},
{
assetId: asset.id,
boxScore: 0.62,
id: expect.any(String),
text: 'Four',
textScore: 0.87,
x1: 24,
y1: 25,
x2: 26,
y2: 27,
x3: 28,
y3: 29,
x4: 30,
y4: 31,
},
{
assetId: asset.id,
boxScore: 0.6,
id: expect.any(String),
text: 'Five',
textScore: 0.86,
x1: 32,
y1: 33,
x2: 34,
y2: 35,
x3: 36,
y3: 37,
x4: 38,
y4: 39,
},
]);
await expect(
ctx.database.selectFrom('ocr_search').selectAll().where('assetId', '=', asset.id).executeTakeFirst(),
).resolves.toEqual({
assetId: asset.id,
text: 'One Two Three Four Five',
});
await expect(
ctx.database
.selectFrom('asset_job_status')
.select('asset_job_status.ocrAt')
.where('assetId', '=', asset.id)
.executeTakeFirst(),
).resolves.toEqual({ ocrAt: expect.any(Date) });
});
it('should handle no boxes', async () => {
const { sut, ctx } = setup();
const { user } = await ctx.newUser();
const { asset } = await ctx.newAsset({ ownerId: user.id });
await ctx.newAssetFile({ assetId: asset.id, type: AssetFileType.Preview, path: 'preview.jpg' });
const machineLearningMock = ctx.getMock(MachineLearningRepository);
machineLearningMock.ocr.mockResolvedValue({ box: [], boxScore: [], text: [], textScore: [] });
await expect(sut.handleOcr({ id: asset.id })).resolves.toBe(JobStatus.Success);
const ocrRepository = ctx.get(OcrRepository);
await expect(ocrRepository.getByAssetId(asset.id)).resolves.toEqual([]);
await expect(
ctx.database.selectFrom('ocr_search').selectAll().where('assetId', '=', asset.id).executeTakeFirst(),
).resolves.toBeUndefined();
await expect(
ctx.database
.selectFrom('asset_job_status')
.select('asset_job_status.ocrAt')
.where('assetId', '=', asset.id)
.executeTakeFirst(),
).resolves.toEqual({ ocrAt: expect.any(Date) });
});
it('should update existing results', async () => {
const { sut, ctx } = setup();
const { user } = await ctx.newUser();
const { asset } = await ctx.newAsset({ ownerId: user.id });
await ctx.newAssetFile({ assetId: asset.id, type: AssetFileType.Preview, path: 'preview.jpg' });
const machineLearningMock = ctx.getMock(MachineLearningRepository);
machineLearningMock.ocr.mockResolvedValue({
box: [10, 10, 50, 10, 50, 50, 10, 50],
boxScore: [0.99],
text: ['Test OCR'],
textScore: [0.95],
});
await expect(sut.handleOcr({ id: asset.id })).resolves.toBe(JobStatus.Success);
machineLearningMock.ocr.mockResolvedValue({ box: [], boxScore: [], text: [], textScore: [] });
await expect(sut.handleOcr({ id: asset.id })).resolves.toBe(JobStatus.Success);
const ocrRepository = ctx.get(OcrRepository);
await expect(ocrRepository.getByAssetId(asset.id)).resolves.toEqual([]);
await expect(
ctx.database.selectFrom('ocr_search').selectAll().where('assetId', '=', asset.id).executeTakeFirst(),
).resolves.toBeUndefined();
});
});


@ -41,6 +41,7 @@ import { MetadataRepository } from 'src/repositories/metadata.repository';
import { MoveRepository } from 'src/repositories/move.repository';
import { NotificationRepository } from 'src/repositories/notification.repository';
import { OAuthRepository } from 'src/repositories/oauth.repository';
import { OcrRepository } from 'src/repositories/ocr.repository';
import { PartnerRepository } from 'src/repositories/partner.repository';
import { PersonRepository } from 'src/repositories/person.repository';
import { ProcessRepository } from 'src/repositories/process.repository';
@ -230,6 +231,7 @@ export type ServiceOverrides = {
metadata: MetadataRepository;
move: MoveRepository;
notification: NotificationRepository;
ocr: OcrRepository;
oauth: OAuthRepository;
partner: PartnerRepository;
person: PersonRepository;
@ -302,6 +304,7 @@ export const newTestService = <T extends BaseService>(
metadata: newMetadataRepositoryMock(),
move: automock(MoveRepository, { strict: false }),
notification: automock(NotificationRepository),
ocr: automock(OcrRepository, { strict: false }),
oauth: automock(OAuthRepository, { args: [loggerMock] }),
partner: automock(PartnerRepository, { strict: false }),
person: automock(PersonRepository, { strict: false }),
@ -357,6 +360,7 @@ export const newTestService = <T extends BaseService>(
overrides.move || (mocks.move as As<MoveRepository>),
overrides.notification || (mocks.notification as As<NotificationRepository>),
overrides.oauth || (mocks.oauth as As<OAuthRepository>),
overrides.ocr || (mocks.ocr as As<OcrRepository>),
overrides.partner || (mocks.partner as As<PartnerRepository>),
overrides.person || (mocks.person as As<PersonRepository>),
overrides.process || (mocks.process as As<ProcessRepository>),

View File

@@ -31,6 +31,7 @@
JobName.VideoConversion,
JobName.StorageTemplateMigration,
JobName.Migration,
JobName.Ocr,
];
// eslint-disable-next-line @typescript-eslint/no-explicit-any

View File

@@ -254,6 +254,71 @@
</div>
</SettingAccordion>
<SettingAccordion
key="ocr"
title={$t('admin.machine_learning_ocr')}
subtitle={$t('admin.machine_learning_ocr_description')}
>
<div class="ml-4 mt-4 flex flex-col gap-4">
<SettingSwitch
title={$t('admin.machine_learning_ocr_enabled')}
subtitle={$t('admin.machine_learning_ocr_enabled_description')}
bind:checked={config.machineLearning.ocr.enabled}
disabled={disabled || !config.machineLearning.enabled}
/>
<hr />
<SettingSelect
label={$t('admin.machine_learning_ocr_model')}
desc={$t('admin.machine_learning_ocr_model_description')}
name="ocr-model"
bind:value={config.machineLearning.ocr.modelName}
options={[
{ value: 'PP-OCRv5_server', text: 'PP-OCRv5_server' },
{ value: 'PP-OCRv5_mobile', text: 'PP-OCRv5_mobile' },
]}
disabled={disabled || !config.machineLearning.enabled || !config.machineLearning.ocr.enabled}
isEdited={config.machineLearning.ocr.modelName !== savedConfig.machineLearning.ocr.modelName}
/>
<SettingInputField
inputType={SettingInputFieldType.NUMBER}
label={$t('admin.machine_learning_ocr_min_detection_score')}
description={$t('admin.machine_learning_ocr_min_detection_score_description')}
bind:value={config.machineLearning.ocr.minDetectionScore}
step="0.1"
min={0.1}
max={1}
disabled={disabled || !config.machineLearning.enabled || !config.machineLearning.ocr.enabled}
isEdited={config.machineLearning.ocr.minDetectionScore !==
savedConfig.machineLearning.ocr.minDetectionScore}
/>
<SettingInputField
inputType={SettingInputFieldType.NUMBER}
label={$t('admin.machine_learning_ocr_min_recognition_score')}
description={$t('admin.machine_learning_ocr_min_recognition_score_description')}
bind:value={config.machineLearning.ocr.minRecognitionScore}
step="0.1"
min={0.1}
max={1}
disabled={disabled || !config.machineLearning.enabled || !config.machineLearning.ocr.enabled}
isEdited={config.machineLearning.ocr.minRecognitionScore !==
savedConfig.machineLearning.ocr.minRecognitionScore}
/>
<SettingInputField
inputType={SettingInputFieldType.NUMBER}
label={$t('admin.machine_learning_ocr_max_resolution')}
description={$t('admin.machine_learning_ocr_max_resolution_description')}
bind:value={config.machineLearning.ocr.maxResolution}
min={1}
disabled={disabled || !config.machineLearning.enabled || !config.machineLearning.ocr.enabled}
isEdited={config.machineLearning.ocr.maxResolution !== savedConfig.machineLearning.ocr.maxResolution}
/>
</div>
</SettingAccordion>
<SettingButtonsRow
onReset={(options) => onReset({ ...options, configKeys: ['machineLearning'] })}
onSave={() => onSave({ machineLearning: config.machineLearning })}
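Read off the bind targets, the accordion implies this shape under config.machineLearning.ocr (the interface name is a stand-in; the field names and bounds come from the bindings above):

interface OcrConfig {
  enabled: boolean;
  modelName: 'PP-OCRv5_server' | 'PP-OCRv5_mobile';
  minDetectionScore: number; // 0.1 to 1 in steps of 0.1
  minRecognitionScore: number; // 0.1 to 1 in steps of 0.1
  maxResolution: number; // at least 1
}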

View File

@@ -19,6 +19,7 @@
mdiTable,
mdiTagFaces,
mdiVideo,
mdiOcr,
} from '@mdi/js';
import type { Component } from 'svelte';
import { t } from 'svelte-i18n';
@@ -124,6 +125,14 @@
handleCommand: handleConfirmCommand,
disabled: !$featureFlags.facialRecognition,
},
[JobName.Ocr]: {
icon: mdiOcr,
title: $getJobName(JobName.Ocr),
subtitle: $t('admin.ocr_job_description'),
allText: $t('all'),
missingText: $t('missing'),
disabled: !$featureFlags.ocr,
},
[JobName.VideoConversion]: {
icon: mdiVideo,
title: $getJobName(JobName.VideoConversion),

View File

@@ -107,7 +107,7 @@
const onSubmit = () => {
const searchType = getSearchType();
let payload: SmartSearchDto | MetadataSearchDto = {} as SmartSearchDto | MetadataSearchDto;
let payload = {} as SmartSearchDto | MetadataSearchDto;
switch (searchType) {
case 'smart': {
@@ -122,6 +122,10 @@
payload = { description: value } as MetadataSearchDto;
break;
}
case 'ocr': {
payload = { ocr: value } as MetadataSearchDto;
break;
}
}
handlePromiseError(handleSearch(payload));
@@ -171,17 +175,14 @@
onSubmit();
};
function getSearchType(): 'smart' | 'metadata' | 'description' {
function getSearchType() {
const searchType = localStorage.getItem('searchQueryType');
switch (searchType) {
case 'smart': {
return 'smart';
}
case 'metadata': {
return 'metadata';
}
case 'description': {
return 'description';
case 'smart':
case 'metadata':
case 'description':
case 'ocr': {
return searchType;
}
default: {
return 'smart';
@@ -201,6 +202,9 @@
case 'description': {
return $t('description');
}
case 'ocr': {
return $t('ocr');
}
}
}
</script>
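Dropping the explicit return annotation works because the grouped case labels narrow searchType to the four literals, so TypeScript infers the union on its own. A standalone illustration of the same narrowing:

// localStorage.getItem returns string | null; the case labels narrow it.
function toSearchType(stored: string | null): 'smart' | 'metadata' | 'description' | 'ocr' {
  switch (stored) {
    case 'smart':
    case 'metadata':
    case 'description':
    case 'ocr': {
      return stored; // narrowed to the literal union here
    }
    default: {
      return 'smart';
    }
  }
}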

View File

@@ -4,7 +4,7 @@
interface Props {
query: string | undefined;
queryType?: 'smart' | 'metadata' | 'description';
queryType?: 'smart' | 'metadata' | 'description' | 'ocr';
}
let { query = $bindable(), queryType = $bindable('smart') }: Props = $props();
@@ -28,6 +28,7 @@
bind:group={queryType}
value="description"
/>
<RadioButton name="query-type" id="ocr-radio" label={$t('ocr')} bind:group={queryType} value="ocr" />
</div>
</fieldset>
@@ -63,4 +64,15 @@
bind:value={query}
aria-labelledby="description-label"
/>
{:else if queryType === 'ocr'}
<label id="ocr-label" for="ocr-input" class="immich-form-label">{$t('search_by_ocr')}</label>
<input
class="immich-form-input hover:cursor-text w-full !mt-1"
type="text"
id="ocr-input"
name="ocr"
placeholder={$t('search_by_ocr_example')}
bind:value={query}
aria-labelledby="ocr-label"
/>
{/if}

View File

@@ -138,9 +138,10 @@ export enum QueryType {
SMART = 'smart',
METADATA = 'metadata',
DESCRIPTION = 'description',
OCR = 'ocr',
}
export const validQueryTypes = new Set([QueryType.SMART, QueryType.METADATA, QueryType.DESCRIPTION]);
export const validQueryTypes = new Set([QueryType.SMART, QueryType.METADATA, QueryType.DESCRIPTION, QueryType.OCR]);
export const locales = [
{ code: 'af-ZA', name: 'Afrikaans (South Africa)' },
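A hedged sketch of how the widened set would typically be consumed when reading a stored value back (the helper is illustrative, not taken from this diff):

// Illustrative guard built on the exported validQueryTypes set.
function toQueryType(raw: string | null): QueryType {
  return raw !== null && validQueryTypes.has(raw as QueryType) ? (raw as QueryType) : QueryType.SMART;
}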

View File

@@ -6,7 +6,8 @@
export type SearchFilter = {
query: string;
queryType: 'smart' | 'metadata' | 'description';
ocr?: string;
queryType: 'smart' | 'metadata' | 'description' | 'ocr';
personIds: SvelteSet<string>;
tagIds: SvelteSet<string> | null;
location: SearchLocationFilter;
@@ -74,6 +75,7 @@
let filter: SearchFilter = $state({
query,
ocr: searchQuery.ocr,
queryType: defaultQueryType(),
personIds: new SvelteSet('personIds' in searchQuery ? searchQuery.personIds : []),
tagIds:
@@ -113,6 +115,7 @@
const resetForm = () => {
filter = {
query: '',
ocr: undefined,
queryType: defaultQueryType(), // retain from localStorage or default
personIds: new SvelteSet(),
tagIds: new SvelteSet(),
@@ -141,6 +144,7 @@
let payload: SmartSearchDto | MetadataSearchDto = {
query: filter.queryType === 'smart' ? query : undefined,
ocr: filter.queryType === 'ocr' ? query : undefined,
originalFileName: filter.queryType === 'metadata' ? query : undefined,
description: filter.queryType === 'description' ? query : undefined,
country: filter.location.country,

View File

@@ -26,6 +26,7 @@ export const featureFlags = writable<FeatureFlags>({
configFile: false,
trash: true,
email: false,
ocr: true,
});
export type ServerConfig = ServerConfigDto & { loaded: boolean };
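Since the store's initial value sets ocr: true, the OCR UI is enabled until the real server flags load (the loading mechanism is outside this hunk). A hypothetical imperative read of the store:

// Hypothetical guard; featureFlags is the Svelte writable store above.
import { get } from 'svelte/store';

const ocrEnabled = get(featureFlags).ocr; // false hides OCR search and the job tile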

View File

@@ -162,6 +162,7 @@ export const getJobName = derived(t, ($t) => {
[JobName.Library]: $t('external_libraries'),
[JobName.Notifications]: $t('notifications'),
[JobName.BackupDatabase]: $t('admin.backup_database'),
[JobName.Ocr]: $t('admin.machine_learning_ocr'),
};
return names[jobName];

View File

@@ -205,6 +205,7 @@
originalFileName: $t('file_name'),
description: $t('description'),
queryAssetId: $t('query_asset_id'),
ocr: $t('ocr'),
};
return keyMap[key] || key;
}