Kang 02b29046b3
feat: ocr (#18836)
* feat: add OCR functionality and related configurations

* chore: update labeler configuration for machine learning files

* feat(i18n): enhance OCR model descriptions and add orientation classification and unwarping features

* chore: update Dockerfile to include ccache for improved build performance

* feat(ocr): enhance OCR model configuration with orientation classification and unwarping options, update PaddleOCR integration, and improve response structure

* refactor(ocr): remove OCR_CLEANUP job from enum and type definitions

* refactor(ocr): remove obsolete OCR entity and migration files, and update asset job status and schema to accommodate new OCR table structure

* refactor(ocr): update OCR schema and response structure to use individual coordinates instead of bounding box, and adjust related service and repository files

* feat: enhance OCR configuration and functionality

- Updated OCR settings to include minimum detection box score, minimum detection score, and minimum recognition score.
- Refactored PaddleOCRecognizer to utilize new scoring parameters.
- Introduced new database tables for asset OCR data and search functionality.
- Modified related services and repositories to support the new OCR features.
- Updated translations for improved clarity in settings UI.

* sql changes

* use rapidocr

* change dto

* update web

* update lock

* update api

* store positions as normalized floats

* match column order in db

* update admin ui settings descriptions

fix max resolution key

set min threshold to 0.1

fix bind

* apply config correctly, adjust defaults

* unnecessary model type

* unnecessary sources

* fix(ocr): switch RapidOCR lang type from LangDet to LangRec

* fix(ocr): expose lang_type (LangRec.CH) and font_path on OcrOptions for RapidOCR

* fix(ocr): make OCR text search case- and accent-insensitive using ILIKE + unaccent

* fix(ocr): add OCR search fields

* fix: Add OCR database migration and update ML prediction logic.

* trigrams are already case insensitive

* add tests

* format

* update migrations

* wrong uuid function

* linting

* maybe fix medium tests

* formatting

* fix weblate check

* openapi

* sql

* minor fixes

* maybe fix medium tests part 2

* passing medium tests

* format web

* readd sql

* format dart

* disabled in e2e

* chore: translation ordering

---------

Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com>
Co-authored-by: Alex Tran <alex.tran1502@gmail.com>
2025-10-27 14:09:55 +00:00

189 lines
7.6 KiB
Docker

ARG DEVICE=cpu
FROM python:3.11-bookworm@sha256:fc1f2e357c307c4044133952b203e66a47e7726821a664f603a180a0c5823844 AS builder-cpu
FROM builder-cpu AS builder-openvino
FROM builder-cpu AS builder-cuda
FROM builder-cpu AS builder-armnn
# renovate: datasource=github-releases depName=ARM-software/armnn
ARG ARMNN_VERSION="v24.05"
ENV ARMNN_PATH=/opt/armnn
COPY ann /opt/ann
RUN mkdir /opt/armnn && \
curl -SL "https://github.com/ARM-software/armnn/releases/download/${ARMNN_VERSION}/ArmNN-linux-aarch64.tar.gz" | tar -zx -C /opt/armnn && \
cd /opt/ann && \
sh build.sh
FROM builder-cpu AS builder-rknn
# Warning: 25GiB+ disk space required to pull this image
# TODO: find a way to reduce the image size
FROM rocm/dev-ubuntu-22.04:6.4.3-complete@sha256:1f7e92ca7e3a3785680473329ed1091fc99db3e90fcb3a1688f2933e870ed76b AS builder-rocm
# renovate: datasource=github-releases depName=Microsoft/onnxruntime
ARG ONNXRUNTIME_VERSION="v1.20.1"
WORKDIR /code
RUN apt-get update && apt-get install -y --no-install-recommends wget git python3.10-venv
RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.30.1/cmake-3.30.1-linux-x86_64.sh && \
chmod +x cmake-3.30.1-linux-x86_64.sh && \
mkdir -p /code/cmake-3.30.1-linux-x86_64 && \
./cmake-3.30.1-linux-x86_64.sh --skip-license --prefix=/code/cmake-3.30.1-linux-x86_64 && \
rm cmake-3.30.1-linux-x86_64.sh
ENV PATH=/code/cmake-3.30.1-linux-x86_64/bin:${PATH}
RUN git clone --single-branch --branch "${ONNXRUNTIME_VERSION}" --recursive "https://github.com/Microsoft/onnxruntime" onnxruntime
WORKDIR /code/onnxruntime
# Fix for multi-threading based on comments in https://github.com/microsoft/onnxruntime/pull/19567
# TODO: find a way to fix this without disabling algo caching
COPY ./patches/* /tmp/
RUN git apply /tmp/*.patch
RUN /bin/sh ./dockerfiles/scripts/install_common_deps.sh
# Note: the `parallel` setting uses a substantial amount of RAM
RUN ./build.sh --allow_running_as_root --config Release --build_wheel --update --build --parallel 17 --cmake_extra_defines\
ONNXRUNTIME_VERSION="${ONNXRUNTIME_VERSION}" --skip_tests --use_rocm --rocm_home=/opt/rocm
RUN mv /code/onnxruntime/build/Linux/Release/dist/*.whl /opt/
FROM builder-${DEVICE} AS builder
ARG DEVICE
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
VIRTUAL_ENV=/opt/venv
RUN apt-get update && apt-get install -y --no-install-recommends g++
COPY --from=ghcr.io/astral-sh/uv:0.8.15@sha256:a5727064a0de127bdb7c9d3c1383f3a9ac307d9f2d8a391edc7896c54289ced0 /uv /uvx /bin/
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
uv sync --frozen --extra ${DEVICE} --no-dev --no-editable --no-install-project --compile-bytecode --no-progress --active --link-mode copy
RUN if [ "$DEVICE" = "rocm" ]; then \
uv pip install /opt/onnxruntime_rocm-*.whl; \
fi
FROM python:3.11-slim-bookworm@sha256:873f91540d53b36327ed4fb018c9669107a4e2a676719720edb4209c4b15d029 AS prod-cpu
ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
MACHINE_LEARNING_MODEL_ARENA=false
FROM python:3.11-slim-bookworm@sha256:873f91540d53b36327ed4fb018c9669107a4e2a676719720edb4209c4b15d029 AS prod-openvino
RUN apt-get update && \
apt-get install --no-install-recommends -yqq ocl-icd-libopencl1 wget && \
wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17384.11/intel-igc-core_1.0.17384.11_amd64.deb && \
wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17384.11/intel-igc-opencl_1.0.17384.11_amd64.deb && \
wget -nv https://github.com/intel/compute-runtime/releases/download/24.31.30508.7/intel-opencl-icd_24.31.30508.7_amd64.deb && \
# TODO: Figure out how to get renovate to manage this differently versioned libigdgmm file
wget -nv https://github.com/intel/compute-runtime/releases/download/24.31.30508.7/libigdgmm12_22.4.1_amd64.deb && \
dpkg -i *.deb && \
rm *.deb && \
apt-get remove wget -yqq && \
rm -rf /var/lib/apt/lists/*
FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04@sha256:94c1577b2cd9dd6c0312dc04dff9cb2fdce2b268018abc3d7c2dbcacf1155000 AS prod-cuda
ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
MACHINE_LEARNING_MODEL_ARENA=false
RUN apt-get update && \
# Pascal support was dropped in 9.11
apt-get install --no-install-recommends -yqq libcudnn9-cuda-12=9.10.2.21-1 && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
COPY --from=builder-cuda /usr/local/bin/python3 /usr/local/bin/python3
COPY --from=builder-cuda /usr/local/lib/python3.11 /usr/local/lib/python3.11
COPY --from=builder-cuda /usr/local/lib/libpython3.11.so /usr/local/lib/libpython3.11.so
FROM rocm/dev-ubuntu-22.04:6.4.3-complete@sha256:1f7e92ca7e3a3785680473329ed1091fc99db3e90fcb3a1688f2933e870ed76b AS prod-rocm
FROM prod-cpu AS prod-armnn
ENV LD_LIBRARY_PATH=/opt/armnn \
LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
MACHINE_LEARNING_MODEL_ARENA=false
RUN apt-get update && apt-get install -y --no-install-recommends ocl-icd-libopencl1 mesa-opencl-icd libgomp1 && \
rm -rf /var/lib/apt/lists/* && \
mkdir --parents /etc/OpenCL/vendors && \
echo "/usr/lib/libmali.so" > /etc/OpenCL/vendors/mali.icd && \
mkdir /opt/armnn
COPY --from=builder-armnn \
/opt/armnn/libarmnn.so.?? \
/opt/armnn/libarmnnOnnxParser.so.?? \
/opt/armnn/libarmnnDeserializer.so.?? \
/opt/armnn/libarmnnTfLiteParser.so.?? \
/opt/armnn/libprotobuf.so.?.??.?.? \
/opt/ann/libann.s[o] \
/opt/ann/build.sh \
/opt/armnn/
FROM prod-cpu AS prod-rknn
# renovate: datasource=github-tags depName=airockchip/rknn-toolkit2
ARG RKNN_TOOLKIT_VERSION="v2.3.0"
ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
MACHINE_LEARNING_MODEL_ARENA=false
ADD --checksum=sha256:73993ed4b440460825f21611731564503cc1d5a0c123746477da6cd574f34885 "https://github.com/airockchip/rknn-toolkit2/raw/refs/tags/${RKNN_TOOLKIT_VERSION}/rknpu2/runtime/Linux/librknn_api/aarch64/librknnrt.so" /usr/lib/
FROM prod-${DEVICE} AS prod
ARG DEVICE
RUN apt-get update && \
apt-get install -y --no-install-recommends tini ccache libgl1 libglib2.0-0 libgomp1 $(if ! [ "$DEVICE" = "openvino" ] && ! [ "$DEVICE" = "rocm" ]; then echo "libmimalloc2.0"; fi) && \
apt-get autoremove -yqq && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
RUN ln -s "/usr/lib/$(arch)-linux-gnu/libmimalloc.so.2" /usr/lib/libmimalloc.so.2
WORKDIR /usr/src
ENV TRANSFORMERS_CACHE=/cache \
PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
PATH="/opt/venv/bin:$PATH" \
PYTHONPATH=/usr/src \
DEVICE=${DEVICE} \
VIRTUAL_ENV=/opt/venv \
MACHINE_LEARNING_CACHE_FOLDER=/cache
# prevent core dumps
RUN echo "hard core 0" >> /etc/security/limits.conf && \
echo "fs.suid_dumpable 0" >> /etc/sysctl.conf && \
echo 'ulimit -S -c 0 > /dev/null 2>&1' >> /etc/profile
COPY --from=builder /opt/venv /opt/venv
COPY scripts/healthcheck.py .
COPY immich_ml immich_ml
ARG BUILD_ID
ARG BUILD_IMAGE
ARG BUILD_SOURCE_REF
ARG BUILD_SOURCE_COMMIT
ENV IMMICH_BUILD=${BUILD_ID}
ENV IMMICH_BUILD_URL=https://github.com/immich-app/immich/actions/runs/${BUILD_ID}
ENV IMMICH_BUILD_IMAGE=${BUILD_IMAGE}
ENV IMMICH_BUILD_IMAGE_URL=https://github.com/immich-app/immich/pkgs/container/immich-machine-learning
ENV IMMICH_REPOSITORY=immich-app/immich
ENV IMMICH_REPOSITORY_URL=https://github.com/immich-app/immich
ENV IMMICH_SOURCE_REF=${BUILD_SOURCE_REF}
ENV IMMICH_SOURCE_COMMIT=${BUILD_SOURCE_COMMIT}
ENV IMMICH_SOURCE_URL=https://github.com/immich-app/immich/commit/${BUILD_SOURCE_COMMIT}
ENTRYPOINT ["tini", "--"]
CMD ["python", "-m", "immich_ml"]
HEALTHCHECK CMD python3 healthcheck.py