From d9a41b8ea0aa86923bad4c36ae73eab42f286dc5 Mon Sep 17 00:00:00 2001 From: mertalev <101130780+mertalev@users.noreply.github.com> Date: Thu, 19 Dec 2024 20:53:40 -0500 Subject: [PATCH] bump versions, run on mich use 3.12 use 1.19.2 --- .../docs/features/ml-hardware-acceleration.md | 6 ++--- machine-learning/Dockerfile | 24 ++++++++----------- machine-learning/app/models/constants.py | 7 +++++- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/docs/docs/features/ml-hardware-acceleration.md b/docs/docs/features/ml-hardware-acceleration.md index 84f99ff526..87be3994e8 100644 --- a/docs/docs/features/ml-hardware-acceleration.md +++ b/docs/docs/features/ml-hardware-acceleration.md @@ -11,7 +11,7 @@ You do not need to redo any machine learning jobs after enabling hardware accele - ARM NN (Mali) - CUDA (NVIDIA GPUs with [compute capability](https://developer.nvidia.com/cuda-gpus) 5.2 or higher) -- ROCM (AMD GPUs) +- ROCm (AMD GPUs) - OpenVINO (Intel GPUs such as Iris Xe and Arc) ## Limitations @@ -42,9 +42,9 @@ You do not need to redo any machine learning jobs after enabling hardware accele - The installed driver must be >= 535 (it must support CUDA 12.2). - On Linux (except for WSL2), you also need to have [NVIDIA Container Toolkit][nvct] installed. -#### ROCM +#### ROCm -- The GPU must be supported by ROCM (or use `HSA_OVERRIDE_GFX_VERSION=`) +- The GPU must be supported by ROCm. If it isn't officially supported, you can attempt to use the `HSA_OVERRIDE_GFX_VERSION` environment variable: `HSA_OVERRIDE_GFX_VERSION=`. 
#### OpenVINO diff --git a/machine-learning/Dockerfile b/machine-learning/Dockerfile index 3d7e0c2cd7..2644514015 100644 --- a/machine-learning/Dockerfile +++ b/machine-learning/Dockerfile @@ -17,11 +17,11 @@ RUN mkdir /opt/armnn && \ # Warning: 26.3Gb of disk space required to pull this image # https://github.com/microsoft/onnxruntime/blob/main/dockerfiles/Dockerfile.rocm -FROM rocm/dev-ubuntu-22.04:6.1.2-complete as builder-rocm +FROM rocm/dev-ubuntu-24.04:6.2.4-complete AS builder-rocm WORKDIR /code -RUN apt-get update && apt-get install -y --no-install-recommends wget git python3.10-venv +RUN apt-get update && apt-get install -y --no-install-recommends wget git python3.12-venv # Install same version as the Dockerfile provided by onnxruntime RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.27.3/cmake-3.27.3-linux-x86_64.sh && \ chmod +x cmake-3.27.3-linux-x86_64.sh && \ @@ -32,21 +32,17 @@ RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.27.3/cmake-3. 
ENV PATH /code/cmake-3.27.3-linux-x86_64/bin:${PATH} # Prepare onnxruntime repository & build onnxruntime -RUN git clone --single-branch --branch v1.18.1 --recursive "https://github.com/Microsoft/onnxruntime" onnxruntime +# Note: cannot upgrade past 1.19.2 as of writing until upstream updates the ROCm CI +RUN git clone --single-branch --branch v1.19.2 --recursive "https://github.com/Microsoft/onnxruntime" onnxruntime WORKDIR /code/onnxruntime -# EDIT PR -# While there's still this PR open, we need to compile on the branch of the PR -# https://github.com/microsoft/onnxruntime/pull/19567 +# Fix for multi-threading based on comments in https://github.com/microsoft/onnxruntime/pull/19567 COPY ./0001-fix-avoid-race-condition-for-rocm-conv-algo-caching.patch /tmp/ RUN git apply /tmp/0001-fix-avoid-race-condition-for-rocm-conv-algo-caching.patch -# END EDIT PR + RUN /bin/sh ./dockerfiles/scripts/install_common_deps.sh -# I ran into a compilation error when parallelizing the build -# I used 12 threads to build onnxruntime, but it needs more than 16GB of RAM, and that's the amount of RAM I have on my machine -# I lowered the number of threads to 8, and it worked -# Even with 12 threads, the compilation took more than 1,5 hours to fail -RUN ./build.sh --allow_running_as_root --config Release --build_wheel --update --build --parallel 9 --cmake_extra_defines\ - ONNXRUNTIME_VERSION=1.18.1 --use_rocm --rocm_home=/opt/rocm +# Note: the `parallel` setting uses a substantial amount of RAM +RUN ./build.sh --allow_running_as_root --config Release --build_wheel --update --build --parallel 13 --cmake_extra_defines\ + ONNXRUNTIME_VERSION=1.19.2 --use_rocm --rocm_home=/opt/rocm RUN mv /code/onnxruntime/build/Linux/Release/dist/*.whl /opt/ FROM builder-${DEVICE} AS builder @@ -117,7 +113,7 @@ COPY --from=builder-armnn \ /opt/ann/build.sh \ /opt/armnn/ -FROM rocm/dev-ubuntu-22.04:6.1.2-complete AS prod-rocm +FROM rocm/dev-ubuntu-24.04:6.2.4-complete AS prod-rocm FROM prod-${DEVICE} AS prod 
diff --git a/machine-learning/app/models/constants.py b/machine-learning/app/models/constants.py index d5b3f4dfd2..43088741b9 100644 --- a/machine-learning/app/models/constants.py +++ b/machine-learning/app/models/constants.py @@ -63,7 +63,12 @@ _INSIGHTFACE_MODELS = { } -SUPPORTED_PROVIDERS = ["CUDAExecutionProvider", "ROCMExecutionProvider", "OpenVINOExecutionProvider", "CPUExecutionProvider"] +SUPPORTED_PROVIDERS = [ + "CUDAExecutionProvider", + "ROCMExecutionProvider", + "OpenVINOExecutionProvider", + "CPUExecutionProvider", +] def get_model_source(model_name: str) -> ModelSource | None: