From d9a41b8ea0aa86923bad4c36ae73eab42f286dc5 Mon Sep 17 00:00:00 2001 From: mertalev <101130780+mertalev@users.noreply.github.com> Date: Thu, 19 Dec 2024 20:53:40 -0500 Subject: [PATCH] bump versions, run on mich use 3.12 use 1.19.2 --- .../docs/features/ml-hardware-acceleration.md | 6 ++--- machine-learning/Dockerfile | 24 ++++++++----------- machine-learning/app/models/constants.py | 7 +++++- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/docs/docs/features/ml-hardware-acceleration.md b/docs/docs/features/ml-hardware-acceleration.md index 84f99ff526..87be3994e8 100644 --- a/docs/docs/features/ml-hardware-acceleration.md +++ b/docs/docs/features/ml-hardware-acceleration.md @@ -11,7 +11,7 @@ You do not need to redo any machine learning jobs after enabling hardware accele - ARM NN (Mali) - CUDA (NVIDIA GPUs with [compute capability](https://developer.nvidia.com/cuda-gpus) 5.2 or higher) -- ROCM (AMD GPUs) +- ROCm (AMD GPUs) - OpenVINO (Intel GPUs such as Iris Xe and Arc) ## Limitations @@ -42,9 +42,9 @@ You do not need to redo any machine learning jobs after enabling hardware accele - The installed driver must be >= 535 (it must support CUDA 12.2). - On Linux (except for WSL2), you also need to have [NVIDIA Container Toolkit][nvct] installed. -#### ROCM +#### ROCm -- The GPU must be supported by ROCM (or use `HSA_OVERRIDE_GFX_VERSION=`) +- The GPU must be supported by ROCm. If it isn't officially supported, you can attempt to use the `HSA_OVERRIDE_GFX_VERSION` environment variable: `HSA_OVERRIDE_GFX_VERSION=`. 
#### OpenVINO diff --git a/machine-learning/Dockerfile b/machine-learning/Dockerfile index 3d7e0c2cd7..2644514015 100644 --- a/machine-learning/Dockerfile +++ b/machine-learning/Dockerfile @@ -17,11 +17,11 @@ RUN mkdir /opt/armnn && \ # Warning: 26.3Gb of disk space required to pull this image # https://github.com/microsoft/onnxruntime/blob/main/dockerfiles/Dockerfile.rocm -FROM rocm/dev-ubuntu-22.04:6.1.2-complete as builder-rocm +FROM rocm/dev-ubuntu-24.04:6.2.4-complete AS builder-rocm WORKDIR /code -RUN apt-get update && apt-get install -y --no-install-recommends wget git python3.10-venv +RUN apt-get update && apt-get install -y --no-install-recommends wget git python3.12-venv # Install same version as the Dockerfile provided by onnxruntime RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.27.3/cmake-3.27.3-linux-x86_64.sh && \ chmod +x cmake-3.27.3-linux-x86_64.sh && \ @@ -32,21 +32,17 @@ RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.27.3/cmake-3. 
ENV PATH /code/cmake-3.27.3-linux-x86_64/bin:${PATH} # Prepare onnxruntime repository & build onnxruntime -RUN git clone --single-branch --branch v1.18.1 --recursive "https://github.com/Microsoft/onnxruntime" onnxruntime +# Note: cannot upgrade past 1.19.2 as of writing until upstream updates the ROCm CI +RUN git clone --single-branch --branch v1.19.2 --recursive "https://github.com/Microsoft/onnxruntime" onnxruntime WORKDIR /code/onnxruntime -# EDIT PR -# While there's still this PR open, we need to compile on the branch of the PR -# https://github.com/microsoft/onnxruntime/pull/19567 +# Fix for multi-threading based on comments in https://github.com/microsoft/onnxruntime/pull/19567 COPY ./0001-fix-avoid-race-condition-for-rocm-conv-algo-caching.patch /tmp/ RUN git apply /tmp/0001-fix-avoid-race-condition-for-rocm-conv-algo-caching.patch -# END EDIT PR + RUN /bin/sh ./dockerfiles/scripts/install_common_deps.sh -# I ran into a compilation error when parallelizing the build -# I used 12 threads to build onnxruntime, but it needs more than 16GB of RAM, and that's the amount of RAM I have on my machine -# I lowered the number of threads to 8, and it worked -# Even with 12 threads, the compilation took more than 1,5 hours to fail -RUN ./build.sh --allow_running_as_root --config Release --build_wheel --update --build --parallel 9 --cmake_extra_defines\ - ONNXRUNTIME_VERSION=1.18.1 --use_rocm --rocm_home=/opt/rocm +# Note: the `parallel` setting uses a substantial amount of RAM +RUN ./build.sh --allow_running_as_root --config Release --build_wheel --update --build --parallel 13 --cmake_extra_defines\ + ONNXRUNTIME_VERSION=1.19.2 --use_rocm --rocm_home=/opt/rocm RUN mv /code/onnxruntime/build/Linux/Release/dist/*.whl /opt/ FROM builder-${DEVICE} AS builder @@ -117,7 +113,7 @@ COPY --from=builder-armnn \ /opt/ann/build.sh \ /opt/armnn/ -FROM rocm/dev-ubuntu-22.04:6.1.2-complete AS prod-rocm +FROM rocm/dev-ubuntu-24.04:6.2.4-complete AS prod-rocm FROM prod-${DEVICE} AS prod 
diff --git a/machine-learning/app/models/constants.py b/machine-learning/app/models/constants.py index d5b3f4dfd2..43088741b9 100644 --- a/machine-learning/app/models/constants.py +++ b/machine-learning/app/models/constants.py @@ -63,7 +63,12 @@ _INSIGHTFACE_MODELS = { } -SUPPORTED_PROVIDERS = ["CUDAExecutionProvider", "ROCMExecutionProvider", "OpenVINOExecutionProvider", "CPUExecutionProvider"] +SUPPORTED_PROVIDERS = [ + "CUDAExecutionProvider", + "ROCMExecutionProvider", + "OpenVINOExecutionProvider", + "CPUExecutionProvider", +] def get_model_source(model_name: str) -> ModelSource | None: