mirror of
https://github.com/immich-app/immich.git
synced 2025-07-09 03:04:16 -04:00
acquire lock before any changes can be made
guard algo benchmark results; mark mutex as mutable; re-add /bin/sh (?); use 3.10; use 6.1.2
This commit is contained in:
parent
d9a41b8ea0
commit
ec0eb93036
@ -1,25 +0,0 @@
|
|||||||
From e267bc9bab8b3873dba57323ddcd9a9d09a1211e Mon Sep 17 00:00:00 2001
|
|
||||||
From: mertalev <101130780+mertalev@users.noreply.github.com>
|
|
||||||
Date: Fri, 20 Dec 2024 00:59:21 -0500
|
|
||||||
Subject: [PATCH] fix: avoid race condition for rocm conv algo caching
|
|
||||||
|
|
||||||
---
|
|
||||||
onnxruntime/core/providers/rocm/nn/conv.cc | 2 ++
|
|
||||||
1 file changed, 2 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/onnxruntime/core/providers/rocm/nn/conv.cc b/onnxruntime/core/providers/rocm/nn/conv.cc
|
|
||||||
index d7f47d07a8..ec438287ac 100644
|
|
||||||
--- a/onnxruntime/core/providers/rocm/nn/conv.cc
|
|
||||||
+++ b/onnxruntime/core/providers/rocm/nn/conv.cc
|
|
||||||
@@ -278,6 +278,8 @@ Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected)
|
|
||||||
HIP_CALL_THROW(hipMemsetAsync(s_.b_zero, 0, malloc_size, Stream(context)));
|
|
||||||
}
|
|
||||||
|
|
||||||
+ // lock is needed to avoid race condition during algo search
|
|
||||||
+ std::lock_guard<OrtMutex> lock(s_.mutex);
|
|
||||||
if (!s_.cached_benchmark_fwd_results.contains(x_dims_miopen)) {
|
|
||||||
miopenConvAlgoPerf_t perf;
|
|
||||||
int algo_count = 1;
|
|
||||||
--
|
|
||||||
2.43.0
|
|
||||||
|
|
58
machine-learning/0001-guard-algo-benchmark-results.patch
Normal file
58
machine-learning/0001-guard-algo-benchmark-results.patch
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
From 1f5d6323fa69ee16feab25f8e1398c1aed03ee08 Mon Sep 17 00:00:00 2001
|
||||||
|
From: mertalev <101130780+mertalev@users.noreply.github.com>
|
||||||
|
Date: Sun, 29 Dec 2024 14:07:51 -0500
|
||||||
|
Subject: [PATCH] guard algo benchmark results
|
||||||
|
|
||||||
|
---
|
||||||
|
onnxruntime/core/providers/rocm/nn/conv.h | 6 ++++++
|
||||||
|
1 file changed, 6 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/onnxruntime/core/providers/rocm/nn/conv.h b/onnxruntime/core/providers/rocm/nn/conv.h
|
||||||
|
index bc9846203e..0086e064f1 100644
|
||||||
|
--- a/onnxruntime/core/providers/rocm/nn/conv.h
|
||||||
|
+++ b/onnxruntime/core/providers/rocm/nn/conv.h
|
||||||
|
@@ -52,6 +52,7 @@ class lru_unordered_map {
|
||||||
|
lru_unordered_map(size_t max_size) : max_size_(max_size) {}
|
||||||
|
|
||||||
|
void insert(const Key& key, const T& value) {
|
||||||
|
+ std::lock_guard<std::mutex> guard(mutex_);
|
||||||
|
auto it = items_.find(key);
|
||||||
|
if (it != items_.end()) {
|
||||||
|
it->second.value = value;
|
||||||
|
@@ -69,6 +70,7 @@ class lru_unordered_map {
|
||||||
|
}
|
||||||
|
|
||||||
|
T& at(const Key& key) {
|
||||||
|
+ std::lock_guard<std::mutex> guard(mutex_);
|
||||||
|
auto it = items_.find(key);
|
||||||
|
if (it == items_.end()) {
|
||||||
|
throw std::out_of_range("There is no such key in cache");
|
||||||
|
@@ -78,14 +80,17 @@ class lru_unordered_map {
|
||||||
|
}
|
||||||
|
|
||||||
|
bool contains(const Key& key) const {
|
||||||
|
+ std::lock_guard<std::mutex> guard(mutex_);
|
||||||
|
return items_.find(key) != items_.end();
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t size() const {
|
||||||
|
+ std::lock_guard<std::mutex> guard(mutex_);
|
||||||
|
return items_.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
void clear() {
|
||||||
|
+ std::lock_guard<std::mutex> guard(mutex_);
|
||||||
|
items_.clear();
|
||||||
|
lru_list_.clear();
|
||||||
|
}
|
||||||
|
@@ -106,6 +111,7 @@ class lru_unordered_map {
|
||||||
|
size_t max_size_;
|
||||||
|
std::unordered_map<Key, value_type, Hash, KeyEqual, MapAllocator> items_;
|
||||||
|
list_type lru_list_;
|
||||||
|
+ mutable std::mutex mutex_;
|
||||||
|
};
|
||||||
|
|
||||||
|
// cached miopen descriptors
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
@ -17,11 +17,11 @@ RUN mkdir /opt/armnn && \
|
|||||||
|
|
||||||
# Warning: 26.3Gb of disk space required to pull this image
|
# Warning: 26.3Gb of disk space required to pull this image
|
||||||
# https://github.com/microsoft/onnxruntime/blob/main/dockerfiles/Dockerfile.rocm
|
# https://github.com/microsoft/onnxruntime/blob/main/dockerfiles/Dockerfile.rocm
|
||||||
FROM rocm/dev-ubuntu-24.04:6.2.4-complete AS builder-rocm
|
FROM rocm/dev-ubuntu-22.04:6.1.2-complete AS builder-rocm
|
||||||
|
|
||||||
WORKDIR /code
|
WORKDIR /code
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends wget git python3.12-venv
|
RUN apt-get update && apt-get install -y --no-install-recommends wget git python3.10-venv
|
||||||
# Install same version as the Dockerfile provided by onnxruntime
|
# Install same version as the Dockerfile provided by onnxruntime
|
||||||
RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.27.3/cmake-3.27.3-linux-x86_64.sh && \
|
RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.27.3/cmake-3.27.3-linux-x86_64.sh && \
|
||||||
chmod +x cmake-3.27.3-linux-x86_64.sh && \
|
chmod +x cmake-3.27.3-linux-x86_64.sh && \
|
||||||
@ -32,12 +32,11 @@ RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.27.3/cmake-3.
|
|||||||
ENV PATH /code/cmake-3.27.3-linux-x86_64/bin:${PATH}
|
ENV PATH /code/cmake-3.27.3-linux-x86_64/bin:${PATH}
|
||||||
|
|
||||||
# Prepare onnxruntime repository & build onnxruntime
|
# Prepare onnxruntime repository & build onnxruntime
|
||||||
# Note: cannot upgrade from 1.19.2 as of writing until upstream updates the ROCm CI
|
|
||||||
RUN git clone --single-branch --branch v1.19.2 --recursive "https://github.com/Microsoft/onnxruntime" onnxruntime
|
RUN git clone --single-branch --branch v1.19.2 --recursive "https://github.com/Microsoft/onnxruntime" onnxruntime
|
||||||
WORKDIR /code/onnxruntime
|
WORKDIR /code/onnxruntime
|
||||||
# Fix for multi-threading based on comments in https://github.com/microsoft/onnxruntime/pull/19567
|
# Fix for multi-threading based on comments in https://github.com/microsoft/onnxruntime/pull/19567
|
||||||
COPY ./0001-fix-avoid-race-condition-for-rocm-conv-algo-caching.patch /tmp/
|
COPY ./0001-guard-algo-benchmark-results.patch /tmp/
|
||||||
RUN git apply /tmp/0001-fix-avoid-race-condition-for-rocm-conv-algo-caching.patch
|
RUN git apply /tmp/0001-guard-algo-benchmark-results.patch
|
||||||
|
|
||||||
RUN /bin/sh ./dockerfiles/scripts/install_common_deps.sh
|
RUN /bin/sh ./dockerfiles/scripts/install_common_deps.sh
|
||||||
# Note: the `parallel` setting uses a substantial amount of RAM
|
# Note: the `parallel` setting uses a substantial amount of RAM
|
||||||
|
Loading…
x
Reference in New Issue
Block a user