mirror of
https://github.com/immich-app/immich.git
synced 2025-07-09 03:04:16 -04:00
use composite cache key
1.19.2 fix variable name fix variable reference aaaaaaaaaaaaaaaaaaaa
This commit is contained in:
parent
ec0eb93036
commit
fe2ddc3644
150
machine-learning/0001-fix-rocm-conv-thread-safety.patch
Normal file
150
machine-learning/0001-fix-rocm-conv-thread-safety.patch
Normal file
@ -0,0 +1,150 @@
|
|||||||
|
From 350e3237eadb738a0d96295a62f2eed96653c315 Mon Sep 17 00:00:00 2001
|
||||||
|
From: mertalev <101130780+mertalev@users.noreply.github.com>
|
||||||
|
Date: Fri, 20 Dec 2024 00:59:21 -0500
|
||||||
|
Subject: [PATCH 1/1] fix: avoid race condition for rocm conv algo caching
|
||||||
|
|
||||||
|
---
|
||||||
|
onnxruntime/core/providers/rocm/nn/conv.cc | 8 ++++----
|
||||||
|
onnxruntime/core/providers/rocm/nn/conv.h | 14 ++++++++++++--
|
||||||
|
.../core/providers/rocm/nn/conv_transpose.cc | 8 ++++----
|
||||||
|
3 files changed, 20 insertions(+), 10 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/onnxruntime/core/providers/rocm/nn/conv.cc b/onnxruntime/core/providers/rocm/nn/conv.cc
|
||||||
|
index d7f47d07a8..98b6b69212 100644
|
||||||
|
--- a/onnxruntime/core/providers/rocm/nn/conv.cc
|
||||||
|
+++ b/onnxruntime/core/providers/rocm/nn/conv.cc
|
||||||
|
@@ -127,7 +127,6 @@ Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected)
|
||||||
|
|
||||||
|
if (w_dims_changed) {
|
||||||
|
s_.last_w_dims = gsl::make_span(w_dims);
|
||||||
|
- s_.cached_benchmark_fwd_results.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
ORT_RETURN_IF_ERROR(conv_attrs_.ValidateInputShape(X->Shape(), W->Shape(), channels_last, channels_last));
|
||||||
|
@@ -278,7 +277,8 @@ Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected)
|
||||||
|
HIP_CALL_THROW(hipMemsetAsync(s_.b_zero, 0, malloc_size, Stream(context)));
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (!s_.cached_benchmark_fwd_results.contains(x_dims_miopen)) {
|
||||||
|
+ const std::size_t algo_key = HashConvAlgoKey(x_dims_miopen, w_dims);
|
||||||
|
+ if (!s_.cached_benchmark_fwd_results.contains(algo_key)) {
|
||||||
|
miopenConvAlgoPerf_t perf;
|
||||||
|
int algo_count = 1;
|
||||||
|
const ROCMExecutionProvider* rocm_ep = static_cast<const ROCMExecutionProvider*>(this->Info().GetExecutionProvider());
|
||||||
|
@@ -301,9 +301,9 @@ Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected)
|
||||||
|
algo_search_workspace.get(),
|
||||||
|
max_ws_size,
|
||||||
|
false)); // Do not do exhaustive algo search.
|
||||||
|
- s_.cached_benchmark_fwd_results.insert(x_dims_miopen, {perf.fwd_algo, perf.memory});
|
||||||
|
+ s_.cached_benchmark_fwd_results.insert(algo_key, {perf.fwd_algo, perf.memory});
|
||||||
|
}
|
||||||
|
- const auto& perf = s_.cached_benchmark_fwd_results.at(x_dims_miopen);
|
||||||
|
+ const auto& perf = s_.cached_benchmark_fwd_results.at(algo_key);
|
||||||
|
s_.fwd_algo = perf.fwd_algo;
|
||||||
|
s_.workspace_bytes = perf.memory;
|
||||||
|
} else {
|
||||||
|
diff --git a/onnxruntime/core/providers/rocm/nn/conv.h b/onnxruntime/core/providers/rocm/nn/conv.h
|
||||||
|
index bc9846203e..b1ca5f8e4b 100644
|
||||||
|
--- a/onnxruntime/core/providers/rocm/nn/conv.h
|
||||||
|
+++ b/onnxruntime/core/providers/rocm/nn/conv.h
|
||||||
|
@@ -43,6 +43,11 @@ struct vector_hash {
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
+inline std::size_t HashConvAlgoKey(const TensorShapeVector& x_dims, const TensorShapeVector& w_dims) {
|
||||||
|
+ vector_hash vh;
|
||||||
|
+ return vh(x_dims) ^ vh(w_dims);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
template <typename Key, typename T,
|
||||||
|
typename Hash = std::hash<Key>,
|
||||||
|
typename KeyEqual = std::equal_to<Key>,
|
||||||
|
@@ -52,6 +57,7 @@ class lru_unordered_map {
|
||||||
|
lru_unordered_map(size_t max_size) : max_size_(max_size) {}
|
||||||
|
|
||||||
|
void insert(const Key& key, const T& value) {
|
||||||
|
+ std::lock_guard<std::mutex> guard(mutex_);
|
||||||
|
auto it = items_.find(key);
|
||||||
|
if (it != items_.end()) {
|
||||||
|
it->second.value = value;
|
||||||
|
@@ -69,6 +75,7 @@ class lru_unordered_map {
|
||||||
|
}
|
||||||
|
|
||||||
|
T& at(const Key& key) {
|
||||||
|
+ std::lock_guard<std::mutex> guard(mutex_);
|
||||||
|
auto it = items_.find(key);
|
||||||
|
if (it == items_.end()) {
|
||||||
|
throw std::out_of_range("There is no such key in cache");
|
||||||
|
@@ -78,6 +85,7 @@ class lru_unordered_map {
|
||||||
|
}
|
||||||
|
|
||||||
|
bool contains(const Key& key) const {
|
||||||
|
+ std::lock_guard<std::mutex> guard(mutex_);
|
||||||
|
return items_.find(key) != items_.end();
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -86,6 +94,7 @@ class lru_unordered_map {
|
||||||
|
}
|
||||||
|
|
||||||
|
void clear() {
|
||||||
|
+ std::lock_guard<std::mutex> guard(mutex_);
|
||||||
|
items_.clear();
|
||||||
|
lru_list_.clear();
|
||||||
|
}
|
||||||
|
@@ -106,6 +115,7 @@ class lru_unordered_map {
|
||||||
|
size_t max_size_;
|
||||||
|
std::unordered_map<Key, value_type, Hash, KeyEqual, MapAllocator> items_;
|
||||||
|
list_type lru_list_;
|
||||||
|
+ mutable std::mutex mutex_;
|
||||||
|
};
|
||||||
|
|
||||||
|
// cached miopen descriptors
|
||||||
|
@@ -148,8 +158,8 @@ struct MiopenConvState {
|
||||||
|
decltype(AlgoPerfType().memory) memory;
|
||||||
|
};
|
||||||
|
|
||||||
|
- lru_unordered_map<TensorShapeVector, PerfFwdResultParams, vector_hash> cached_benchmark_fwd_results{MAX_CACHED_ALGO_PERF_RESULTS};
|
||||||
|
- lru_unordered_map<TensorShapeVector, PerfBwdResultParams, vector_hash> cached_benchmark_bwd_results{MAX_CACHED_ALGO_PERF_RESULTS};
|
||||||
|
+ lru_unordered_map<std::size_t, PerfFwdResultParams> cached_benchmark_fwd_results{MAX_CACHED_ALGO_PERF_RESULTS};
|
||||||
|
+ lru_unordered_map<std::size_t, PerfBwdResultParams> cached_benchmark_bwd_results{MAX_CACHED_ALGO_PERF_RESULTS};
|
||||||
|
|
||||||
|
// Some properties needed to support asymmetric padded Conv nodes
|
||||||
|
bool post_slicing_required;
|
||||||
|
diff --git a/onnxruntime/core/providers/rocm/nn/conv_transpose.cc b/onnxruntime/core/providers/rocm/nn/conv_transpose.cc
|
||||||
|
index 7447113fdf..dea9bf2a05 100644
|
||||||
|
--- a/onnxruntime/core/providers/rocm/nn/conv_transpose.cc
|
||||||
|
+++ b/onnxruntime/core/providers/rocm/nn/conv_transpose.cc
|
||||||
|
@@ -76,7 +76,6 @@ Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dy
|
||||||
|
|
||||||
|
if (w_dims_changed) {
|
||||||
|
s_.last_w_dims = gsl::make_span(w_dims);
|
||||||
|
- s_.cached_benchmark_bwd_results.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
ConvTransposeAttributes::Prepare p;
|
||||||
|
@@ -127,7 +126,8 @@ Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dy
|
||||||
|
|
||||||
|
y_data = reinterpret_cast<HipT*>(p.Y->MutableData<T>());
|
||||||
|
|
||||||
|
- if (!s_.cached_benchmark_bwd_results.contains(x_dims)) {
|
||||||
|
+ const std::size_t algo_key = HashConvAlgoKey(x_dims, w_dims);
|
||||||
|
+ if (!s_.cached_benchmark_bwd_results.contains(algo_key)) {
|
||||||
|
IAllocatorUniquePtr<void> algo_search_workspace = GetScratchBuffer<void>(AlgoSearchWorkspaceSize, context->GetComputeStream());
|
||||||
|
|
||||||
|
miopenConvAlgoPerf_t perf;
|
||||||
|
@@ -147,10 +147,10 @@ Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dy
|
||||||
|
algo_search_workspace.get(),
|
||||||
|
AlgoSearchWorkspaceSize,
|
||||||
|
false));
|
||||||
|
- s_.cached_benchmark_bwd_results.insert(x_dims, {perf.bwd_data_algo, perf.memory});
|
||||||
|
+ s_.cached_benchmark_bwd_results.insert(algo_key, {perf.bwd_data_algo, perf.memory});
|
||||||
|
}
|
||||||
|
|
||||||
|
- const auto& perf = s_.cached_benchmark_bwd_results.at(x_dims);
|
||||||
|
+ const auto& perf = s_.cached_benchmark_bwd_results.at(algo_key);
|
||||||
|
s_.bwd_data_algo = perf.bwd_data_algo;
|
||||||
|
s_.workspace_bytes = perf.memory;
|
||||||
|
}
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
@ -1,58 +0,0 @@
|
|||||||
From 1f5d6323fa69ee16feab25f8e1398c1aed03ee08 Mon Sep 17 00:00:00 2001
|
|
||||||
From: mertalev <101130780+mertalev@users.noreply.github.com>
|
|
||||||
Date: Sun, 29 Dec 2024 14:07:51 -0500
|
|
||||||
Subject: [PATCH] guard algo benchmark results
|
|
||||||
|
|
||||||
---
|
|
||||||
onnxruntime/core/providers/rocm/nn/conv.h | 6 ++++++
|
|
||||||
1 file changed, 6 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/onnxruntime/core/providers/rocm/nn/conv.h b/onnxruntime/core/providers/rocm/nn/conv.h
|
|
||||||
index bc9846203e..0086e064f1 100644
|
|
||||||
--- a/onnxruntime/core/providers/rocm/nn/conv.h
|
|
||||||
+++ b/onnxruntime/core/providers/rocm/nn/conv.h
|
|
||||||
@@ -52,6 +52,7 @@ class lru_unordered_map {
|
|
||||||
lru_unordered_map(size_t max_size) : max_size_(max_size) {}
|
|
||||||
|
|
||||||
void insert(const Key& key, const T& value) {
|
|
||||||
+ std::lock_guard<std::mutex> guard(mutex_);
|
|
||||||
auto it = items_.find(key);
|
|
||||||
if (it != items_.end()) {
|
|
||||||
it->second.value = value;
|
|
||||||
@@ -69,6 +70,7 @@ class lru_unordered_map {
|
|
||||||
}
|
|
||||||
|
|
||||||
T& at(const Key& key) {
|
|
||||||
+ std::lock_guard<std::mutex> guard(mutex_);
|
|
||||||
auto it = items_.find(key);
|
|
||||||
if (it == items_.end()) {
|
|
||||||
throw std::out_of_range("There is no such key in cache");
|
|
||||||
@@ -78,14 +80,17 @@ class lru_unordered_map {
|
|
||||||
}
|
|
||||||
|
|
||||||
bool contains(const Key& key) const {
|
|
||||||
+ std::lock_guard<std::mutex> guard(mutex_);
|
|
||||||
return items_.find(key) != items_.end();
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t size() const {
|
|
||||||
+ std::lock_guard<std::mutex> guard(mutex_);
|
|
||||||
return items_.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
void clear() {
|
|
||||||
+ std::lock_guard<std::mutex> guard(mutex_);
|
|
||||||
items_.clear();
|
|
||||||
lru_list_.clear();
|
|
||||||
}
|
|
||||||
@@ -106,6 +111,7 @@ class lru_unordered_map {
|
|
||||||
size_t max_size_;
|
|
||||||
std::unordered_map<Key, value_type, Hash, KeyEqual, MapAllocator> items_;
|
|
||||||
list_type lru_list_;
|
|
||||||
+ mutable std::mutex mutex_;
|
|
||||||
};
|
|
||||||
|
|
||||||
// cached miopen descriptors
|
|
||||||
--
|
|
||||||
2.43.0
|
|
||||||
|
|
@ -17,6 +17,7 @@ RUN mkdir /opt/armnn && \
|
|||||||
|
|
||||||
# Warning: 26.3Gb of disk space required to pull this image
|
# Warning: 26.3Gb of disk space required to pull this image
|
||||||
# https://github.com/microsoft/onnxruntime/blob/main/dockerfiles/Dockerfile.rocm
|
# https://github.com/microsoft/onnxruntime/blob/main/dockerfiles/Dockerfile.rocm
|
||||||
|
# 6.2 or later fails to build as of writing
|
||||||
FROM rocm/dev-ubuntu-22.04:6.1.2-complete AS builder-rocm
|
FROM rocm/dev-ubuntu-22.04:6.1.2-complete AS builder-rocm
|
||||||
|
|
||||||
WORKDIR /code
|
WORKDIR /code
|
||||||
@ -32,11 +33,12 @@ RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.27.3/cmake-3.
|
|||||||
ENV PATH /code/cmake-3.27.3-linux-x86_64/bin:${PATH}
|
ENV PATH /code/cmake-3.27.3-linux-x86_64/bin:${PATH}
|
||||||
|
|
||||||
# Prepare onnxruntime repository & build onnxruntime
|
# Prepare onnxruntime repository & build onnxruntime
|
||||||
|
# 1.20.1 fails to build as of writing
|
||||||
RUN git clone --single-branch --branch v1.19.2 --recursive "https://github.com/Microsoft/onnxruntime" onnxruntime
|
RUN git clone --single-branch --branch v1.19.2 --recursive "https://github.com/Microsoft/onnxruntime" onnxruntime
|
||||||
WORKDIR /code/onnxruntime
|
WORKDIR /code/onnxruntime
|
||||||
# Fix for multi-threading based on comments in https://github.com/microsoft/onnxruntime/pull/19567
|
# Fix for multi-threading based on comments in https://github.com/microsoft/onnxruntime/pull/19567
|
||||||
COPY ./0001-guard-algo-benchmark-results.patch /tmp/
|
COPY ./0001-fix-rocm-conv-thread-safety.patch /tmp/
|
||||||
RUN git apply /tmp/0001-guard-algo-benchmark-results.patch
|
RUN git apply /tmp/0001-fix-rocm-conv-thread-safety.patch
|
||||||
|
|
||||||
RUN /bin/sh ./dockerfiles/scripts/install_common_deps.sh
|
RUN /bin/sh ./dockerfiles/scripts/install_common_deps.sh
|
||||||
# Note: the `parallel` setting uses a substantial amount of RAM
|
# Note: the `parallel` setting uses a substantial amount of RAM
|
||||||
@ -112,7 +114,7 @@ COPY --from=builder-armnn \
|
|||||||
/opt/ann/build.sh \
|
/opt/ann/build.sh \
|
||||||
/opt/armnn/
|
/opt/armnn/
|
||||||
|
|
||||||
FROM rocm/dev-ubuntu-24.04:6.2.4-complete AS prod-rocm
|
FROM rocm/dev-ubuntu-22.04:6.1.2-complete AS prod-rocm
|
||||||
|
|
||||||
|
|
||||||
FROM prod-${DEVICE} AS prod
|
FROM prod-${DEVICE} AS prod
|
||||||
|
Loading…
x
Reference in New Issue
Block a user