mirror of
				https://github.com/immich-app/immich.git
				synced 2025-11-04 03:27:09 -05:00 
			
		
		
		
	acquire lock before any changes can be made
guard algo benchmark results; mark mutex as mutable; re-add /bin/sh (?); use 3.10; use 6.1.2
This commit is contained in:
		
							parent
							
								
									d9a41b8ea0
								
							
						
					
					
						commit
						ec0eb93036
					
				@ -1,25 +0,0 @@
 | 
				
			|||||||
From e267bc9bab8b3873dba57323ddcd9a9d09a1211e Mon Sep 17 00:00:00 2001
 | 
					 | 
				
			||||||
From: mertalev <101130780+mertalev@users.noreply.github.com>
 | 
					 | 
				
			||||||
Date: Fri, 20 Dec 2024 00:59:21 -0500
 | 
					 | 
				
			||||||
Subject: [PATCH] fix: avoid race condition for rocm conv algo caching
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 onnxruntime/core/providers/rocm/nn/conv.cc | 2 ++
 | 
					 | 
				
			||||||
 1 file changed, 2 insertions(+)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
diff --git a/onnxruntime/core/providers/rocm/nn/conv.cc b/onnxruntime/core/providers/rocm/nn/conv.cc
 | 
					 | 
				
			||||||
index d7f47d07a8..ec438287ac 100644
 | 
					 | 
				
			||||||
--- a/onnxruntime/core/providers/rocm/nn/conv.cc
 | 
					 | 
				
			||||||
+++ b/onnxruntime/core/providers/rocm/nn/conv.cc
 | 
					 | 
				
			||||||
@@ -278,6 +278,8 @@ Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected)
 | 
					 | 
				
			||||||
       HIP_CALL_THROW(hipMemsetAsync(s_.b_zero, 0, malloc_size, Stream(context)));
 | 
					 | 
				
			||||||
     }
 | 
					 | 
				
			||||||
 
 | 
					 | 
				
			||||||
+    // lock is needed to avoid race condition during algo search
 | 
					 | 
				
			||||||
+    std::lock_guard<OrtMutex> lock(s_.mutex);
 | 
					 | 
				
			||||||
     if (!s_.cached_benchmark_fwd_results.contains(x_dims_miopen)) {
 | 
					 | 
				
			||||||
       miopenConvAlgoPerf_t perf;
 | 
					 | 
				
			||||||
       int algo_count = 1;
 | 
					 | 
				
			||||||
-- 
 | 
					 | 
				
			||||||
2.43.0
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
							
								
								
									
										58
									
								
								machine-learning/0001-guard-algo-benchmark-results.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								machine-learning/0001-guard-algo-benchmark-results.patch
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,58 @@
 | 
				
			|||||||
 | 
					From 1f5d6323fa69ee16feab25f8e1398c1aed03ee08 Mon Sep 17 00:00:00 2001
 | 
				
			||||||
 | 
					From: mertalev <101130780+mertalev@users.noreply.github.com>
 | 
				
			||||||
 | 
					Date: Sun, 29 Dec 2024 14:07:51 -0500
 | 
				
			||||||
 | 
					Subject: [PATCH] guard algo benchmark results
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					---
 | 
				
			||||||
 | 
					 onnxruntime/core/providers/rocm/nn/conv.h | 6 ++++++
 | 
				
			||||||
 | 
					 1 file changed, 6 insertions(+)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					diff --git a/onnxruntime/core/providers/rocm/nn/conv.h b/onnxruntime/core/providers/rocm/nn/conv.h
 | 
				
			||||||
 | 
					index bc9846203e..0086e064f1 100644
 | 
				
			||||||
 | 
					--- a/onnxruntime/core/providers/rocm/nn/conv.h
 | 
				
			||||||
 | 
					+++ b/onnxruntime/core/providers/rocm/nn/conv.h
 | 
				
			||||||
 | 
					@@ -52,6 +52,7 @@ class lru_unordered_map {
 | 
				
			||||||
 | 
					   lru_unordered_map(size_t max_size) : max_size_(max_size) {}
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					   void insert(const Key& key, const T& value) {
 | 
				
			||||||
 | 
					+    std::lock_guard<std::mutex> guard(mutex_);
 | 
				
			||||||
 | 
					     auto it = items_.find(key);
 | 
				
			||||||
 | 
					     if (it != items_.end()) {
 | 
				
			||||||
 | 
					       it->second.value = value;
 | 
				
			||||||
 | 
					@@ -69,6 +70,7 @@ class lru_unordered_map {
 | 
				
			||||||
 | 
					   }
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					   T& at(const Key& key) {
 | 
				
			||||||
 | 
					+    std::lock_guard<std::mutex> guard(mutex_);
 | 
				
			||||||
 | 
					     auto it = items_.find(key);
 | 
				
			||||||
 | 
					     if (it == items_.end()) {
 | 
				
			||||||
 | 
					       throw std::out_of_range("There is no such key in cache");
 | 
				
			||||||
 | 
					@@ -78,14 +80,17 @@ class lru_unordered_map {
 | 
				
			||||||
 | 
					   }
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					   bool contains(const Key& key) const {
 | 
				
			||||||
 | 
					+    std::lock_guard<std::mutex> guard(mutex_);
 | 
				
			||||||
 | 
					     return items_.find(key) != items_.end();
 | 
				
			||||||
 | 
					   }
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					   size_t size() const {
 | 
				
			||||||
 | 
					+    std::lock_guard<std::mutex> guard(mutex_);
 | 
				
			||||||
 | 
					     return items_.size();
 | 
				
			||||||
 | 
					   }
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					   void clear() {
 | 
				
			||||||
 | 
					+    std::lock_guard<std::mutex> guard(mutex_);
 | 
				
			||||||
 | 
					     items_.clear();
 | 
				
			||||||
 | 
					     lru_list_.clear();
 | 
				
			||||||
 | 
					   }
 | 
				
			||||||
 | 
					@@ -106,6 +111,7 @@ class lru_unordered_map {
 | 
				
			||||||
 | 
					   size_t max_size_;
 | 
				
			||||||
 | 
					   std::unordered_map<Key, value_type, Hash, KeyEqual, MapAllocator> items_;
 | 
				
			||||||
 | 
					   list_type lru_list_;
 | 
				
			||||||
 | 
					+  mutable std::mutex mutex_;
 | 
				
			||||||
 | 
					 };
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					 // cached miopen descriptors
 | 
				
			||||||
 | 
					-- 
 | 
				
			||||||
 | 
					2.43.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -17,11 +17,11 @@ RUN mkdir /opt/armnn && \
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
# Warning: 26.3Gb of disk space required to pull this image
 | 
					# Warning: 26.3Gb of disk space required to pull this image
 | 
				
			||||||
# https://github.com/microsoft/onnxruntime/blob/main/dockerfiles/Dockerfile.rocm
 | 
					# https://github.com/microsoft/onnxruntime/blob/main/dockerfiles/Dockerfile.rocm
 | 
				
			||||||
FROM rocm/dev-ubuntu-24.04:6.2.4-complete AS builder-rocm
 | 
					FROM rocm/dev-ubuntu-22.04:6.1.2-complete AS builder-rocm
 | 
				
			||||||
 | 
					
 | 
				
			||||||
WORKDIR /code
 | 
					WORKDIR /code
 | 
				
			||||||
 | 
					
 | 
				
			||||||
RUN apt-get update && apt-get install -y --no-install-recommends wget git python3.12-venv
 | 
					RUN apt-get update && apt-get install -y --no-install-recommends wget git python3.10-venv
 | 
				
			||||||
# Install same version as the Dockerfile provided by onnxruntime
 | 
					# Install same version as the Dockerfile provided by onnxruntime
 | 
				
			||||||
RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.27.3/cmake-3.27.3-linux-x86_64.sh && \
 | 
					RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.27.3/cmake-3.27.3-linux-x86_64.sh && \
 | 
				
			||||||
    chmod +x cmake-3.27.3-linux-x86_64.sh && \
 | 
					    chmod +x cmake-3.27.3-linux-x86_64.sh && \
 | 
				
			||||||
@ -32,12 +32,11 @@ RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.27.3/cmake-3.
 | 
				
			|||||||
ENV PATH /code/cmake-3.27.3-linux-x86_64/bin:${PATH}
 | 
					ENV PATH /code/cmake-3.27.3-linux-x86_64/bin:${PATH}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Prepare onnxruntime repository & build onnxruntime
 | 
					# Prepare onnxruntime repository & build onnxruntime
 | 
				
			||||||
# Note: cannot upgrade from 1.19.2 as of writing until upstream updates the ROCm CI
 | 
					 | 
				
			||||||
RUN git clone --single-branch --branch v1.19.2 --recursive "https://github.com/Microsoft/onnxruntime" onnxruntime
 | 
					RUN git clone --single-branch --branch v1.19.2 --recursive "https://github.com/Microsoft/onnxruntime" onnxruntime
 | 
				
			||||||
WORKDIR /code/onnxruntime
 | 
					WORKDIR /code/onnxruntime
 | 
				
			||||||
# Fix for multi-threading based on comments in https://github.com/microsoft/onnxruntime/pull/19567
 | 
					# Fix for multi-threading based on comments in https://github.com/microsoft/onnxruntime/pull/19567
 | 
				
			||||||
COPY ./0001-fix-avoid-race-condition-for-rocm-conv-algo-caching.patch /tmp/
 | 
					COPY ./0001-guard-algo-benchmark-results.patch /tmp/
 | 
				
			||||||
RUN git apply /tmp/0001-fix-avoid-race-condition-for-rocm-conv-algo-caching.patch
 | 
					RUN git apply /tmp/0001-guard-algo-benchmark-results.patch
 | 
				
			||||||
 | 
					
 | 
				
			||||||
RUN /bin/sh ./dockerfiles/scripts/install_common_deps.sh
 | 
					RUN /bin/sh ./dockerfiles/scripts/install_common_deps.sh
 | 
				
			||||||
# Note: the `parallel` setting uses a substantial amount of RAM
 | 
					# Note: the `parallel` setting uses a substantial amount of RAM
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user