From 75d17fc6c20514417193882de7a0ece75ee301d1 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Wed, 15 Oct 2025 11:52:14 -0700 Subject: [PATCH] perf: backport cuda iGPU sched spin (#12641) --- ...e-CUDA-scheduling-strategy-to-spin-1.patch | 49 +++++++++++++++++++ .../ggml/ggml/src/ggml-cuda/ggml-cuda.cu | 9 ++++ 2 files changed, 58 insertions(+) create mode 100644 llama/patches/0030-CUDA-Changing-the-CUDA-scheduling-strategy-to-spin-1.patch diff --git a/llama/patches/0030-CUDA-Changing-the-CUDA-scheduling-strategy-to-spin-1.patch b/llama/patches/0030-CUDA-Changing-the-CUDA-scheduling-strategy-to-spin-1.patch new file mode 100644 index 00000000..c3c7fedf --- /dev/null +++ b/llama/patches/0030-CUDA-Changing-the-CUDA-scheduling-strategy-to-spin-1.patch @@ -0,0 +1,49 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Julius Tischbein +Date: Wed, 15 Oct 2025 13:54:15 +0200 +Subject: [PATCH] CUDA: Changing the CUDA scheduling strategy to spin (#16585) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +* CUDA set scheduling strategy to spinning for cc121 + +* Using prop.major and prop.minor, include HIP and MUSA + +* Exclude HIP and MUSA + +* Remove trailing whitespace + +Co-authored-by: Johannes Gäßler + +* Remove empty line + +Co-authored-by: Johannes Gäßler + +--------- + +Co-authored-by: Johannes Gäßler +--- + ggml/src/ggml-cuda/ggml-cuda.cu | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu +index 6a278b5e9..87941f872 100644 +--- a/ggml/src/ggml-cuda/ggml-cuda.cu ++++ b/ggml/src/ggml-cuda/ggml-cuda.cu +@@ -340,6 +340,15 @@ static ggml_cuda_device_info ggml_cuda_init() { + } else if (device_name.substr(0, 21) == "NVIDIA GeForce GTX 16") { + turing_devices_without_mma.push_back({ id, device_name }); + } ++ ++ // Temporary performance fix: ++ // Setting device scheduling strategy for iGPUs with cc121 to "spinning" to avoid delays in cuda synchronize calls. ++ // TODO: Check for future drivers the default scheduling strategy and ++ // remove this call again when cudaDeviceScheduleSpin is default. ++ if (prop.major == 12 && prop.minor == 1) { ++ CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin)); ++ } ++ + #endif // defined(GGML_USE_HIP) + } + diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu index 6a278b5e..87941f87 100644 --- a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu @@ -340,6 +340,15 @@ static ggml_cuda_device_info ggml_cuda_init() { } else if (device_name.substr(0, 21) == "NVIDIA GeForce GTX 16") { turing_devices_without_mma.push_back({ id, device_name }); } + + // Temporary performance fix: + // Setting device scheduling strategy for iGPUs with cc121 to "spinning" to avoid delays in cuda synchronize calls. + // TODO: Check for future drivers the default scheduling strategy and + // remove this call again when cudaDeviceScheduleSpin is default. + if (prop.major == 12 && prop.minor == 1) { + CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceScheduleSpin)); + } + #endif // defined(GGML_USE_HIP) }