cuda: skip large batches

CUDA panics on batches larger than 1024, so skip those and fall back to the CPU.
2025-12-21 22:33:56 +00:00 · 2025-11-18 11:13:37 -08:00
parent 92981ae3f2
commit 0796d79d19
9 changed files with 35 additions and 7 deletions
--- a/llama/patches/0028-Add-memory-detection-using-DXGI-PDH.patch
+++ b/llama/patches/0028-Add-memory-detection-using-DXGI-PDH.patch
@@ -38,7 +38,7 @@ index 44ae76d66..639d551a2 100644
 #ifdef __cplusplus
 }
 diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
-index d2c278a35..221e29509 100644
+index ca02ea079..c12b069e5 100644
 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
 +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -73,6 +73,7 @@ DispatchLoaderDynamic & ggml_vk_default_dispatcher();