mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-21 22:33:56 +00:00
cuda: skip large batches
CUDA panics on batches larger than 1024, so skip those and fall back to the CPU.
This commit is contained in:
committed by
Michael Yang
parent
92981ae3f2
commit
0796d79d19
@@ -38,7 +38,7 @@ index 44ae76d66..639d551a2 100644
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
||||
index d2c278a35..221e29509 100644
|
||||
index ca02ea079..c12b069e5 100644
|
||||
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
||||
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
||||
@@ -73,6 +73,7 @@ DispatchLoaderDynamic & ggml_vk_default_dispatcher();
|
||||
|
||||
Reference in New Issue
Block a user