mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-25 07:58:01 +00:00
cuda: skip large batches
CUDA panics on batches larger than 1024, so skip those and fall back to the CPU.
This commit is contained in:
committed by
Michael Yang
parent
92981ae3f2
commit
0796d79d19
@@ -31,7 +31,7 @@ Add new backend tests.
|
||||
6 files changed, 371 insertions(+), 117 deletions(-)
|
||||
|
||||
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
||||
index b2855b078..aaf4334b5 100644
|
||||
index 63a762ec2..db92a7901 100644
|
||||
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
||||
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
||||
@@ -458,6 +458,11 @@ static topk_moe_mode ggml_vk_num_additional_ops_to_topk_moe_mode(uint32_t num) {
|
||||
|
||||
Reference in New Issue
Block a user