From ef549d513ce37729b0b29984eb738856da7d6b89 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Thu, 30 Oct 2025 14:30:31 -0700 Subject: [PATCH] ggml: Increase maximum graph size The initial implementation of qwen3-vl:235b exceeded the maximum graph size based on the number of tensors. Although this was later fixed through the use of the mrope operation, we are close to the limit in some cases. This updates the maximum graph size calculation to track the current llama.cpp usage of GGML. --- ml/backend/ggml/ggml.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ml/backend/ggml/ggml.go b/ml/backend/ggml/ggml.go index eb02c3b1..c02926b3 100644 --- a/ml/backend/ggml/ggml.go +++ b/ml/backend/ggml/ggml.go @@ -378,7 +378,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) { } } - maxGraphNodes := max(8192, len(meta.Tensors().Items())*5) + maxGraphNodes := max(1024, len(meta.Tensors().Items())*8) sched := C.ggml_backend_sched_new_ext( (*C.ggml_backend_t)(unsafe.Pointer(&schedBackends[0])),