Mirror of https://github.com/likelovewant/ollama-for-amd.git, synced 2025-12-21 14:26:30 +00:00
ggml: Increase maximum graph size
The initial implementation of qwen3-vl:235b exceeded the maximum graph size based on the number of tensors. Although this was later fixed through the use of the mrope operation, we are close to the limit in some cases. This updates the graph-size calculation to track current llama.cpp usage of GGML.
This commit is contained in:
@@ -378,7 +378,7 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
 	}
 	}

-	maxGraphNodes := max(8192, len(meta.Tensors().Items())*5)
+	maxGraphNodes := max(1024, len(meta.Tensors().Items())*8)

 	sched := C.ggml_backend_sched_new_ext(
 		(*C.ggml_backend_t)(unsafe.Pointer(&schedBackends[0])),
Reference in New Issue
Block a user