diff --git a/ml/backend/ggml/ggml.go b/ml/backend/ggml/ggml.go index 39457939..f1a19e0b 100644 --- a/ml/backend/ggml/ggml.go +++ b/ml/backend/ggml/ggml.go @@ -687,7 +687,7 @@ func (b *Backend) CacheConfig() ml.CacheConfig { if b.flashAttention { return ml.CacheConfig{CachePadding: 256, MaskDType: ml.DTypeF16, MaskBatchPadding: C.GGML_KQ_MASK_PAD} } else { - return ml.CacheConfig{CachePadding: 32, PermutedV: true} + return ml.CacheConfig{CachePadding: 256, PermutedV: true} } }