Add deepseek v3.1 (#13063)

* Add MLA for flash attention
* Revert to using chunks
Author: Grace
Date: 2025-11-17 18:03:21 -08:00
Committed by: GitHub
Parent: 1fd4cb87b2
Commit: 584e2d646f

4 changed files with 67 additions and 24 deletions

@@ -230,7 +230,7 @@ type Tensor interface {
 // kqv := value.Mulmat(ctx, kq)
 // return kqv.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
 type ScaledDotProductAttention interface {
-	ScaledDotProductAttention(ctx Context, key, value, mask, sinks Tensor, scale float64) Tensor
+	ScaledDotProductAttention(ctx Context, key, value, mask, sinks Tensor, vmla Tensor, scale float64) Tensor
 }
 
 type number interface {
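
The one-line change above threads a new vmla tensor through the fused attention interface. The interface's doc comment (partially visible in the context lines) spells out the unfused fallback, which suggests where vmla would apply. Below is a minimal sketch of that fallback extended with the new parameter; the function name sdpaFallback is hypothetical, sinks handling is omitted for brevity, and the placement of vmla is an assumption based on how MLA typically works (the latent attention output is up-projected by a value matrix), not code taken from this commit:

	// sdpaFallback is a hedged sketch, not the commit's implementation:
	// the naive attention from the interface doc comment, extended with
	// the new vmla parameter.
	func sdpaFallback(ctx Context, query, key, value, mask, vmla Tensor, scale float64) Tensor {
		kq := key.MulmatFullPrec(ctx, query) // attention scores
		kq = kq.Scale(ctx, scale)
		if mask != nil {
			kq = kq.Add(ctx, mask)
		}
		kq = kq.Softmax(ctx)
		kqv := value.Mulmat(ctx, kq)
		if vmla != nil {
			// Assumption: for MLA models, vmla up-projects the compressed
			// (latent) attention output back to the full value head dimension.
			kqv = vmla.Mulmat(ctx, kqv)
		}
		return kqv.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
	}

Callers that do not use MLA would presumably pass nil for vmla, leaving the existing behavior unchanged.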