Add deepseek v3.1 (#13063)

* Add MLA for flash attention
* Revert to using chunks
Author: Grace
Date: 2025-11-17 18:03:21 -08:00
Committed by: GitHub
Parent: 1fd4cb87b2
Commit: 584e2d646f

4 changed files with 67 additions and 24 deletions

@@ -230,7 +230,7 @@ type Tensor interface {
 // kqv := value.Mulmat(ctx, kq)
 // return kqv.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
 type ScaledDotProductAttention interface {
-	ScaledDotProductAttention(ctx Context, key, value, mask, sinks Tensor, scale float64) Tensor
+	ScaledDotProductAttention(ctx Context, key, value, mask, sinks Tensor, vmla Tensor, scale float64) Tensor
 }
 
 type number interface {
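
The one-line change above threads a new vmla tensor through the fused attention interface. The interface's doc comment (partially visible in the context lines) spells out the unfused fallback, which suggests where vmla would apply. Below is a minimal sketch of that fallback extended with the new parameter; the function name sdpaFallback is hypothetical, sinks handling is omitted for brevity, and the placement of vmla is an assumption based on how MLA typically works (the latent attention output is up-projected by a value matrix), not code taken from this commit:

	// sdpaFallback is a hedged sketch, not the commit's implementation:
	// the naive attention from the interface doc comment, extended with
	// the new vmla parameter.
	func sdpaFallback(ctx Context, query, key, value, mask, vmla Tensor, scale float64) Tensor {
		kq := key.MulmatFullPrec(ctx, query) // attention scores
		kq = kq.Scale(ctx, scale)
		if mask != nil {
			kq = kq.Add(ctx, mask)
		}
		kq = kq.Softmax(ctx)
		kqv := value.Mulmat(ctx, kq)
		if vmla != nil {
			// Assumption: for MLA models, vmla up-projects the compressed
			// (latent) attention output back to the full value head dimension.
			kqv = vmla.Mulmat(ctx, kqv)
		}
		return kqv.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
	}

Callers that do not use MLA would presumably pass nil for vmla, leaving the existing behavior unchanged.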