mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-21 22:33:56 +00:00
Add DeepSeek v3.1 (#13063)
* Add MLA for flash attention
* Revert to using chunks
This commit is contained in:
@@ -230,7 +230,7 @@ type Tensor interface {
|
||||
// kqv := value.Mulmat(ctx, kq)
|
||||
// return kqv.Permute(ctx, 0, 2, 1, 3).Contiguous(ctx)
|
||||
type ScaledDotProductAttention interface {
|
||||
-ScaledDotProductAttention(ctx Context, key, value, mask, sinks Tensor, scale float64) Tensor
|
||||
+ScaledDotProductAttention(ctx Context, key, value, mask, sinks Tensor, vmla Tensor, scale float64) Tensor
|
||||
}
|
||||
|
||||
type number interface {
|
||||
|
||||
Reference in New Issue
Block a user