use split activations when possible (#12293)

* use ggml_*_split activations when possible * forward qkv
2025-12-23 15:08:27 +00:00 · 2025-09-16 09:51:19 -07:00
parent c253433d68
commit ad95d5b30b
16 changed files with 59 additions and 50 deletions
--- a/ml/backend.go
+++ b/ml/backend.go
@@ -430,12 +430,13 @@ type Tensor interface {
 	Sin(ctx Context) Tensor
 	Cos(ctx Context) Tensor
 	Tanh(ctx Context) Tensor
-	GELU(ctx Context) Tensor
-	QuickGELU(ctx Context) Tensor
-	SILU(ctx Context) Tensor
-	RELU(ctx Context) Tensor
+	GELU(ctx Context, up ...Tensor) Tensor
+	SILU(ctx Context, up ...Tensor) Tensor
+	RELU(ctx Context, up ...Tensor) Tensor
 	Sigmoid(ctx Context) Tensor
-	SwiGLU(ctx Context, up Tensor, alpha, limit float32) Tensor
+
+	// SILUAlphaLimit is a variant of SILU that clamps the input to the range [-limit, limit].
+	SILUAlphaLimit(ctx Context, up Tensor, alpha, limit float32) Tensor

 	Reshape(ctx Context, shape ...int) Tensor
 	View(ctx Context, offset int, shape ...int) Tensor