mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-23 15:08:27 +00:00
use split activations when possible (#12293)
* use ggml_*_split activations when possible
* forward qkv
This commit is contained in:
@@ -210,7 +210,7 @@ func (mlp *MLPBlock) Forward(ctx ml.Context, hiddenStates, one ml.Tensor, opts *
 		up = mlp.Up.Forward(ctx, hiddenStates, selectedExperts)
 	}
 
-	hiddenStates = gate.SwiGLU(ctx, up, 1.702, 7)
+	hiddenStates = gate.SILUAlphaLimit(ctx, up, 1.702, 7)
 
 	experts := mlp.Down.Forward(ctx, hiddenStates, selectedExperts)
 	experts = experts.Mul(ctx, routingWeights)
Reference in New Issue
Block a user