mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-21 22:33:56 +00:00
backend: Support graph computation that does not return an output
There are two cases where we may not have an output after computing:
- Prompt processing, where the length of the input exceeds the batch size
- Internal memory management operations, such as cache defrag and shift
This commit is contained in:
@@ -275,5 +275,8 @@ func Forward(m Model, optsFuncs ...OptionsFunc) (ml.Tensor, error) {
|
||||
}
|
||||
defer ctx.Close()
|
||||
|
||||
return ctx.Compute(t), nil
|
||||
ctx.Forward(t)
|
||||
ctx.Compute(t)
|
||||
|
||||
return t, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user