mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-21 22:33:56 +00:00
backend: Support graph computation that does not return an output
There are two cases where we may not have an output after computing:
- Prompt processing, where the length of the input exceeds the batch size
- Internal memory management operations, such as cache defrag and shift
This commit is contained in:
@@ -275,5 +275,8 @@ func Forward(m Model, optsFuncs ...OptionsFunc) (ml.Tensor, error) {
|
||||
}
|
||||
defer ctx.Close()
|
||||
|
||||
return ctx.Compute(t), nil
|
||||
ctx.Forward(t)
|
||||
ctx.Compute(t)
|
||||
|
||||
return t, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user