ollamarunner: Memory usage reporting

This provides granular information about the backend memory allocations required by the runner:
- Per backend
- Per layer
- Weights, cache and graph
- Allocation status

This can be used for debugging and validating memory estimates.
2025-12-21 14:26:30 +00:00 · 2025-04-17 11:00:25 -07:00
parent 6db8a3771c
commit 73d6a82cce
5 changed files with 224 additions and 78 deletions
--- a/kvcache/causal_test.go
+++ b/kvcache/causal_test.go
@@ -508,7 +508,7 @@ func (c *testContext) Forward(...ml.Tensor) ml.Context { return c }

 func (c *testContext) Compute(...ml.Tensor) {}

-func (c *testContext) Reserve() error { return nil }
+func (c *testContext) Reserve() {}

 func (c *testContext) MaxGraphNodes() int {
 	return 10