llm: Don't always evict models on CPU-only systems

Model eviction happens when we have at least one other model
loaded and are unable to load all layers into VRAM. However, on
CPU-only systems we can never load layers into VRAM, so this
check constantly triggered eviction.
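The decision described above can be sketched as a small guard. This is an illustration only, not Ollama's actual scheduler code (which is not part of this test-only excerpt); the function and parameter names here (`shouldEvict`, `gpuLayerCapacity`, and so on) are assumptions for the sketch:

```go
package main

import "fmt"

// shouldEvict sketches the eviction rule described in the commit message.
// Hypothetical names; the real logic lives in Ollama's scheduler.
func shouldEvict(otherModelsLoaded bool, gpuLayerCapacity, requestedLayers int) bool {
	// On a CPU-only system no layers can ever be placed in VRAM, so
	// evicting other models cannot help; never evict in that case.
	if gpuLayerCapacity == 0 {
		return false
	}
	// Otherwise, evict only when another model is loaded and the request
	// does not fully fit in the available VRAM.
	return otherModelsLoaded && requestedLayers > gpuLayerCapacity
}

func main() {
	fmt.Println(shouldEvict(true, 0, 50))  // CPU-only: false, even though nothing fits
	fmt.Println(shouldEvict(true, 10, 50)) // over capacity with another model loaded: true
	fmt.Println(shouldEvict(false, 10, 50)) // nothing else loaded: false
}
```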

Fixes #13227
Author: Jesse Gross
Date: 2025-11-25 14:51:02 -08:00
Parent: d771043e88
Commit: 5317202c38
2 changed files with 10 additions and 9 deletions


@@ -26,10 +26,11 @@ func TestLLMServerFitGPU(t *testing.T) {
 		expectedErr error
 	}{
 		{
-			name:     "No GPU",
-			layers:   []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
-			numGPU:   -1,
-			expected: ml.GPULayersList{},
+			name:        "No GPU",
+			layers:      []int{50 * format.MebiByte, 50 * format.MebiByte, 50 * format.MebiByte},
+			numGPU:      -1,
+			expected:    ml.GPULayersList{},
+			requireFull: true, // Should not try to evict even though we can't load any layers
 		},
 		{
 			name: "Full single GPU",