int: adjust a few models for integration tests (#11872)

Daniel Hiltgen
2025-08-13 15:42:36 -07:00
committed by GitHub
parent dc5a645434
commit a24f90604f


@@ -4,7 +4,9 @@ package integration
 import (
 	"context"
+	"fmt"
 	"log/slog"
+	"math"
 	"os"
 	"strconv"
 	"sync"
@@ -21,7 +23,7 @@ func TestMultiModelConcurrency(t *testing.T) {
 	var (
 		req = [2]api.GenerateRequest{
 			{
-				Model:     "llama3.2:1b",
+				Model:     smol,
 				Prompt:    "why is the ocean blue?",
 				Stream:    &stream,
 				KeepAlive: &api.Duration{Duration: 10 * time.Second},
@@ -30,7 +32,7 @@ func TestMultiModelConcurrency(t *testing.T) {
 					"temperature": 0.0,
 				},
 			}, {
-				Model:     "tinydolphin",
+				Model:     "qwen3:0.6b",
 				Prompt:    "what is the origin of the us thanksgiving holiday?",
 				Stream:    &stream,
 				KeepAlive: &api.Duration{Duration: 10 * time.Second},
@@ -132,16 +134,16 @@ func TestMultiModelStress(t *testing.T) {
 			size: 2876 * format.MebiByte,
 		},
 		{
-			name: "phi",
-			size: 2616 * format.MebiByte,
+			name: "qwen3:0.6b",
+			size: 1600 * format.MebiByte,
 		},
 		{
 			name: "gemma:2b",
 			size: 2364 * format.MebiByte,
 		},
 		{
-			name: "stable-code:3b",
-			size: 2608 * format.MebiByte,
+			name: "deepseek-r1:1.5b",
+			size: 2048 * format.MebiByte,
 		},
 		{
 			name: "starcoder2:3b",
@@ -149,17 +151,21 @@ func TestMultiModelStress(t *testing.T) {
 		},
 	}
 	mediumModels := []model{
+		{
+			name: "qwen3:8b",
+			size: 6600 * format.MebiByte,
+		},
 		{
 			name: "llama2",
 			size: 5118 * format.MebiByte,
 		},
 		{
-			name: "mistral",
-			size: 4620 * format.MebiByte,
+			name: "deepseek-r1:7b",
+			size: 5600 * format.MebiByte,
 		},
 		{
-			name: "orca-mini:7b",
-			size: 5118 * format.MebiByte,
+			name: "mistral",
+			size: 4620 * format.MebiByte,
 		},
 		{
 			name: "dolphin-mistral",
@@ -254,7 +260,7 @@ func TestMultiModelStress(t *testing.T) {
 	}
 	go func() {
 		for {
-			time.Sleep(2 * time.Second)
+			time.Sleep(10 * time.Second)
 			select {
 			case <-ctx.Done():
 				return
@@ -265,7 +271,21 @@ func TestMultiModelStress(t *testing.T) {
 					continue
 				}
 				for _, m := range models.Models {
-					slog.Info("loaded model snapshot", "model", m)
+					var procStr string
+					switch {
+					case m.SizeVRAM == 0:
+						procStr = "100% CPU"
+					case m.SizeVRAM == m.Size:
+						procStr = "100% GPU"
+					case m.SizeVRAM > m.Size || m.Size == 0:
+						procStr = "Unknown"
+					default:
+						sizeCPU := m.Size - m.SizeVRAM
+						cpuPercent := math.Round(float64(sizeCPU) / float64(m.Size) * 100)
+						procStr = fmt.Sprintf("%d%%/%d%%", int(cpuPercent), int(100-cpuPercent))
+					}
+					slog.Info("loaded model snapshot", "model", m.Name, "CPU/GPU", procStr, "expires", format.HumanTime(m.ExpiresAt, "Never"))
 				}
 			}
 		}
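
A minimal standalone sketch of the new CPU/GPU split logging added above, for anyone who wants to check the arithmetic in isolation. The helper name and the sizes in main are hypothetical illustrations, not values from the tests:

package main

import (
	"fmt"
	"math"
)

// splitPercent mirrors the switch added in the test: given a model's total
// size and the portion resident in VRAM, it returns the same status string
// the snapshot goroutine logs.
func splitPercent(size, sizeVRAM int64) string {
	switch {
	case sizeVRAM == 0:
		return "100% CPU"
	case sizeVRAM == size:
		return "100% GPU"
	case sizeVRAM > size || size == 0:
		return "Unknown"
	default:
		sizeCPU := size - sizeVRAM
		cpuPercent := math.Round(float64(sizeCPU) / float64(size) * 100)
		return fmt.Sprintf("%d%%/%d%%", int(cpuPercent), int(100-cpuPercent))
	}
}

func main() {
	// Hypothetical sizes: a 2048 MiB model with 1536 MiB in VRAM works out
	// to a 25%/75% CPU/GPU split; fully resident and fully evicted models
	// take the fast paths.
	fmt.Println(splitPercent(2048, 1536)) // 25%/75%
	fmt.Println(splitPercent(2048, 2048)) // 100% GPU
	fmt.Println(splitPercent(2048, 0))    // 100% CPU
}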