mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-21 22:33:56 +00:00
tests: add single threaded history test (#12295)
* tests: add single threaded history test
  Also tidies up some existing tests to handle more model output variation
* test: add support for testing specific architectures
This commit is contained in:
@@ -40,6 +40,18 @@ var (
|
||||
// cat int.log | grep MODEL_PERF_HEADER | head -1| cut -f2- -d: > perf.csv
|
||||
// cat int.log | grep MODEL_PERF_DATA | cut -f2- -d: >> perf.csv
|
||||
func TestModelsPerf(t *testing.T) {
|
||||
if s := os.Getenv("OLLAMA_NEW_ENGINE"); s != "" {
|
||||
doModelPerfTest(t, ollamaEngineChatModels)
|
||||
} else {
|
||||
doModelPerfTest(t, append(ollamaEngineChatModels, llamaRunnerChatModels...))
|
||||
}
|
||||
}
|
||||
|
||||
func TestLibraryModelsPerf(t *testing.T) {
|
||||
doModelPerfTest(t, libraryChatModels)
|
||||
}
|
||||
|
||||
func doModelPerfTest(t *testing.T, chatModels []string) {
|
||||
softTimeout, hardTimeout := getTimeouts(t)
|
||||
slog.Info("Setting timeouts", "soft", softTimeout, "hard", hardTimeout)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), hardTimeout)
|
||||
@@ -65,14 +77,12 @@ func TestModelsPerf(t *testing.T) {
|
||||
}
|
||||
longPrompt := "summarize the following: " + string(data)
|
||||
|
||||
var chatModels []string
|
||||
if s := os.Getenv("OLLAMA_NEW_ENGINE"); s != "" {
|
||||
chatModels = ollamaEngineChatModels
|
||||
} else {
|
||||
chatModels = append(ollamaEngineChatModels, llamaRunnerChatModels...)
|
||||
}
|
||||
targetArch := os.Getenv("OLLAMA_TEST_ARCHITECTURE")
|
||||
|
||||
for _, model := range chatModels {
|
||||
if !strings.Contains(model, ":") {
|
||||
model = model + ":latest"
|
||||
}
|
||||
t.Run(model, func(t *testing.T) {
|
||||
if time.Now().Sub(started) > softTimeout {
|
||||
t.Skip("skipping remaining tests to avoid excessive runtime")
|
||||
@@ -88,6 +98,9 @@ func TestModelsPerf(t *testing.T) {
|
||||
}
|
||||
arch := resp.ModelInfo["general.architecture"].(string)
|
||||
maxContext = int(resp.ModelInfo[fmt.Sprintf("%s.context_length", arch)].(float64))
|
||||
if targetArch != "" && arch != targetArch {
|
||||
t.Skip(fmt.Sprintf("Skipping %s architecture %s != %s", model, arch, targetArch))
|
||||
}
|
||||
|
||||
if maxVram > 0 {
|
||||
resp, err := client.List(ctx)
|
||||
@@ -151,8 +164,8 @@ func TestModelsPerf(t *testing.T) {
|
||||
prompt string
|
||||
anyResp []string
|
||||
}{
|
||||
{"why is the sky blue?", []string{"rayleigh", "scattering", "atmosphere", "nitrogen", "oxygen"}},
|
||||
{maxPrompt, []string{"shakespeare", "oppression", "sorrows", "gutenberg", "child", "license", "sonnet", "melancholy"}},
|
||||
{blueSkyPrompt, blueSkyExpected},
|
||||
{maxPrompt, []string{"shakespeare", "oppression", "sorrows", "gutenberg", "child", "license", "sonnet", "melancholy", "love", "sorrow", "beauty"}},
|
||||
}
|
||||
var gpuPercent int
|
||||
for _, tc := range testCases {
|
||||
@@ -241,11 +254,12 @@ func TestModelsPerf(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
// Round the logged prompt count for comparisons across versions/configurations which can vary slightly
|
||||
fmt.Fprintf(os.Stderr, "MODEL_PERF_HEADER:%s,%s,%s,%s,%s,%s,%s\n",
|
||||
"MODEL",
|
||||
"CONTEXT",
|
||||
"GPU PERCENT",
|
||||
"PROMPT COUNT",
|
||||
"APPROX PROMPT COUNT",
|
||||
"LOAD TIME",
|
||||
"PROMPT EVAL TPS",
|
||||
"EVAL TPS",
|
||||
@@ -254,7 +268,7 @@ func TestModelsPerf(t *testing.T) {
|
||||
model,
|
||||
numCtx,
|
||||
gpuPercent,
|
||||
resp.PromptEvalCount,
|
||||
(resp.PromptEvalCount/10)*10,
|
||||
float64(resp.LoadDuration)/1000000000.0,
|
||||
float64(resp.PromptEvalCount)/(float64(resp.PromptEvalDuration)/1000000000.0),
|
||||
float64(resp.EvalCount)/(float64(resp.EvalDuration)/1000000000.0),
|
||||
|
||||
Reference in New Issue
Block a user