llm: normalise kvct (KV cache type) parameter handling by lowercasing the configured value (#7926)

2025-12-23 23:18:26 +00:00 · 2024-12-04 11:30:40 +11:00
parent 1bdab9fdb1
commit 539be43640
2 changed files with 2 additions and 2 deletions
--- a/llm/server.go
+++ b/llm/server.go
@@ -225,7 +225,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
 		fa = false
 	}

-	kvct := envconfig.KvCacheType()
+	kvct := strings.ToLower(envconfig.KvCacheType())

 	if fa {
 		slog.Info("enabling flash attention")