llm: normalise kvct parameter handling (#7926)

This commit is contained in:
Sam
2024-12-04 11:30:40 +11:00
committed by GitHub
parent 1bdab9fdb1
commit 539be43640
2 changed files with 2 additions and 2 deletions

View File

@@ -225,7 +225,7 @@ func NewLlamaServer(gpus discover.GpuInfoList, model string, ggml *GGML, adapter
fa = false
}
-kvct := envconfig.KvCacheType()
+kvct := strings.ToLower(envconfig.KvCacheType())
if fa {
slog.Info("enabling flash attention")