diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go index d34a6913..920eff74 100644 --- a/fs/ggml/ggml.go +++ b/fs/ggml/ggml.go @@ -870,11 +870,6 @@ func (f GGML) SupportsKVCacheType(cacheType string) bool { return true } - if arch := f.KV().Architecture(); slices.Contains([]string{"gptoss", "gpt-oss"}, arch) { - // gpt-oss uses attention with sinks which does not support quantized cache types - slog.Warn("model only supports non-quantized cache types", "model", arch) - return false - } return slices.Contains([]string{"q8_0", "q4_0"}, cacheType) }