update llama.cpp

2025-12-21 22:33:56 +00:00 · 2023-08-03 11:49:58 -07:00
parent f0b365a478
commit c5bcf32823
19 changed files with 623 additions and 298 deletions
--- a/llama/llama.go
+++ b/llama/llama.go
@@ -128,11 +128,6 @@ func New(model string, opts api.Options) (*LLM, error) {

 	C.llama_backend_init(C.bool(llm.UseNUMA))

-	// TODO: GQA == 8 suggests 70B model which doesn't support metal
-	if llm.NumGQA == 8 {
-		llm.NumGPU = 0
-	}
-
 	params := C.llama_context_default_params()
 	params.seed = C.uint(llm.Seed)
 	params.n_ctx = C.int(llm.NumCtx)