ggml update to b7108 (#12992)

* Revert "vulkan: temporary cary of vulkan fixes (#12971)"

  This reverts commit 3a9e8e9fd4.

* ggml update to b7087
* fix argsort on metal
* update to b7108
* fix bakllava regression

  This model lacks the metadata for the projector type.

* update to b7209
* fix TopK perf
* only build arm code on arm
2025-12-22 23:03:55 +00:00 · 2025-12-03 19:43:29 -08:00
parent 854d40edc5
commit 0cf7794b16
303 changed files with 32711 additions and 23435 deletions
--- a/llama/patches/0021-decode-disable-output_all.patch
+++ b/llama/patches/0021-decode-disable-output_all.patch
@@ -8,12 +8,12 @@ Subject: [PATCH] decode: disable output_all
 1 file changed, 1 insertion(+), 2 deletions(-)

 diff --git a/src/llama-context.cpp b/src/llama-context.cpp
-index bd348bcad..8b4a89d38 100644
+index e04f0fc4f..1359c614b 100644
 --- a/src/llama-context.cpp
 +++ b/src/llama-context.cpp
-@@ -974,8 +974,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
+@@ -999,8 +999,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
     const int64_t n_vocab = vocab.n_tokens();
-     const int64_t n_embd  = hparams.n_embd;
+     const int64_t n_embd  = hparams.n_embd_inp();
 
 -    // when computing embeddings, all tokens are output
 -    const bool output_all = cparams.embeddings;