ggml update to b7108 (#12992)
* Revert "vulkan: temporary carry of vulkan fixes (#12971)"
This reverts commit 3a9e8e9fd4.
* ggml update to b7087
* fix argsort on metal
* update to b7108
* fix bakllava regression
This model lacks the metadata for the projector type (see the sketch after this list).
* update to b7209
* fix TopK perf
* only build arm code on arm
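
The bakllava fix works around GGUF files that omit the projector-type metadata; presumably the loader falls back to a default when the key is absent. A minimal sketch of that kind of fallback, assuming a plain key/value view of the model's metadata (the gguf_kv alias and resolve_projector_type below are illustrative, not the actual ollama/llama.cpp code; "clip.projector_type" is the metadata key llama.cpp's CLIP loader reads, and defaulting to the LLaVA "mlp" projector is an assumption):

#include <map>
#include <string>

// Illustrative key/value view of a model's GGUF metadata.
using gguf_kv = std::map<std::string, std::string>;

// Pick a CLIP projector type, defaulting when the model (like bakllava)
// ships without the metadata.
std::string resolve_projector_type(const gguf_kv & kv) {
    auto it = kv.find("clip.projector_type");
    if (it != kv.end()) {
        return it->second; // e.g. "mlp", "ldp"
    }
    return "mlp"; // assumed default for LLaVA-family models
}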
@@ -8,12 +8,12 @@ Subject: [PATCH] decode: disable output_all
 1 file changed, 1 insertion(+), 2 deletions(-)
 
 diff --git a/src/llama-context.cpp b/src/llama-context.cpp
-index bd348bcad..8b4a89d38 100644
+index e04f0fc4f..1359c614b 100644
 --- a/src/llama-context.cpp
 +++ b/src/llama-context.cpp
-@@ -974,8 +974,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
+@@ -999,8 +999,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
      const int64_t n_vocab = vocab.n_tokens();
-     const int64_t n_embd = hparams.n_embd;
+     const int64_t n_embd = hparams.n_embd_inp();
 
 -    // when computing embeddings, all tokens are output
 -    const bool output_all = cparams.embeddings;
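
For context, the vendored patch this diff refreshes ("decode: disable output_all") drops llama.cpp's rule that enabling embeddings makes decode() output every token; the replacement line is not visible in this hunk, but given the patch title it presumably pins output_all to false so only tokens the batch explicitly flags produce output. A minimal sketch of what that flag controls, using stand-in types rather than llama.cpp's real structs:

#include <cstdint>
#include <vector>

// Stand-in for the parts of llama_batch that matter here.
struct batch_t {
    int32_t n_tokens = 0;
    std::vector<int8_t> logits; // per-token "produce output" flags
};

// How many tokens get an output row in decode():
// with output_all, all of them; otherwise only the flagged ones.
int32_t count_outputs(const batch_t & batch, bool output_all) {
    if (output_all) {
        return batch.n_tokens;
    }
    int32_t n_outputs = 0;
    for (int32_t i = 0; i < batch.n_tokens; ++i) {
        n_outputs += batch.logits[i] != 0 ? 1 : 0;
    }
    return n_outputs;
}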