mirror of
https://github.com/likelovewant/ollama-for-amd.git
synced 2025-12-22 06:43:57 +00:00
ggml update to b7108 (#12992)
* Revert "vulkan: temporary cary of vulkan fixes (#12971)"
This reverts commit 3a9e8e9fd4.
* ggml update to b7087
* fix argsort on metal
* update to b7108
* fix bakllava regression
This model lacks the metadata for the projector type.
* update to b7209
* fix TopK perf
* only build arm code on arm
This commit is contained in:
8
llama/llama.cpp/src/llama-hparams.h
vendored
8
llama/llama.cpp/src/llama-hparams.h
vendored
@@ -6,7 +6,7 @@
|
||||
|
||||
// bump if necessary
|
||||
#define LLAMA_MAX_LAYERS 512
|
||||
#define LLAMA_MAX_EXPERTS 384 // Kimi-K2
|
||||
#define LLAMA_MAX_EXPERTS 512 // Qwen3 Next
|
||||
|
||||
enum llama_expert_gating_func_type {
|
||||
LLAMA_EXPERT_GATING_FUNC_TYPE_NONE = 0,
|
||||
@@ -185,6 +185,9 @@ struct llama_hparams {
|
||||
std::array<float, LLAMA_MAX_LAYERS> xielu_beta;
|
||||
std::array<float, LLAMA_MAX_LAYERS> xielu_eps;
|
||||
|
||||
// qwen3vl deepstack
|
||||
uint32_t n_deepstack_layers = 0;
|
||||
|
||||
// needed by encoder-decoder models (e.g. T5, FLAN-T5)
|
||||
// ref: https://github.com/ggerganov/llama.cpp/pull/8141
|
||||
llama_token dec_start_token_id = LLAMA_TOKEN_NULL;
|
||||
@@ -226,6 +229,9 @@ struct llama_hparams {
|
||||
|
||||
uint32_t n_gqa(uint32_t il = 0) const;
|
||||
|
||||
// dimension of main + auxiliary input embeddings
|
||||
uint32_t n_embd_inp() const;
|
||||
|
||||
// dimension of key embeddings across all k-v heads
|
||||
uint32_t n_embd_k_gqa(uint32_t il = 0) const;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user