llama: update llama.cpp vendor code to commit d7cfe1ff (#9356)

Author: Jeffrey Morgan
Date: 2025-02-26 20:34:44 -08:00
Committed by: GitHub
Commit: d7d7e99662 (parent 2db96c18e7)

149 changed files with 18215 additions and 11009 deletions


@@ -30,7 +30,6 @@ struct llama_hparams {
     bool use_par_res;
     bool swin_norm;
 
-    uint32_t n_vocab = 0;
     uint32_t n_ctx_train; // context size the model was trained on
     uint32_t n_embd;
     uint32_t n_embd_features = 0;
@@ -41,8 +40,8 @@ struct llama_hparams {
     uint32_t n_embd_head_v; // dimension of values (d_v) aka n_embd_head
     uint32_t n_expert = 0;
     uint32_t n_expert_used = 0;
-    uint32_t n_vocab_type = 0; // for BERT-style token types
     uint32_t n_rel_attn_bkts = 0;
+    uint32_t n_vocab = 0;
 
     // for WavTokenizer
     struct llama_hparams_posnet posnet;
@@ -79,6 +78,7 @@ struct llama_hparams {
     uint32_t time_mix_extra_dim = 0;
     uint32_t time_decay_extra_dim = 0;
     uint32_t wkv_head_size = 0;
+    uint32_t token_shift_count = 2;
 
     float rope_attn_factor = 1.0f;
     float rope_freq_base_train;
@@ -141,7 +141,7 @@ struct llama_hparams {
     // Block skip connection
     bool n_bskcn(uint32_t n, uint32_t il) const;
 
-    // cross attention layers
+    // cross attention layers
     bool cross_attention_layers(uint32_t il) const;
 };
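
For context on the third hunk: token_shift_count sizes the token-shift state that RWKV-style models carry between tokens, and the default of 2 plausibly corresponds to one shift state each for time mixing and channel mixing. Below is a minimal sketch of how such a field typically feeds recurrent-state sizing; llama_hparams_min and n_embd_token_shift are illustrative names, not the vendored API.

// Sketch only: llama_hparams_min and n_embd_token_shift are illustrative
// names under the assumptions above, not the vendored code.
#include <cstdint>

struct llama_hparams_min {
    uint32_t n_embd            = 2048;
    uint32_t wkv_head_size     = 64; // non-zero marks an RWKV-style model
    uint32_t token_shift_count = 2;  // number of shift states per layer
};

// Per-layer size (in elements) of the token-shift recurrent state:
// one embedding-wide vector per shift state.
uint32_t n_embd_token_shift(const llama_hparams_min & hp) {
    if (hp.wkv_head_size != 0) {
        return hp.token_shift_count * hp.n_embd;
    }
    return 0; // non-recurrent models carry no token-shift state
}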
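And for the last hunk: cross_attention_layers(il) is a per-layer predicate (it appears to come from the vendored multimodal patches rather than upstream llama.cpp). A hedged sketch of the usual pattern follows, with every name hypothetical: the graph builder asks the predicate whether layer il should attend over encoder state instead of running ordinary self-attention.

// Hypothetical sketch: hparams_sketch, cross_attn_layers, and build_layer
// are illustrative names, not the vendored implementation.
#include <array>
#include <cstdint>

struct hparams_sketch {
    // indices of layers that use cross-attention; -1 marks empty slots
    std::array<int32_t, 8> cross_attn_layers = {3, 8, 13, 18, -1, -1, -1, -1};

    bool cross_attention_layers(uint32_t il) const {
        for (int32_t l : cross_attn_layers) {
            if (l == (int32_t) il) {
                return true;
            }
        }
        return false;
    }
};

void build_layer(const hparams_sketch & hp, uint32_t il) {
    if (hp.cross_attention_layers(il)) {
        // attend over vision-encoder embeddings (separate KV state)
    } else {
        // standard self-attention over the text KV cache
    }
}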