llama: update llama.cpp vendor code to commit d7cfe1ff (#9356)

Author: Jeffrey Morgan
Date: 2025-02-26 20:34:44 -08:00
Committed by: GitHub
Commit: d7d7e99662 (parent 2db96c18e7)

149 changed files with 18215 additions and 11009 deletions


@@ -30,7 +30,6 @@ struct llama_hparams {
     bool use_par_res;
     bool swin_norm;
 
-    uint32_t n_vocab = 0;
     uint32_t n_ctx_train; // context size the model was trained on
     uint32_t n_embd;
     uint32_t n_embd_features = 0;
@@ -41,8 +40,8 @@ struct llama_hparams {
     uint32_t n_embd_head_v; // dimension of values (d_v) aka n_embd_head
     uint32_t n_expert = 0;
     uint32_t n_expert_used = 0;
-    uint32_t n_vocab_type = 0; // for BERT-style token types
     uint32_t n_rel_attn_bkts = 0;
+    uint32_t n_vocab = 0;
 
     // for WavTokenizer
     struct llama_hparams_posnet posnet;
@@ -79,6 +78,7 @@ struct llama_hparams {
     uint32_t time_mix_extra_dim = 0;
     uint32_t time_decay_extra_dim = 0;
     uint32_t wkv_head_size = 0;
+    uint32_t token_shift_count = 2;
 
     float rope_attn_factor = 1.0f;
     float rope_freq_base_train;
@@ -141,7 +141,7 @@ struct llama_hparams {
     // Block skip connection
     bool n_bskcn(uint32_t n, uint32_t il) const;
 
-    // cross attention layers
+    // cross attention layers
     bool cross_attention_layers(uint32_t il) const;
 };
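
For context on the third hunk: token_shift_count sizes the token-shift state that RWKV-style models carry between tokens, and the default of 2 plausibly corresponds to one shift state each for time mixing and channel mixing. Below is a minimal sketch of how such a field typically feeds recurrent-state sizing; llama_hparams_min and n_embd_token_shift are illustrative names, not the vendored API.

// Sketch only: llama_hparams_min and n_embd_token_shift are illustrative
// names under the assumptions above, not the vendored code.
#include <cstdint>

struct llama_hparams_min {
    uint32_t n_embd            = 2048;
    uint32_t wkv_head_size     = 64; // non-zero marks an RWKV-style model
    uint32_t token_shift_count = 2;  // number of shift states per layer
};

// Per-layer size (in elements) of the token-shift recurrent state:
// one embedding-wide vector per shift state.
uint32_t n_embd_token_shift(const llama_hparams_min & hp) {
    if (hp.wkv_head_size != 0) {
        return hp.token_shift_count * hp.n_embd;
    }
    return 0; // non-recurrent models carry no token-shift state
}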
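And for the last hunk: cross_attention_layers(il) is a per-layer predicate (it appears to come from the vendored multimodal patches rather than upstream llama.cpp). A hedged sketch of the usual pattern follows, with every name hypothetical: the graph builder asks the predicate whether layer il should attend over encoder state instead of running ordinary self-attention.

// Hypothetical sketch: hparams_sketch, cross_attn_layers, and build_layer
// are illustrative names, not the vendored implementation.
#include <array>
#include <cstdint>

struct hparams_sketch {
    // indices of layers that use cross-attention; -1 marks empty slots
    std::array<int32_t, 8> cross_attn_layers = {3, 8, 13, 18, -1, -1, -1, -1};

    bool cross_attention_layers(uint32_t il) const {
        for (int32_t l : cross_attn_layers) {
            if (l == (int32_t) il) {
                return true;
            }
        }
        return false;
    }
};

void build_layer(const hparams_sketch & hp, uint32_t il) {
    if (hp.cross_attention_layers(il)) {
        // attend over vision-encoder embeddings (separate KV state)
    } else {
        // standard self-attention over the text KV cache
    }
}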