llama: update vendored code to commit 46e3556 (#8308)

2025-12-22 14:53:56 +00:00 · 2025-01-08 11:22:01 -08:00
parent 57f038ec7b
commit 1deafd8254
305 changed files with 16048 additions and 12926 deletions
--- a/llama/ggml-cpu.c
+++ b/llama/ggml-cpu.c
@@ -1,5 +1,5 @@
 /**
- * llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
+ * llama.cpp - commit 46e3556e01b824e52395fb050b29804b6cff2a7c - do not edit this file
 *
 * MIT License
 *
@@ -1012,7 +1012,7 @@ inline static void __wasm_f16x4_store(ggml_fp16_t * p, v128_t x) {
 #define GGML_F16_STEP 32
 #define GGML_F16_EPR  4

-static inline __m128 __sse_f16x4_load(ggml_fp16_t *x) {
+static inline __m128 __sse_f16x4_load(const ggml_fp16_t * x) {
    float tmp[4];

    tmp[0] = GGML_FP16_TO_FP32(x[0]);
@@ -1023,7 +1023,7 @@ static inline __m128 __sse_f16x4_load(ggml_fp16_t *x) {
    return _mm_loadu_ps(tmp);
 }

-static inline void __sse_f16x4_store(ggml_fp16_t *x, __m128 y) {
+static inline void __sse_f16x4_store(ggml_fp16_t * x, __m128 y) {
    float arr[4];

    _mm_storeu_ps(arr, y);
@@ -7445,14 +7445,14 @@ static void ggml_compute_forward_mul_mat(
    if (src1_cont) {
        for (int64_t i13 = 0; i13 < ne13; i13++)
            for (int64_t i12 = 0; i12 < ne12; i12++)
-                if (!llamafile_sgemm(ne01, ne11, ne00/ggml_blck_size(src0->type),
+                if (!llamafile_sgemm(params,
+                                     ne01, ne11, ne00/ggml_blck_size(src0->type),
                                     (const char *)src0->data + i12/r2*nb02 + i13/r3*nb03,
                                     nb01/ggml_type_size(src0->type),
                                     (const char *)src1->data + i12*nb12 + i13*nb13,
                                     nb11/ggml_type_size(src1->type),
                                     (char *)dst->data + i12*nb2 + i13*nb3,
                                     nb1/ggml_type_size(dst->type),
-                                     ith, nth,
                                     src0->type,
                                     src1->type,
                                     dst->type))
@@ -7497,14 +7497,14 @@ UseGgmlGemm1:;

        for (int64_t i13 = 0; i13 < ne13; i13++)
            for (int64_t i12 = 0; i12 < ne12; i12++)
-                if (!llamafile_sgemm(ne01, ne11, ne00/ggml_blck_size(src0->type),
+                if (!llamafile_sgemm(params,
+                                     ne01, ne11, ne00/ggml_blck_size(src0->type),
                                     (const char *)src0->data + i12/r2*nb02 + i13/r3*nb03,
                                     nb01/ggml_type_size(src0->type),
                                     (const char *)wdata + (i12*ne11 + i13*ne12*ne11)*row_size,
                                     row_size/ggml_type_size(vec_dot_type),
                                     (char *)dst->data + i12*nb2 + i13*nb3,
                                     nb1/ggml_type_size(dst->type),
-                                     ith, nth,
                                     src0->type,
                                     vec_dot_type,
                                     dst->type))