GGML update to ec98e2002 (#13451)

* Revert "add support for NVIDIA Nemotron 3 Nano"

This reverts commit e7d2ae9d69421012e9a8765c06a3fdf0e45b12f3.

* GGML update to 380b4c984

Remove MaskBatchPadding as GGML_KQ_MASK_PAD is no longer present (no
padding required)

* update to c45f89d55

* ec98e2002

solar pro needed more adjusting - needs verification

* review comments
This commit is contained in:
Daniel Hiltgen
2025-12-17 13:13:55 -08:00
committed by GitHub
parent 1c094038bc
commit 49a9c9ba6a
127 changed files with 8128 additions and 6710 deletions

View File

@@ -11,10 +11,10 @@ Subject: [PATCH] graph memory reporting on failure
4 files changed, 40 insertions(+), 3 deletions(-)
diff --git a/ggml/include/ggml-alloc.h b/ggml/include/ggml-alloc.h
index 2cb150fd2..7ab3f0192 100644
index 78aa059dd..7fa8403b3 100644
--- a/ggml/include/ggml-alloc.h
+++ b/ggml/include/ggml-alloc.h
@@ -65,6 +65,7 @@ GGML_API bool ggml_gallocr_reserve_n(
@@ -72,6 +72,7 @@ GGML_API bool ggml_gallocr_reserve_n(
GGML_API bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
GGML_API size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id);
@@ -23,10 +23,10 @@ index 2cb150fd2..7ab3f0192 100644
// Utils
// Create a buffer and allocate all the tensors in a ggml_context
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
index f1b740785..c54ff98bf 100644
index 4ed5f3577..a7ebe5dcd 100644
--- a/ggml/include/ggml-backend.h
+++ b/ggml/include/ggml-backend.h
@@ -318,6 +318,7 @@ extern "C" {
@@ -319,6 +319,7 @@ extern "C" {
GGML_API ggml_backend_buffer_type_t ggml_backend_sched_get_buffer_type(ggml_backend_sched_t sched, ggml_backend_t backend);
GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
@@ -35,10 +35,10 @@ index f1b740785..c54ff98bf 100644
GGML_API void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c
index a5995fdc2..dbfd8b5b2 100644
index 41419b617..73b39bfea 100644
--- a/ggml/src/ggml-alloc.c
+++ b/ggml/src/ggml-alloc.c
@@ -494,6 +494,7 @@ struct node_alloc {
@@ -485,6 +485,7 @@ struct node_alloc {
struct ggml_gallocr {
ggml_backend_buffer_type_t * bufts; // [n_buffers]
struct vbuffer ** buffers; // [n_buffers]
@@ -46,7 +46,7 @@ index a5995fdc2..dbfd8b5b2 100644
struct ggml_dyn_tallocr ** buf_tallocs; // [n_buffers]
int n_buffers;
@@ -517,6 +518,9 @@ ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs
@@ -508,6 +509,9 @@ ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs
galloc->buffers = calloc(n_bufs, sizeof(struct vbuffer *));
GGML_ASSERT(galloc->buffers != NULL);
@@ -56,7 +56,7 @@ index a5995fdc2..dbfd8b5b2 100644
galloc->buf_tallocs = calloc(n_bufs, sizeof(struct ggml_dyn_tallocr *));
GGML_ASSERT(galloc->buf_tallocs != NULL);
@@ -584,6 +588,7 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) {
@@ -575,6 +579,7 @@ void ggml_gallocr_free(ggml_gallocr_t galloc) {
ggml_hash_set_free(&galloc->hash_set);
free(galloc->hash_values);
free(galloc->bufts);
@@ -64,7 +64,7 @@ index a5995fdc2..dbfd8b5b2 100644
free(galloc->buffers);
free(galloc->buf_tallocs);
free(galloc->node_allocs);
@@ -899,6 +904,8 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
@@ -904,6 +909,8 @@ static bool ggml_gallocr_reserve_n_impl(
}
}
@@ -73,18 +73,19 @@ index a5995fdc2..dbfd8b5b2 100644
// reallocate buffers if needed
for (int i = 0; i < galloc->n_buffers; i++) {
// if the buffer type is used multiple times, we reuse the same buffer
@@ -933,14 +940,19 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
#endif
ggml_vbuffer_free(galloc->buffers[i]);
galloc->buffers[i] = ggml_vbuffer_alloc(galloc->bufts[i], galloc->buf_tallocs[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE);
- if (galloc->buffers[i] == NULL) {
+ if (galloc->buffers[i]) {
+ galloc->buffer_sizes[i] = ggml_vbuffer_size(galloc->buffers[i]);
+ } else {
GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size);
- return false;
+ galloc->buffer_sizes[i] = new_size;
+ success = false;
@@ -940,15 +947,20 @@ static bool ggml_gallocr_reserve_n_impl(
galloc->buffers[i] = NULL;
} else {
galloc->buffers[i] = ggml_vbuffer_alloc(galloc->bufts[i], galloc->buf_tallocs[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE);
- if (galloc->buffers[i] == NULL) {
+ if (galloc->buffers[i]) {
+ galloc->buffer_sizes[i] = ggml_vbuffer_size(galloc->buffers[i]);
+ } else {
GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size);
- return false;
+ galloc->buffer_sizes[i] = new_size;
+ success = false;
}
}
+ } else {
+ galloc->buffer_sizes[i] = ggml_vbuffer_size(galloc->buffers[i]);
@@ -95,8 +96,8 @@ index a5995fdc2..dbfd8b5b2 100644
+ return success;
}
bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph *graph) {
@@ -1095,6 +1107,22 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
void ggml_gallocr_reserve_n_size(
@@ -1118,6 +1130,22 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {
return ggml_vbuffer_size(galloc->buffers[buffer_id]);
}
@@ -120,10 +121,10 @@ index a5995fdc2..dbfd8b5b2 100644
static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) {
diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
index afde2f0b7..dbf8486a0 100644
index 9f37ca70c..1459d16dd 100644
--- a/ggml/src/ggml-backend.cpp
+++ b/ggml/src/ggml-backend.cpp
@@ -1840,6 +1840,13 @@ size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backe
@@ -1859,6 +1859,13 @@ size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backe
return ggml_gallocr_get_buffer_size(sched->galloc, backend_index);
}